about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 1
-rw-r--r-- README.md 35
-rw-r--r-- res/HALK.png bin 0 -> 822 bytes
-rw-r--r-- src/include/lexer.h 11
-rw-r--r-- src/include/token.h 7
-rw-r--r-- src/include/tree.h 46
-rw-r--r-- src/lexer.c 60
-rw-r--r-- src/token.c 25
-rw-r--r-- tree.txt 30
9 files changed, 110 insertions, 105 deletions
diff --git a/Makefile b/Makefile
index d9602c2..b888564 100644
--- a/Makefile
+++ b/Makefile
@@ -6,6 +6,7 @@ DBG_CFLAGS := -Og -ggdb -pedantic -Wall -Wno-deprecated-declarations -fsanitize=
CFLAGS := ${REG_CFLAGS}
SRCS := $(wildcard src/*.c)
SRCS := $(filter-out src/parser.c, $(SRCS))
+SRCS := $(filter-out src/tree.c, $(SRCS))
OBJS := $(SRCS:.c=.o)
all: halk
diff --git a/README.md b/README.md
index 01ad2cb..5a42d7b 100644
--- a/README.md
+++ b/README.md
@@ -20,24 +20,31 @@ One must simply
$ halk examples/simple.halk
```
-. Running *HALK* with no arguments allows one to send arbitrary text.
+. Running *HALK* with no arguments allows one to lex arbitrary text from stdin.
An example session is displayed below:
```text
$ halk
-> :str:var = 'Hello, World.';
-> ^D
+:str:var = "Hello, World";
+[==] HLKT: test passed: src/main.c/main/18
+[==] source gotten
+[==] source: :str:var = "Hello, World";
+
+[==] HLKT: test passed: src/main.c/main/24
+[==] preprocessor created
+[==] pre-processed source: :str:var="Hello, World";
+[==] HLKT: test passed: src/main.c/main/34
+[==] preprocessor ran
+[==] HLKT: test passed: src/main.c/main/39
+[==] HLKT: test passed: src/main.c/main/40
[==] lexer created
-[==] BEGIN INPUT
-:str:var = 'Hello, World.';
-[==] END INPUT
-[==] token type: [TOKEN_DEF_TAG] token value: [str]
-[==] token type: [TOKEN_DEF_TAG] token value: [var]
-[==] token type: [TOKEN_DEF_SET] token value: [=]
-[==] token type: [TOKEN_PRIM_STR] token value: [Hello, World.]
-[==] token type: [TOKEN_EXPR_END] token value: [;]
-[==] token type: [TOKEN_EOF] token value: []
-[==] source file closed
+[==] token/t=9 /v=str
+[==] token/t=9 /v=var
+[==] token/t=4 /v==
+[==] token/t=2 /v=Hello, World
+[==] token/t=3 /v=;
+[==] lexer ran
+[==] HLKT: all 5 tests passed
```
# Syntax
@@ -49,7 +56,7 @@ Note that all syntax described is liable to sudden and violent change.
Example programs can be found [here](../tree/examples).
- [x] Preprocessor
-- [ ] Lexer
+- [x] Lexer
- [ ] Abstract Syntax Tree
- [ ] Parser
- [ ] Doer
diff --git a/res/HALK.png b/res/HALK.png
new file mode 100644
index 0000000..a41b12e
--- /dev/null
+++ b/res/HALK.png
Binary files differ
diff --git a/src/include/lexer.h b/src/include/lexer.h
index b2bf9eb..83ace59 100644
--- a/src/include/lexer.h
+++ b/src/include/lexer.h
@@ -16,8 +16,10 @@ typedef struct LEXER_STRUC {
enum LEXER_STATE {
/* normal 1-character token */
LEXER_STATE_REG,
- /* character */
- LEXER_STATE_CHR,
+ /* definition tag */
+ LEXER_STATE_TAG,
+ /* escaped character in string */
+ LEXER_STATE_ESC,
/* string */
LEXER_STATE_STR,
/* definition */
@@ -46,8 +48,13 @@ void lexer_add_token(lexer_t* lexer, token_t* token);
lexer_do_reg() */
void lexer_add_current_char(lexer_t* lexer, int type);
+/* append the current character of lexer's src to the value of the last token in tokenl, if that token exists and has the given type. otherwise, create a new token of that type from the character and add it */
+void lexer_add_current_char_to_last_token(lexer_t* lexer, int type);
+
/* handle regular state */
void lexer_do_reg(lexer_t*);
+/* handle definition tag state */
+void lexer_do_tag(lexer_t*);
/* handle character state */
void lexer_do_chr(lexer_t*);
/* handle string state */
diff --git a/src/include/token.h b/src/include/token.h
index 5a3a36c..a186fa9 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -34,11 +34,16 @@ typedef struct TOKEN_STRUC {
} token_t;
/* creates a token */
-token_t* token_init(int type, char* val);
+token_t* token_init(int type, char val);
/* destroys a token **and all tokens contained in nxt** **Make sure to set the nxt of any parent tokens to NULL** */
void token_destroy(token_t* token);
/* return pointer to the last token */
token_t* token_last(token_t* token);
+/* add a character to the token value */
+void token_add_char(token_t*, char);
+
+/* print a token -- for debugging purposes */
+void token_print(token_t* token);
#endif
diff --git a/src/include/tree.h b/src/include/tree.h
index a2b71da..88287a4 100644
--- a/src/include/tree.h
+++ b/src/include/tree.h
@@ -4,44 +4,26 @@
#include <stdlib.h>
typedef struct TREE_STRUC {
- enum {
- TREE_COMP,
- TREE_DEF,
- TREE_CALL,
- TREE_TYPE_STR,
+ enum TREE_TYPE {
TREE_TYPE_INT,
+ TREE_TYPE_STR,
+ TREE_TYPE_DEF,
+ TREE_TYPE_CAL,
+ TREE_TYPE_COND,
} type;
union {
- struct { // === "COMPOUND" ===
- struct TREE_STRUC** value;
- size_t size;
- } comp;
-
- struct { // === DEFINITIONS ===
- char* type; // the definition type
- char** tags; // the definition tags
- size_t tags_size; // the number of tags
- char* name; // the definition name
- struct TREE_STRUC** args; // the arguments the definition will accept
- size_t args_size; // the number of arguments
- struct TREE_STRUC* value; // value of definition
- } def;
-
- struct { // === CALLS ===
- char* target; // name of definition being called
- struct TREE_STRUC** args; // arguments passed to definition
- size_t args_size; // the number of arguments
- } call;
+ struct {
+ int val;
+ } tree_int_t;
- // === TYPES ===
- struct { // strings
- char* value;
- } type_str;
+ struct {
+ char* val;
+ } tree_str_t;
- struct { // integers
- int value;
- } type_int;
+ struct {
+ char* id;
+ } tree_def_t;
} data;
} tree_t;
diff --git a/src/lexer.c b/src/lexer.c
index ba0e8e1..7f36b98 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -36,31 +36,33 @@ void lexer_add_token(lexer_t* lexer, token_t* token) {
lexer->tokenl_last = token;
}
- log_inf("token/v:%s\t/t:%d", token->val, token->type);
lexer->tokenc ++;
}
void lexer_add_current_char(lexer_t* lexer, int type) {
- char* c; /* get the current character as a string */
token_t* t; /* the token to be added */
- c = ecalloc(2, sizeof(char));
- c[0] = *lexer->src;
- c[1] = '\0';
-
- t = token_init(type, c);
+ t = token_init(type, *lexer->src);
lexer_add_token(lexer, t);
}
+void lexer_add_current_char_to_last_token(lexer_t* lexer, int type) {
+ if (lexer->tokenl_last && lexer->tokenl_last->type == type) {
+ token_add_char(lexer->tokenl_last, *lexer->src);
+ } else {
+ lexer_add_current_char(lexer, type);
+ }
+}
+
void lexer_do_reg(lexer_t* lexer) {
switch (*lexer->src) {
case SYNTAX_APPLY:
lexer_add_current_char(lexer, TOKEN_APPLY);
break;
case SYNTAX_TAG_DELIM:
- lexer_add_current_char(lexer, TOKEN_TAG_DELIM);
+ lexer->state = LEXER_STATE_TAG;
break;
case SYNTAX_NAMESPACE_DELIM:
lexer_add_current_char(lexer, TOKEN_NAMESPACE_DELIM);
@@ -83,39 +85,49 @@ void lexer_do_reg(lexer_t* lexer) {
case SYNTAX_EXPR_END:
lexer_add_current_char(lexer, TOKEN_EXPR_END);
break;
- case SYNTAX_STR_DELIM:
- lexer_add_current_char(lexer, TOKEN_STR_DELIM);
- break;
- case SYNTAX_CHAR_DELIM:
- lexer_add_current_char(lexer, TOKEN_CHAR_DELIM);
- break;
case SYNTAX_LIST_DELIM:
lexer_add_current_char(lexer, TOKEN_LIST_DELIM);
break;
+ case SYNTAX_STR_DELIM:
+ lexer->state = LEXER_STATE_STR;
+ break;
default:
lexer_add_current_char(lexer, TOKEN_UNKNOWN);
}
}
-void lexer_do_chr(lexer_t* lexer) {
- if (*lexer->src == '\'') {
- lexer->state = LEXER_STATE_REG;
- } else {
- token_t* t;
-
- t = token_init(TOKEN_CHAR, *lexer->src);
-
- lexer_add_token(lexer, t);
+void lexer_do_tag(lexer_t* lexer) {
+ switch (*lexer->src) {
+ case SYNTAX_SET:
+ lexer_add_current_char(lexer, TOKEN_SET);
+ lexer->state = LEXER_STATE_REG;
+ break;
+ case SYNTAX_APPLY:
+ lexer_add_current_char(lexer, TOKEN_APPLY);
+ lexer->state = LEXER_STATE_REG;
+ break;
+ case SYNTAX_TAG_DELIM:
+ lexer_add_token(lexer, token_init(TOKEN_TAG, '\0'));
+ break;
+ default: lexer_add_current_char_to_last_token(lexer, TOKEN_TAG);
}
}
void lexer_do_str(lexer_t* lexer) {
-
+ if (*lexer->src == SYNTAX_STR_DELIM) {
+ lexer->state = LEXER_STATE_REG;
+ } else {
+ lexer_add_current_char_to_last_token(lexer, TOKEN_STR);
+ }
}
void lexer_run(lexer_t* lexer) {
while (*lexer->src) {
if (lexer->state == LEXER_STATE_REG) { lexer_do_reg(lexer); }
+ else if (lexer->state == LEXER_STATE_TAG) { lexer_do_tag(lexer); }
+ else if (lexer->state == LEXER_STATE_STR) { lexer_do_str(lexer); }
lexer->src ++;
}
+
+ token_print(lexer->tokenl);
}
diff --git a/src/token.c b/src/token.c
index ece32f4..935f23e 100644
--- a/src/token.c
+++ b/src/token.c
@@ -2,12 +2,14 @@
#include "include/token.h"
-token_t* token_init(int type, char* val) {
+token_t* token_init(int type, char val) {
token_t* token;
token = emalloc(sizeof(struct TOKEN_STRUC));
token->type = type;
- token->val = val;
+ token->val = emalloc(2);
+ *token->val = val;
+ token->val[1] = '\0';
token->nxt = NULL;
return token;
@@ -32,3 +34,22 @@ token_t* token_last(token_t* token) {
return t;
}
+
+void token_add_char(token_t* token, char c) {
+ size_t orig;
+
+ orig = strlen(token->val);
+
+ token->val = erealloc(token->val, orig + sizeof c + 1);
+ token->val[orig] = c;
+ token->val[orig + 1] = '\0';
+}
+
+void token_print(token_t* token) {
+
+ log_dbg("token/t=%d\t/v=%s", token->type, token->val);
+
+ if (token->nxt) {
+ token_print(token->nxt);
+ }
+}
diff --git a/tree.txt b/tree.txt
deleted file mode 100644
index 39340fe..0000000
--- a/tree.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-The Expr
-========
- [ block ] ⇐ A list of exprs.
- │ ┌┘
- │ │
- [ expr ] ── [ lit ] ⇐ A literal value; "base case" for the tree.
- │ │ ├── type
- ┌──┘ └──┐ └─ value
-[ def ] [ call ]
- │ │
- ├─ [target] ← id ├─ [target] ← id ⇐ An id is a pointer to another part of the tree.
- └── [value] ← expr │ It also contains the flags used in the definition.
- └──── [arg] ← expr
-
-Example Expr Tree
-=================
-[ block ]
- │
- ├─ [ def ]
- │ │
- │ ├─ [target] → hello
- │ └── [value] → [ lit ]
- │ ├── type → str
- │ └─ value → Hello, World
- ├─ [ call ]
- │ │
- │ ├─ [target] → print
- │ └──── [arg] → hello
- ...
-