author     c+1  2023-10-10 11:26:44 -0400
committer  c+1  2023-10-10 11:26:44 -0400
commit     58c7a71a50318940e747c365cc3f207dba432977 (patch)
tree       7d173f5433fba1b01c531610a0bf70684b8ca1de /src
parent     78befa147eccfb169bf994da3d9bfba9be3631a6 (diff)
fixed source.c, fixed preprocessor memory leaks, implemented new lexer
Diffstat (limited to 'src')
-rw-r--r--  src/include/hlkt.h   |  10
-rw-r--r--  src/include/lexer.h  |   5
-rw-r--r--  src/include/pp.h     |  55
-rw-r--r--  src/include/token.h  |   3
-rw-r--r--  src/include/util.h   |  10
-rw-r--r--  src/lexer.c          |  11
-rw-r--r--  src/main.c           |  24
-rw-r--r--  src/pp.c             |  84
-rw-r--r--  src/source.c         |  32
-rw-r--r--  src/token.c          |   6
-rw-r--r--  src/util.c           |  46
11 files changed, 221 insertions(+), 65 deletions(-)
diff --git a/src/include/hlkt.h b/src/include/hlkt.h
index 4496ce1..ebcb7f6 100644
--- a/src/include/hlkt.h
+++ b/src/include/hlkt.h
@@ -10,17 +10,19 @@ static int hlkt_failed = 0; /* number of tests that have failed */
#define HLKT_LOG() HLKT_HIDE( \
if ((hlkt_run > 0) && (hlkt_failed > 0)) { \
- log_war("HLKT: %d/%d tests failed", hlkt_failed, hlkt_run); \
+ log_err("HLKT: %d/%d tests failed", hlkt_failed, hlkt_run); \
} else { \
- log_inf("HLKT: all %d tests passed", hlkt_run); \
+ log_dbg("HLKT: all %d tests passed", hlkt_run); \
} \
)
#define HLKT_ASS(pred) HLKT_HIDE( \
hlkt_run ++; \
- if (! pred) { \
+ if (! (pred)) { \
hlkt_failed ++; \
- log_err("HLKT: test [%s] failed: %s:%s:%d", #pred, __FILE__, __func__, __LINE__); \
+ log_war("HLKT: test failed: %s/%s/%d", __FILE__, __func__, __LINE__); \
+ } else { \
+ log_dbg("HLKT: test passed: %s/%s/%d", __FILE__, __func__, __LINE__); \
} \
)
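Note on the hlkt.h hunk: parenthesizing pred is what makes HLKT_ASS safe for compound expressions, since "! pred" otherwise binds only to the first operand. A minimal sketch of the difference, using stand-in macros rather than the real HLKT_HIDE()/log_*() machinery:

    #include <stdio.h>

    /* simplified stand-ins for HLKT_ASS, only to show the precedence issue */
    #define ASSERT_OLD(pred) if (! pred)   { printf("failed: %s\n", #pred); }
    #define ASSERT_NEW(pred) if (! (pred)) { printf("failed: %s\n", #pred); }

    int main(void) {
        int x = 1, y = 0;

        /* pred is false here, so a correct assertion must report a failure */
        ASSERT_NEW(x && y);  /* expands to !(x && y) -> reports the failure */
        ASSERT_OLD(x && y);  /* expands to (!x) && y -> silently "passes"   */

        return 0;
    }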
diff --git a/src/include/lexer.h b/src/include/lexer.h
index 173c57d..b2bf9eb 100644
--- a/src/include/lexer.h
+++ b/src/include/lexer.h
@@ -28,6 +28,9 @@ typedef struct LEXER_STRUC {
/* the linked list of tokens generated */
token_t* tokenl;
+ /* pointer to the last token in tokenl */
+ token_t* tokenl_last;
+ /* number of tokens in tokenl */
int tokenc;
} lexer_t;
@@ -37,7 +40,7 @@ lexer_t* lexer_init (char* src);
/* destroy lexer **but not src or tokenl** */
void lexer_destroy (lexer_t* lexer);
-/* add token to tokenv */
+/* add token to tokenl */
void lexer_add_token(lexer_t* lexer, token_t* token);
/* add the current character as a token to tokenl -- utility function for
lexer_do_reg() */
diff --git a/src/include/pp.h b/src/include/pp.h
new file mode 100644
index 0000000..d82907c
--- /dev/null
+++ b/src/include/pp.h
@@ -0,0 +1,55 @@
+#ifndef PP_H
+#define PP_H
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "util.h"
+#include "syntax.h"
+
+/* TODO */
+typedef struct MACRO_STRUC {
+    char* id;
+    char* val;
+} macro_t;
+
+/*
+    preprocessor struct
+
+    TODO: keep track of macros
+*/
+typedef struct PP_STRUC {
+    /* original source */
+    char* src;
+
+    /* pre-processed source */
+    char* psrc;
+
+    /* what the preprocessor is looking at right now */
+    enum PP_STATE {
+        PP_STATE_REG, /* regular */
+        PP_STATE_STR, /* string */
+        PP_STATE_COM, /* comment */
+        PP_STATE_ESC, /* escaped character in string */
+        /* PP_STATE_MCO, */ /* macro */
+    } state;
+} pp_t;
+
+/* creates a new preprocessor from some source code */
+pp_t* pp_init(char*);
+
+/* destroys the preprocessor **but not the pre-processed source** */
+void pp_destroy(pp_t*);
+
+/* copy over the current character from src to psrc */
+void pp_cpy_char(pp_t*);
+
+void pp_do_reg(pp_t*);
+void pp_do_str(pp_t*);
+void pp_do_com(pp_t*);
+
+/* run the preprocessor */
+void pp_run(pp_t*);
+
+#endif
+
diff --git a/src/include/token.h b/src/include/token.h
index 802f13d..6779755 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -2,6 +2,7 @@
#define TOKEN_H
#include "util.h"
+#include "hlkt.h"
/* token struct */
typedef struct TOKEN_STRUC {
@@ -35,7 +36,7 @@ typedef struct TOKEN_STRUC {
/* creates a token */
token_t* token_init(int type, char* val);
-/* destroys a token **and all tokens contained in nxt** */
+/* destroys a token **and all tokens contained in nxt** -- make sure to set the nxt of any parent token to NULL first */
void token_destroy(token_t* token);
/* return pointer to the last token */
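Note on the token.h hunk: the expanded comment on token_destroy() matters because the destroy is recursive through nxt, so a suffix of the token list has to be detached from its parent before it is destroyed. A stand-in sketch of that rule (not the real token_t, which also carries a type field):

    #include <stdlib.h>
    #include <string.h>

    /* minimal node with the same ownership shape: heap-owned val, recursive nxt */
    typedef struct NODE { char* val; struct NODE* nxt; } node_t;

    static node_t* node_init(const char* val) {
        node_t* n = calloc(1, sizeof(node_t));
        n->val = calloc(strlen(val) + 1, 1);
        strcpy(n->val, val);
        return n;
    }

    /* mirrors token_destroy(): free the nxt chain, then val, then the node */
    static void node_destroy(node_t* n) {
        if (n->nxt) { node_destroy(n->nxt); n->nxt = NULL; }
        free(n->val);
        free(n);
    }

    int main(void) {
        node_t* a = node_init("a");
        a->nxt = node_init("b");
        a->nxt->nxt = node_init("c");

        /* drop only the "b" -> "c" tail: detach it first so a->nxt
           does not dangle after the recursive destroy */
        node_t* tail = a->nxt;
        a->nxt = NULL;
        node_destroy(tail);

        node_destroy(a); /* now frees just "a" */
        return 0;
    }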
diff --git a/src/include/util.h b/src/include/util.h
index 712af43..cfc85c2 100644
--- a/src/include/util.h
+++ b/src/include/util.h
@@ -7,16 +7,18 @@
#include <stdio.h>
-/* die and leave message */
-void die(const char*, ...);
-/* log an error */
-void log_err(const char*, ...);
+/* log some debug information */
+void log_dbg(const char*, ...);
/* log some information */
void log_inf(const char*, ...);
/* log something with no formatting */
void log_raw(const char*, ...);
/* log a warning */
void log_war(const char*, ...);
+/* log an error */
+void log_err(const char*, ...);
+/* die and leave message */
+void die(const char*, ...);
/* if calloc() returns null, die */
void* ecalloc(size_t, size_t);
diff --git a/src/lexer.c b/src/lexer.c
index e9475b6..efdc718 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -13,6 +13,7 @@ lexer_t* lexer_init(char* src) {
lexer->src = src;
lexer->state = LEXER_STATE_REG;
lexer->tokenl = NULL;
+ lexer->tokenl_last = NULL;
lexer->tokenc = 0;
return lexer;
@@ -25,8 +26,13 @@ void lexer_destroy(lexer_t* lexer) {
void lexer_add_token(lexer_t* lexer, token_t* token) {
token_t* t;
- t = token_last(lexer->tokenl)->nxt;
- t = token;
+ if (lexer->tokenl) {
+ lexer->tokenl_last->nxt = token;
+ lexer->tokenl_last = token;
+ } else {
+ lexer->tokenl = token;
+ lexer->tokenl_last = token;
+ }
log_inf("token/v:%s\t/t:%d", token->val, token->type);
@@ -53,7 +59,6 @@ void lexer_do_reg(lexer_t* lexer) {
break;
default:
lexer_add_current_char(lexer, TOKEN_UNKNOWN);
-
}
}
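Note on the lexer.c hunk: the removed lines only ever assigned to the local pointer t, so no token was actually linked into tokenl (and token_last() would have been called on an empty list). The new tokenl_last tail pointer also turns each append into an O(1) link instead of a walk to the end. The same pattern in isolation:

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct NODE { int v; struct NODE* nxt; } node_t;

    typedef struct LIST {
        node_t* head;  /* tokenl      */
        node_t* tail;  /* tokenl_last */
        int     count; /* tokenc      */
    } list_t;

    /* O(1) append with a tail pointer, same shape as the new lexer_add_token() */
    static void list_append(list_t* l, node_t* n) {
        if (l->head) {
            l->tail->nxt = n;
            l->tail = n;
        } else {
            l->head = n;
            l->tail = n;
        }
        l->count++;
    }

    int main(void) {
        list_t l = { NULL, NULL, 0 };

        for (int i = 0; i < 3; i++) {
            node_t* n = calloc(1, sizeof(node_t));
            n->v = i;
            list_append(&l, n);
        }

        for (node_t* n = l.head; n; n = n->nxt) { printf("%d ", n->v); } /* 0 1 2 */
        putchar('\n');

        for (node_t* n = l.head; n; ) { node_t* nxt = n->nxt; free(n); n = nxt; }
        return 0;
    }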
diff --git a/src/main.c b/src/main.c
index c0a8d43..7146c14 100644
--- a/src/main.c
+++ b/src/main.c
@@ -2,11 +2,11 @@
#include <stdlib.h>
#include "include/util.h"
+#include "include/hlkt.h"
+#include "include/source.h"
#include "include/token.h"
#include "include/pp.h"
#include "include/lexer.h"
-#include "include/source.h"
-#include "include/hlkt.h"
int main(int argc, char* argv[]) {
char* src; /* the source "code" */
@@ -16,33 +16,37 @@ int main(int argc, char* argv[]) {
/* get source */
src = source_get(argv[1]);
HLKT_ASS(src);
- log_inf("source gotten");
+ log_dbg("source gotten");
+ log_inf("source: %s", src);
/* create pre-processor */
pp = pp_init(src);
HLKT_ASS(pp);
- log_inf("preprocessor created");
+ log_dbg("preprocessor created");
/* pre-process source */
pp_run(pp);
free(src);
src = pp->psrc;
+ log_dbg("%s", pp->psrc); /* don't pass source text as a format string */
+ /* destroy pre-processor */
+ pp_destroy(pp);
HLKT_ASS(src);
- log_inf("preprocessor ran");
+ log_dbg("preprocessor ran");
/* create lexer */
lexer = lexer_init(src);
HLKT_ASS(lexer);
- log_inf("lexer created");
+ HLKT_ASS(lexer->src == src);
+ log_dbg("lexer created");
/* run lexer */
lexer_run(lexer);
- log_inf("lexer ran");
+ log_dbg("lexer ran");
- /* clean up */
- pp_destroy(pp);
+ /* clean up lexer stuff */
+ if (lexer->tokenl) { token_destroy(lexer->tokenl); } /* temp until parser eats tokens */
lexer_destroy(lexer);
- token_destroy(lexer->tokenl);
free(src);
HLKT_LOG();
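Note on the main.c hunk: the cleanup reordering fixes a use-after-free -- the old sequence called lexer_destroy(lexer) and then read lexer->tokenl. Moving pp_destroy(pp) up is safe for the same reason the old order was not: pp_destroy() frees only the pp struct, so psrc survives it. The ordering rule in miniature:

    #include <stdlib.h>

    /* stand-in owner whose contents must be dealt with before it is freed */
    struct owner { int* contents; };

    int main(void) {
        struct owner* o = malloc(sizeof *o);
        if (!o) { return 1; }
        o->contents = malloc(sizeof *o->contents);

        /* wrong (the old order): free(o); free(o->contents);
           -- reads o->contents after o has been freed */

        /* right (the new order): release or save the contents first,
           then free the struct that pointed at them */
        free(o->contents);
        free(o);

        return 0;
    }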
diff --git a/src/pp.c b/src/pp.c
new file mode 100644
index 0000000..6664879
--- /dev/null
+++ b/src/pp.c
@@ -0,0 +1,84 @@
+#include "include/pp.h"
+
+pp_t* pp_init(char* src) {
+    pp_t* pp;
+
+    pp = ecalloc(1, sizeof(pp_t));
+
+    pp->src = src;
+    pp->psrc = (char*) emalloc(1);
+    pp->psrc[0] = '\0';
+
+    pp->state = PP_STATE_REG;
+
+    return pp;
+}
+
+void pp_destroy(pp_t* pp) {
+    free(pp);
+}
+
+void pp_cpy_char(pp_t* pp) {
+    int plen = strlen(pp->psrc);
+    pp->psrc = erealloc(pp->psrc, (plen + 2) * sizeof(char));
+
+    pp->psrc[plen] = *pp->src;
+    pp->psrc[plen + 1] = '\0';
+}
+
+void pp_do_reg(pp_t* pp) {
+    switch (*pp->src) {
+        case SYNTAX_COMMENT_DELIM:
+            pp->state = PP_STATE_COM;
+            break;
+        case SYNTAX_STR_DELIM:
+            pp_cpy_char(pp);
+            pp->state = PP_STATE_STR;
+            break;
+        case ' ':
+        case '\n':
+        case '\r':
+        case '\t':
+            /* skip whitespace */
+            break;
+
+        default:
+            pp_cpy_char(pp);
+    }
+}
+
+void pp_do_str(pp_t* pp) {
+    if (pp->state == PP_STATE_ESC) {
+        pp_cpy_char(pp);
+        pp->state = PP_STATE_STR;
+    } else {
+        if (*pp->src == SYNTAX_ESC) {
+            pp_cpy_char(pp);
+            pp->state = PP_STATE_ESC;
+        } else if (*pp->src == SYNTAX_STR_DELIM) {
+            pp_cpy_char(pp);
+            pp->state = PP_STATE_REG;
+        }
+        else {
+            pp_cpy_char(pp);
+        }
+    }
+}
+
+void pp_do_com(pp_t* pp) {
+    if (*pp->src == SYNTAX_COMMENT_DELIM) {
+        pp->state = PP_STATE_REG;
+    }
+
+    /* let pp_run skip chars in comments */
+}
+
+void pp_run(pp_t* pp) {
+    while (*pp->src) {
+        if (pp->state == PP_STATE_REG) { pp_do_reg(pp); }
+        else if (pp->state == PP_STATE_STR || pp->state == PP_STATE_ESC) { pp_do_str(pp); }
+        else if (pp->state == PP_STATE_COM) { pp_do_com(pp); }
+
+        pp->src ++;
+    }
+}
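Rough usage of the new preprocessor, assuming syntax.h (not part of this diff) defines SYNTAX_COMMENT_DELIM as '#', SYNTAX_STR_DELIM as '"' and SYNTAX_ESC as a backslash -- those characters, and therefore the expected output, are guesses:

    #include <stdio.h>
    #include <stdlib.h>

    #include "include/pp.h"

    int main(void) {
        pp_t* pp = pp_init("set x \"a # b\" # a comment #\nset y 2");

        pp_run(pp);

        /* whitespace and comments are dropped, string contents are kept
           verbatim, so this should print: setx"a # b"sety2 */
        printf("%s\n", pp->psrc);

        char* psrc = pp->psrc;
        pp_destroy(pp); /* frees only the pp struct...                  */
        free(psrc);     /* ...so the caller still owns (and frees) psrc */
        return 0;
    }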
diff --git a/src/source.c b/src/source.c
index b098751..4874d5a 100644
--- a/src/source.c
+++ b/src/source.c
@@ -6,13 +6,12 @@ char* source_get(char* arg) {
return arg?
source_get_from_fpath(arg):
source_get_from_stdin();
-
}
char* source_get_from_fpath(char* path) {
- FILE* f; // the file to read from
- long f_size; // the size of the file
- char* src; // the source code to return
+ FILE* f;
+ long f_size;
+ char* src;
f = fopen(path, "rb");
if (!f) { die("source file not found: %s", path); }
@@ -21,7 +20,7 @@ char* source_get_from_fpath(char* path) {
f_size = ftell(f);
rewind(f);
- src = calloc(1, f_size + 1);
+ src = ecalloc(1, f_size + 1);
if ((fread(src, f_size, 1, f) != 1) || !src) {
fclose(f);
@@ -33,28 +32,11 @@ char* source_get_from_fpath(char* path) {
}
char* source_get_from_stdin() {
- size_t len; // the length of the given source
- char* src; // the source code to return
-
- len = 0;
- src = calloc(len, sizeof(char));
-
- printf("> ");
-
- while (src[len - 1] != EOF) {
- char c; // the character being read
-
- if (src[len - 1] == '\n') { printf("> "); }
-
- c = getchar();
- src = realloc(src, (len + sizeof(char)));
- memcpy(src + len, &c, sizeof(char));
- len += sizeof(char);
- }
+ char* src;
- src[len - 1] = '\0'; // null terminate
+ src = ecalloc(256, sizeof(char));
- putchar('\n');
+ src = fgets(src, 256, stdin);
return src;
}
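Note on the source.c hunk: the removed stdin loop indexed src[len - 1] while len was still 0 and stored getchar()'s EOF into a char slot, so swapping it for one bounded fgets() call is the safer choice; the trade-off is that interactive input is now capped at a single line of 255 bytes. If unbounded input were ever wanted again, a growing read loop would look roughly like this (a sketch, not what this commit does):

    #include <stdio.h>
    #include <stdlib.h>

    static char* read_all_stdin(void) {
        size_t cap = 256, len = 0;
        char*  src = malloc(cap);
        int    c;

        if (!src) { return NULL; }

        while ((c = getchar()) != EOF) {
            if (len + 1 >= cap) { /* keep room for the terminating '\0' */
                char* tmp = realloc(src, cap * 2);
                if (!tmp) { free(src); return NULL; }
                src = tmp;
                cap *= 2;
            }
            src[len++] = (char) c;
        }

        src[len] = '\0';
        return src;
    }

    int main(void) {
        char* src = read_all_stdin();

        if (src) {
            printf("read: %s\n", src);
            free(src);
        }
        return 0;
    }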
diff --git a/src/token.c b/src/token.c
index 26af598..ece32f4 100644
--- a/src/token.c
+++ b/src/token.c
@@ -14,6 +14,12 @@ token_t* token_init(int type, char* val) {
}
void token_destroy(token_t* token) {
+ if (token->nxt) {
+ token_destroy(token->nxt);
+ token->nxt = NULL;
+ }
+
+ free(token->val);
free(token);
}
diff --git a/src/util.c b/src/util.c
index a175f6b..3a236cd 100644
--- a/src/util.c
+++ b/src/util.c
@@ -1,29 +1,16 @@
#include "include/util.h"
-void die(const char* fmt, ...) {
- va_list ap;
-
- fprintf(stderr, "[\e[31;1m==\e[0m] FATAL ERROR ");
-
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
- fprintf(stderr, "\n");
-
- exit(1);
-}
-
-void log_err(const char* fmt, ...) {
+void log_dbg(const char* fmt, ...) {
va_list ap;
- fprintf(stderr, "[\e[31m==\e[0m] ERROR ");
+ fprintf(stdout, "\e[3m\e[37m[==] ");
va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
+ vfprintf(stdout, fmt, ap);
va_end(ap);
- fprintf(stderr, "\n");
+ fprintf(stdout, "\e[0m\n");
}
void log_inf(const char* fmt, ...) {
@@ -60,6 +47,31 @@ void log_war(const char* fmt, ...) {
fprintf(stderr, "\n");
}
+void log_err(const char* fmt, ...) {
+ va_list ap;
+
+ fprintf(stderr, "[\e[31m==\e[0m] ERROR ");
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+
+ fprintf(stderr, "\n");
+}
+
+void die(const char* fmt, ...) {
+ va_list ap;
+
+ fprintf(stderr, "[\e[31;1m==\e[0m] FATAL ERROR ");
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fprintf(stderr, "\n");
+
+ exit(1);
+}
+
void* ecalloc(size_t nmemb, size_t size) {
void* p;