From 58c7a71a50318940e747c365cc3f207dba432977 Mon Sep 17 00:00:00 2001
From: c+1
Date: Tue, 10 Oct 2023 11:26:44 -0400
Subject: fixed source.c, fixed preprocessor mem leaks, implemented new lexer

---
 src/include/hlkt.h  | 10 ++++---
 src/include/lexer.h |  5 +++-
 src/include/pp.h    | 55 +++++++++++++++++++++++++++++++++++
 src/include/token.h |  3 +-
 src/include/util.h  | 10 ++++---
 src/lexer.c         | 11 +++++--
 src/main.c          | 24 ++++++++-------
 src/pp.c            | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/source.c        | 32 +++++---------------
 src/token.c         |  6 ++++
 src/util.c          | 46 ++++++++++++++++++-----------
 11 files changed, 221 insertions(+), 65 deletions(-)
 create mode 100644 src/include/pp.h
 create mode 100644 src/pp.c
(limited to 'src')

diff --git a/src/include/hlkt.h b/src/include/hlkt.h
index 4496ce1..ebcb7f6 100644
--- a/src/include/hlkt.h
+++ b/src/include/hlkt.h
@@ -10,17 +10,19 @@ static int hlkt_failed = 0; /* number of tests that have failed */

 #define HLKT_LOG() HLKT_HIDE( \
     if ((hlkt_run > 0) && (hlkt_failed > 0)) { \
-        log_war("HLKT: %d/%d tests failed", hlkt_failed, hlkt_run); \
+        log_err("HLKT: %d/%d tests failed", hlkt_failed, hlkt_run); \
     } else { \
-        log_inf("HLKT: all %d tests passed", hlkt_run); \
+        log_dbg("HLKT: all %d tests passed", hlkt_run); \
     } \
 )

 #define HLKT_ASS(pred) HLKT_HIDE( \
     hlkt_run ++; \
-    if (! pred) { \
+    if (! (pred)) { \
         hlkt_failed ++; \
-        log_err("HLKT: test [%s] failed: %s:%s:%d", #pred, __FILE__, __func__, __LINE__); \
+        log_war("HLKT: test failed: %s/%s/%d", __FILE__, __func__, __LINE__); \
+    } else { \
+        log_dbg("HLKT: test passed: %s/%s/%d", __FILE__, __func__, __LINE__); \
     } \
 )

diff --git a/src/include/lexer.h b/src/include/lexer.h
index 173c57d..b2bf9eb 100644
--- a/src/include/lexer.h
+++ b/src/include/lexer.h
@@ -28,6 +28,9 @@ typedef struct LEXER_STRUC {
     /* the linked list of tokens generated */
     token_t* tokenl;

+    /* pointer to the last token in tokenl */
+    token_t* tokenl_last;
+
     /* number of tokens in tokenl */
     int tokenc;
 } lexer_t;
@@ -37,7 +40,7 @@ lexer_t* lexer_init (char* src);
 /* destroy lexer **but not src or tokenl** */
 void lexer_destroy (lexer_t* lexer);

-/* add token to tokenv */
+/* add token to tokenl */
 void lexer_add_token(lexer_t* lexer, token_t* token);

 /* add the current character as a token to tokenl -- utility function for lexer_do_reg() */
diff --git a/src/include/pp.h b/src/include/pp.h
new file mode 100644
index 0000000..d82907c
--- /dev/null
+++ b/src/include/pp.h
@@ -0,0 +1,55 @@
+#ifndef PP_H
+#define PP_H
+
+#include
+#include
+
+#include "util.h"
+#include "syntax.h"
+
+/* TODO */
+typedef struct MACRO_STRUC {
+    char* id;
+    char* val;
+} macro_t;
+
+/*
+   preprocessor struct
+
+   TODO: keep track of macros
+*/
+typedef struct PP_STRUC {
+    /* original source */
+    char* src;
+
+    /* pre-processed source */
+    char* psrc;
+
+    /* what the preprocessor is looking at right now */
+    enum PP_STATE {
+        PP_STATE_REG, /* regular */
+        PP_STATE_STR, /* string */
+        PP_STATE_COM, /* comment */
+        PP_STATE_ESC, /* escaped character in string */
+        /* PP_STATE_MCO, */ /* macro */
+    } state;
+} pp_t;
+
+/* creates a new preprocessor from some source code */
+pp_t* pp_init(char*);
+
+/* destroys the preprocessor **but not the pre-processed source** */
+void pp_destroy(pp_t*);
+
+/* copy over the current character from src to psrc */
+void pp_cpy_char(pp_t*);
+
+void pp_do_reg(pp_t*);
+void pp_do_str(pp_t*);
+void pp_do_com(pp_t*);
+
+/* run the preprocessor */
+void pp_run(pp_t*);
+
+#endif
+
diff --git a/src/include/token.h b/src/include/token.h
index 802f13d..6779755 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -2,6 +2,7 @@
 #define TOKEN_H

 #include "util.h"
+#include "hlkt.h"

 /* token struct */
 typedef struct TOKEN_STRUC {
@@ -35,7 +36,7 @@ typedef struct TOKEN_STRUC {
 /* creates a token */
 token_t* token_init(int type, char* val);

-/* destroys a token **and all tokens contained in nxt** */
+/* destroys a token **and all tokens contained in nxt** **Make sure to set the nxt of any parent tokens to NULL** */
 void token_destroy(token_t* token);

 /* return pointer to the last token */
diff --git a/src/include/util.h b/src/include/util.h
index 712af43..cfc85c2 100644
--- a/src/include/util.h
+++ b/src/include/util.h
@@ -7,16 +7,18 @@
 #include

-/* die and leave message */
-void die(const char*, ...);
-/* log an error */
-void log_err(const char*, ...);
+/* log some debug information */
+void log_dbg(const char*, ...);
 /* log some information */
 void log_inf(const char*, ...);
 /* log something with no formatting */
 void log_raw(const char*, ...);
 /* log a warning */
 void log_war(const char*, ...);
+/* log an error */
+void log_err(const char*, ...);
+/* die and leave message */
+void die(const char*, ...);

 /* if calloc() returns null, die */
 void* ecalloc(size_t, size_t);
diff --git a/src/lexer.c b/src/lexer.c
index e9475b6..efdc718 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -13,6 +13,7 @@ lexer_t* lexer_init(char* src) {
     lexer->src = src;
     lexer->state = LEXER_STATE_REG;
     lexer->tokenl = NULL;
+    lexer->tokenl_last = NULL;
     lexer->tokenc = 0;

     return lexer;
@@ -25,8 +26,13 @@ void lexer_destroy(lexer_t* lexer) {

 void lexer_add_token(lexer_t* lexer, token_t* token) {
     token_t* t;

-    t = token_last(lexer->tokenl)->nxt;
-    t = token;
+    if (lexer->tokenl) {
+        lexer->tokenl_last->nxt = token;
+        lexer->tokenl_last = token;
+    } else {
+        lexer->tokenl = token;
+        lexer->tokenl_last = token;
+    }

     log_inf("token/v:%s\t/t:%d", token->val, token->type);

@@ -53,7 +59,6 @@ void lexer_do_reg(lexer_t* lexer) {
             break;
         default:
             lexer_add_current_char(lexer, TOKEN_UNKNOWN);
-
     }
 }

diff --git a/src/main.c b/src/main.c
index c0a8d43..7146c14 100644
--- a/src/main.c
+++ b/src/main.c
@@ -2,11 +2,11 @@
 #include

 #include "include/util.h"
+#include "include/hlkt.h"
+#include "include/source.h"
 #include "include/token.h"
 #include "include/pp.h"
 #include "include/lexer.h"
-#include "include/source.h"
-#include "include/hlkt.h"

 int main(int argc, char* argv[]) {
     char* src;  /* the source "code" */
@@ -16,33 +16,37 @@
     /* get source */
     src = source_get(argv[1]);
     HLKT_ASS(src);
-    log_inf("source gotten");
+    log_dbg("source gotten");
+    log_inf("source: %s", src);

     /* create pre-processor */
     pp = pp_init(src);
     HLKT_ASS(pp);
-    log_inf("preprocessor created");
+    log_dbg("preprocessor created");

     /* pre-process source */
     pp_run(pp);
     free(src);
     src = pp->psrc;
+    log_dbg(pp->psrc);
+    /* destroy pre-processor */
+    pp_destroy(pp);
     HLKT_ASS(src);
-    log_inf("preprocessor ran");
+    log_dbg("preprocessor ran");

     /* create lexer */
     lexer = lexer_init(src);
     HLKT_ASS(lexer);
-    log_inf("lexer created");
+    HLKT_ASS(lexer->src == src);
+    log_dbg("lexer created");

     /* run lexer */
     lexer_run(lexer);
-    log_inf("lexer ran");
+    log_dbg("lexer ran");

-    /* clean up */
-    pp_destroy(pp);
+    /* clean up lexer stuff */
+    if (lexer->tokenl) { token_destroy(lexer->tokenl); } /* temp until parser eats tokens */
     lexer_destroy(lexer);
-    token_destroy(lexer->tokenl);
     free(src);

     HLKT_LOG();
diff --git a/src/pp.c b/src/pp.c
new file mode 100644
index 0000000..6664879
--- /dev/null
+++ b/src/pp.c
@@ -0,0 +1,84 @@
+#include "include/pp.h"
+
+pp_t* pp_init(char* src) {
+    pp_t* pp;
+
+    pp = ecalloc(1, sizeof(pp_t));
+
+    pp->src = src;
+    pp->psrc = (char*) emalloc(1);
+    pp->psrc[0] = '\0';
+
+    pp->state = PP_STATE_REG;
+
+    return pp;
+}
+
+void pp_destroy(pp_t* pp) {
+    free(pp);
+}
+
+void pp_cpy_char(pp_t* pp) {
+    int plen = strlen(pp->psrc);
+    pp->psrc = erealloc(pp->psrc, (plen + 2) * sizeof(char));
+
+    pp->psrc[plen] = *pp->src;
+    pp->psrc[plen + 1] = '\0';
+}
+
+void pp_do_reg(pp_t* pp) {
+    switch (*pp->src) {
+        case SYNTAX_COMMENT_DELIM:
+            pp->state = PP_STATE_COM;
+            break;
+        case SYNTAX_STR_DELIM:
+            pp_cpy_char(pp);
+            pp->state = PP_STATE_STR;
+            break;
+        case ' ':
+        case '\n':
+        case '\r':
+        case '\t':
+            /* skip whitespace */
+            break;
+
+        default:
+            pp_cpy_char(pp);
+    }
+}
+
+void pp_do_str(pp_t* pp) {
+    if (pp->state == PP_STATE_ESC) {
+        pp_cpy_char(pp);
+        pp->state = PP_STATE_STR;
+    } else {
+        if (*pp->src == SYNTAX_ESC) {
+            pp_cpy_char(pp);
+            pp->state = PP_STATE_ESC;
+        } else if (*pp->src == SYNTAX_STR_DELIM) {
+            pp_cpy_char(pp);
+            pp->state = PP_STATE_REG;
+        }
+        else {
+            pp_cpy_char(pp);
+        }
+    }
+}
+
+void pp_do_com(pp_t* pp) {
+    if (*pp->src == SYNTAX_COMMENT_DELIM) {
+        pp->state = PP_STATE_REG;
+    }
+
+    /* let pp_run skip chars in comments */
+}
+
+void pp_run(pp_t* pp) {
+    while (*pp->src) {
+        if (pp->state == PP_STATE_REG) { pp_do_reg(pp); }
+        else if (pp->state == PP_STATE_STR || pp->state == PP_STATE_ESC) { pp_do_str(pp); }
+        else if (pp->state == PP_STATE_COM) { pp_do_com(pp); }
+
+        pp->src ++;
+    }
+}
diff --git a/src/source.c b/src/source.c
index b098751..4874d5a 100644
--- a/src/source.c
+++ b/src/source.c
@@ -6,13 +6,12 @@
 char* source_get(char* arg) {
     return arg? source_get_from_fpath(arg): source_get_from_stdin();
-
 }

 char* source_get_from_fpath(char* path) {
-    FILE* f;     // the file to read from
-    long f_size; // the size of the file
-    char* src;   // the source code to return
+    FILE* f;
+    long f_size;
+    char* src;

     f = fopen(path, "rb");
     if (!f) { die("source file not found: %s", path); }
@@ -21,7 +20,7 @@
     f_size = ftell(f);
     rewind(f);

-    src = calloc(1, f_size + 1);
+    src = ecalloc(1, f_size + 1);

     if ((fread(src, f_size, 1, f) != 1) || !src) {
         fclose(f);
@@ -33,28 +32,11 @@
 }

 char* source_get_from_stdin() {
-    size_t len; // the length of the given source
-    char* src;  // the source code to return
-
-    len = 0;
-    src = calloc(len, sizeof(char));
-
-    printf("> ");
-
-    while (src[len - 1] != EOF) {
-        char c; // the character being read
-
-        if (src[len - 1] == '\n') { printf("> "); }
-
-        c = getchar();
-        src = realloc(src, (len + sizeof(char)));
-        memcpy(src + len, &c, sizeof(char));
-        len += sizeof(char);
-    }
+    char* src;

-    src[len - 1] = '\0'; // null terminate
+    src = ecalloc(256, sizeof(char));

-    putchar('\n');
+    src = fgets(src, 256, stdin);

     return src;
 }
diff --git a/src/token.c b/src/token.c
index 26af598..ece32f4 100644
--- a/src/token.c
+++ b/src/token.c
@@ -14,6 +14,12 @@ token_t* token_init(int type, char* val) {
     return token;
 }

 void token_destroy(token_t* token) {
+    if (token->nxt) {
+        token_destroy(token->nxt);
+        token->nxt = NULL;
+    }
+
+    free(token->val);
     free(token);
 }
diff --git a/src/util.c b/src/util.c
index a175f6b..3a236cd 100644
--- a/src/util.c
+++ b/src/util.c
@@ -1,29 +1,16 @@
 #include "include/util.h"

-void die(const char* fmt, ...) {
-    va_list ap;
-
-    fprintf(stderr, "[\e[31;1m==\e[0m] FATAL ERROR ");
-
-    va_start(ap, fmt);
-    vfprintf(stderr, fmt, ap);
-    va_end(ap);
-    fprintf(stderr, "\n");
-
-    exit(1);
-}
-
-void log_err(const char* fmt, ...) {
+void log_dbg(const char* fmt, ...) {
     va_list ap;

-    fprintf(stderr, "[\e[31m==\e[0m] ERROR ");
+    fprintf(stdout, "\e[3m\e[37m[==] ");

     va_start(ap, fmt);
-    vfprintf(stderr, fmt, ap);
+    vfprintf(stdout, fmt, ap);
     va_end(ap);

-    fprintf(stderr, "\n");
+    fprintf(stdout, "\e[0m\n");
 }

 void log_inf(const char* fmt, ...) {
@@ -60,6 +47,31 @@ void log_war(const char* fmt, ...) {
     fprintf(stderr, "\n");
 }

+void log_err(const char* fmt, ...) {
+    va_list ap;
+
+    fprintf(stderr, "[\e[31m==\e[0m] ERROR ");
+
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+
+    fprintf(stderr, "\n");
+}
+
+void die(const char* fmt, ...) {
+    va_list ap;
+
+    fprintf(stderr, "[\e[31;1m==\e[0m] FATAL ERROR ");
+
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+    fprintf(stderr, "\n");
+
+    exit(1);
+}
+
 void* ecalloc(size_t nmemb, size_t size) {
     void* p;

-- 
cgit v1.2.3
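
For reference, a minimal driver sketch of how the preprocessor and lexer introduced by this patch are meant to be wired together; it simply mirrors the patched src/main.c above and adds nothing new. The function name run_pipeline is illustrative only, argument handling and the HLKT assertions are omitted, and it assumes the project headers are reachable exactly as in main.c.

/* usage sketch only -- mirrors the patched src/main.c, not part of the patch */
#include <stdlib.h>

#include "include/pp.h"
#include "include/lexer.h"
#include "include/token.h"

/* run source text through the new pp -> lexer pipeline (illustrative name) */
void run_pipeline(char* src) {
    pp_t*    pp;
    lexer_t* lexer;
    char*    psrc;

    /* preprocess: comments and whitespace are dropped, string literals kept */
    pp = pp_init(src);
    pp_run(pp);
    psrc = pp->psrc;  /* keep the pre-processed buffer... */
    pp_destroy(pp);   /* ...because pp_destroy() frees only the pp_t itself */

    /* lex the pre-processed source into the tokenl linked list */
    lexer = lexer_init(psrc);
    lexer_run(lexer);

    /* tear down: token list first (token_destroy walks nxt), then the lexer */
    if (lexer->tokenl) { token_destroy(lexer->tokenl); }
    lexer_destroy(lexer);
    free(psrc);
}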