From 58c7a71a50318940e747c365cc3f207dba432977 Mon Sep 17 00:00:00 2001
From: c+1
Date: Tue, 10 Oct 2023 11:26:44 -0400
Subject: fixed source.c, fixed preprocessor mem leaks, implemented new lexer

---
 src/include/hlkt.h  | 10 ++++---
 src/include/lexer.h |  5 +++-
 src/include/pp.h    | 55 +++++++++++++++++++++++++++++++++++
 src/include/token.h |  3 +-
 src/include/util.h  | 10 ++++---
 src/lexer.c         | 11 +++++--
 src/main.c          | 24 ++++++++-------
 src/pp.c            | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/source.c        | 32 +++++---------------
 src/token.c         |  6 ++++
 src/util.c          | 46 ++++++++++++++++++-----------
 11 files changed, 221 insertions(+), 65 deletions(-)
 create mode 100644 src/include/pp.h
 create mode 100644 src/pp.c
(limited to 'src')

diff --git a/src/include/hlkt.h b/src/include/hlkt.h
index 4496ce1..ebcb7f6 100644
--- a/src/include/hlkt.h
+++ b/src/include/hlkt.h
@@ -10,17 +10,19 @@ static int hlkt_failed = 0; /* number of tests that have failed */

 #define HLKT_LOG() HLKT_HIDE( \
     if ((hlkt_run > 0) && (hlkt_failed > 0)) { \
-        log_war("HLKT: %d/%d tests failed", hlkt_failed, hlkt_run); \
+        log_err("HLKT: %d/%d tests failed", hlkt_failed, hlkt_run); \
     } else { \
-        log_inf("HLKT: all %d tests passed", hlkt_run); \
+        log_dbg("HLKT: all %d tests passed", hlkt_run); \
     } \
 )

 #define HLKT_ASS(pred) HLKT_HIDE( \
     hlkt_run ++; \
-    if (! pred) { \
+    if (! (pred)) { \
         hlkt_failed ++; \
-        log_err("HLKT: test [%s] failed: %s:%s:%d", #pred, __FILE__, __func__, __LINE__); \
+        log_war("HLKT: test failed: %s/%s/%d", __FILE__, __func__, __LINE__); \
+    } else { \
+        log_dbg("HLKT: test passed: %s/%s/%d", __FILE__, __func__, __LINE__); \
     } \
 )

diff --git a/src/include/lexer.h b/src/include/lexer.h
index 173c57d..b2bf9eb 100644
--- a/src/include/lexer.h
+++ b/src/include/lexer.h
@@ -28,6 +28,9 @@ typedef struct LEXER_STRUC {
     /* the linked list of tokens generated */
     token_t* tokenl;

+    /* pointer to the last token in tokenl */
+    token_t* tokenl_last;
+
     /* number of tokens in tokenl */
     int tokenc;
 } lexer_t;
@@ -37,7 +40,7 @@ lexer_t* lexer_init (char* src);
 /* destroy lexer **but not src or tokenl** */
 void lexer_destroy (lexer_t* lexer);

-/* add token to tokenv */
+/* add token to tokenl */
 void lexer_add_token(lexer_t* lexer, token_t* token);

 /* add the current character as a token to tokenl -- utility function for lexer_do_reg() */
diff --git a/src/include/pp.h b/src/include/pp.h
new file mode 100644
index 0000000..d82907c
--- /dev/null
+++ b/src/include/pp.h
@@ -0,0 +1,55 @@
+#ifndef PP_H
+#define PP_H
+
+#include
+#include
+
+#include "util.h"
+#include "syntax.h"
+
+/* TODO */
+typedef struct MACRO_STRUC {
+    char* id;
+    char* val;
+} macro_t;
+
+/*
+   preprocessor struct
+
+   TODO: keep track of macros
+*/
+typedef struct PP_STRUC {
+    /* original source */
+    char* src;
+
+    /* pre-processed source */
+    char* psrc;
+
+    /* what the preprocessor is looking at right now */
+    enum PP_STATE {
+        PP_STATE_REG, /* regular */
+        PP_STATE_STR, /* string */
+        PP_STATE_COM, /* comment */
+        PP_STATE_ESC, /* escaped character in string */
+        /* PP_STATE_MCO, */ /* macro */
+    } state;
+} pp_t;
+
+/* creates a new preprocessor from some source code */
+pp_t* pp_init(char*);
+
+/* destroys the preprocessor **but not the pre-processed source** */
+void pp_destroy(pp_t*);
+
+/* copy over the current character from src to psrc */
+void pp_cpy_char(pp_t*);
+
+void pp_do_reg(pp_t*);
+void pp_do_str(pp_t*);
+void pp_do_com(pp_t*);
+
+/* run the preprocessor */
+void pp_run(pp_t*);
+
+#endif
+
diff --git a/src/include/token.h b/src/include/token.h
index 802f13d..6779755 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -2,6 +2,7 @@
 #define TOKEN_H

 #include "util.h"
+#include "hlkt.h"

 /* token struct */
 typedef struct TOKEN_STRUC {
@@ -35,7 +36,7 @@ typedef struct TOKEN_STRUC {
 /* creates a token */
 token_t* token_init(int type, char* val);

-/* destroys a token **and all tokens contained in nxt** */
+/* destroys a token **and all tokens contained in nxt** **Make sure to set the nxt of any parent tokens to NULL** */
 void token_destroy(token_t* token);

 /* return pointer to the last token */
diff --git a/src/include/util.h b/src/include/util.h
index 712af43..cfc85c2 100644
--- a/src/include/util.h
+++ b/src/include/util.h
@@ -7,16 +7,18 @@
 #include

-/* die and leave message */
-void die(const char*, ...);
-/* log an error */
-void log_err(const char*, ...);
+/* log some debug information */
+void log_dbg(const char*, ...);
 /* log some information */
 void log_inf(const char*, ...);
 /* log something with no formatting */
 void log_raw(const char*, ...);
 /* log a warning */
 void log_war(const char*, ...);
+/* log an error */
+void log_err(const char*, ...);
+/* die and leave message */
+void die(const char*, ...);

 /* if calloc() returns null, die */
 void* ecalloc(size_t, size_t);
diff --git a/src/lexer.c b/src/lexer.c
index e9475b6..efdc718 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -13,6 +13,7 @@ lexer_t* lexer_init(char* src) {
     lexer->src = src;
     lexer->state = LEXER_STATE_REG;
     lexer->tokenl = NULL;
+    lexer->tokenl_last = NULL;
     lexer->tokenc = 0;

     return lexer;
@@ -25,8 +26,13 @@ void lexer_destroy(lexer_t* lexer) {

 void lexer_add_token(lexer_t* lexer, token_t* token) {
     token_t* t;

-    t = token_last(lexer->tokenl)->nxt;
-    t = token;
+    if (lexer->tokenl) {
+        lexer->tokenl_last->nxt = token;
+        lexer->tokenl_last = token;
+    } else {
+        lexer->tokenl = token;
+        lexer->tokenl_last = token;
+    }

     log_inf("token/v:%s\t/t:%d", token->val, token->type);

@@ -53,7 +59,6 @@ void lexer_do_reg(lexer_t* lexer) {
             break;
         default:
             lexer_add_current_char(lexer, TOKEN_UNKNOWN);
-
     }
 }

diff --git a/src/main.c b/src/main.c
index c0a8d43..7146c14 100644
--- a/src/main.c
+++ b/src/main.c
@@ -2,11 +2,11 @@
 #include

 #include "include/util.h"
+#include "include/hlkt.h"
+#include "include/source.h"
 #include "include/token.h"
 #include "include/pp.h"
 #include "include/lexer.h"
-#include "include/source.h"
-#include "include/hlkt.h"

 int main(int argc, char* argv[]) {
     char* src;  /* the source "code" */
@@ -16,33 +16,37 @@
     /* get source */
     src = source_get(argv[1]);
     HLKT_ASS(src);
-    log_inf("source gotten");
+    log_dbg("source gotten");
+    log_inf("source: %s", src);

     /* create pre-processor */
     pp = pp_init(src);
     HLKT_ASS(pp);
-    log_inf("preprocessor created");
+    log_dbg("preprocessor created");

     /* pre-process source */
     pp_run(pp);
     free(src);
     src = pp->psrc;
+    log_dbg(pp->psrc);
+    /* destroy pre-processor */
+    pp_destroy(pp);
     HLKT_ASS(src);
-    log_inf("preprocessor ran");
+    log_dbg("preprocessor ran");

     /* create lexer */
     lexer = lexer_init(src);
     HLKT_ASS(lexer);
-    log_inf("lexer created");
+    HLKT_ASS(lexer->src == src);
+    log_dbg("lexer created");

     /* run lexer */
     lexer_run(lexer);
-    log_inf("lexer ran");
+    log_dbg("lexer ran");

-    /* clean up */
-    pp_destroy(pp);
+    /* clean up lexer stuff */
+    if (lexer->tokenl) { token_destroy(lexer->tokenl); } /* temp until parser eats tokens */
     lexer_destroy(lexer);
-    token_destroy(lexer->tokenl);
     free(src);

     HLKT_LOG();
diff --git a/src/pp.c b/src/pp.c
new file mode 100644
index 0000000..6664879
--- /dev/null
+++ b/src/pp.c
@@ -0,0 +1,84 @@
+#include "include/pp.h"
+
+pp_t* pp_init(char* src) {
+    pp_t* pp;
+
+    pp = ecalloc(1, sizeof(pp_t));
+
+    pp->src = src;
+    pp->psrc = (char*) emalloc(1);
+    pp->psrc[0] = '\0';
+
+    pp->state = PP_STATE_REG;
+
+    return pp;
+}
+
+void pp_destroy(pp_t* pp) {
+    free(pp);
+}
+
+void pp_cpy_char(pp_t* pp) {
+    int plen = strlen(pp->psrc);
+    pp->psrc = erealloc(pp->psrc, (plen + 2) * sizeof(char));
+
+    pp->psrc[plen] = *pp->src;
+    pp->psrc[plen + 1] = '\0';
+}
+
+void pp_do_reg(pp_t* pp) {
+    switch (*pp->src) {
+        case SYNTAX_COMMENT_DELIM:
+            pp->state = PP_STATE_COM;
+            break;
+        case SYNTAX_STR_DELIM:
+            pp_cpy_char(pp);
+            pp->state = PP_STATE_STR;
+            break;
+        case ' ':
+        case '\n':
+        case '\r':
+        case '\t':
+            /* skip whitespace */
+            break;
+
+        default:
+            pp_cpy_char(pp);
+    }
+}
+
+void pp_do_str(pp_t* pp) {
+    if (pp->state == PP_STATE_ESC) {
+        pp_cpy_char(pp);
+        pp->state = PP_STATE_STR;
+    } else {
+        if (*pp->src == SYNTAX_ESC) {
+            pp_cpy_char(pp);
+            pp->state = PP_STATE_ESC;
+        } else if (*pp->src == SYNTAX_STR_DELIM) {
+            pp_cpy_char(pp);
+            pp->state = PP_STATE_REG;
+        }
+        else {
+            pp_cpy_char(pp);
+        }
+    }
+}
+
+void pp_do_com(pp_t* pp) {
+    if (*pp->src == SYNTAX_COMMENT_DELIM) {
+        pp->state = PP_STATE_REG;
+    }
+
+    /* let pp_run skip chars in comments */
+}
+
+void pp_run(pp_t* pp) {
+    while (*pp->src) {
+        if (pp->state == PP_STATE_REG) { pp_do_reg(pp); }
+        else if (pp->state == PP_STATE_STR || pp->state == PP_STATE_ESC) { pp_do_str(pp); }
+        else if (pp->state == PP_STATE_COM) { pp_do_com(pp); }
+
+        pp->src ++;
+    }
+}
diff --git a/src/source.c b/src/source.c
index b098751..4874d5a 100644
--- a/src/source.c
+++ b/src/source.c
@@ -6,13 +6,12 @@
 char* source_get(char* arg) {
     return arg? source_get_from_fpath(arg): source_get_from_stdin();
-
 }

 char* source_get_from_fpath(char* path) {
-    FILE* f;     // the file to read from
-    long f_size; // the size of the file
-    char* src;   // the source code to return
+    FILE* f;
+    long f_size;
+    char* src;

     f = fopen(path, "rb");
     if (!f) { die("source file not found: %s", path); }
@@ -21,7 +20,7 @@
     f_size = ftell(f);
     rewind(f);

-    src = calloc(1, f_size + 1);
+    src = ecalloc(1, f_size + 1);

     if ((fread(src, f_size, 1, f) != 1) || !src) {
         fclose(f);
@@ -33,28 +32,11 @@
 }

 char* source_get_from_stdin() {
-    size_t len; // the length of the given source
-    char* src;  // the source code to return
-
-    len = 0;
-    src = calloc(len, sizeof(char));
-
-    printf("> ");
-
-    while (src[len - 1] != EOF) {
-        char c; // the character being read
-
-        if (src[len - 1] == '\n') { printf("> "); }
-
-        c = getchar();
-        src = realloc(src, (len + sizeof(char)));
-        memcpy(src + len, &c, sizeof(char));
-        len += sizeof(char);
-    }
+    char* src;

-    src[len - 1] = '\0'; // null terminate
+    src = ecalloc(256, sizeof(char));

-    putchar('\n');
+    src = fgets(src, 256, stdin);

     return src;
 }
diff --git a/src/token.c b/src/token.c
index 26af598..ece32f4 100644
--- a/src/token.c
+++ b/src/token.c
@@ -14,6 +14,12 @@ token_t* token_init(int type, char* val) {
     return token;
 }

 void token_destroy(token_t* token) {
+    if (token->nxt) {
+        token_destroy(token->nxt);
+        token->nxt = NULL;
+    }
+
+    free(token->val);
     free(token);
 }
diff --git a/src/util.c b/src/util.c
index a175f6b..3a236cd 100644
--- a/src/util.c
+++ b/src/util.c
@@ -1,29 +1,16 @@
 #include "include/util.h"

-void die(const char* fmt, ...) {
-    va_list ap;
-
-    fprintf(stderr, "[\e[31;1m==\e[0m] FATAL ERROR ");
-
-    va_start(ap, fmt);
-    vfprintf(stderr, fmt, ap);
-    va_end(ap);
-    fprintf(stderr, "\n");
-
-    exit(1);
-}
-
-void log_err(const char* fmt, ...) {
+void log_dbg(const char* fmt, ...) {
     va_list ap;

-    fprintf(stderr, "[\e[31m==\e[0m] ERROR ");
+    fprintf(stdout, "\e[3m\e[37m[==] ");

     va_start(ap, fmt);
-    vfprintf(stderr, fmt, ap);
+    vfprintf(stdout, fmt, ap);
     va_end(ap);

-    fprintf(stderr, "\n");
+    fprintf(stdout, "\e[0m\n");
 }

 void log_inf(const char* fmt, ...) {
@@ -60,6 +47,31 @@ void log_war(const char* fmt, ...) {
     fprintf(stderr, "\n");
 }

+void log_err(const char* fmt, ...) {
+    va_list ap;
+
+    fprintf(stderr, "[\e[31m==\e[0m] ERROR ");
+
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+
+    fprintf(stderr, "\n");
+}
+
+void die(const char* fmt, ...) {
+    va_list ap;
+
+    fprintf(stderr, "[\e[31;1m==\e[0m] FATAL ERROR ");
+
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+    fprintf(stderr, "\n");
+
+    exit(1);
+}
+
 void* ecalloc(size_t nmemb, size_t size) {
     void* p;

-- 
cgit v1.2.3
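
For reference, a minimal driver sketch of how the preprocessor and lexer introduced by this patch are meant to be wired together; it simply mirrors the patched src/main.c above and adds nothing new. The function name run_pipeline is illustrative only, argument handling and the HLKT assertions are omitted, and it assumes the project headers are reachable exactly as in main.c.

/* usage sketch only -- mirrors the patched src/main.c, not part of the patch */
#include <stdlib.h>

#include "include/pp.h"
#include "include/lexer.h"
#include "include/token.h"

/* run source text through the new pp -> lexer pipeline (illustrative name) */
void run_pipeline(char* src) {
    pp_t*    pp;
    lexer_t* lexer;
    char*    psrc;

    /* preprocess: comments and whitespace are dropped, string literals kept */
    pp = pp_init(src);
    pp_run(pp);
    psrc = pp->psrc;  /* keep the pre-processed buffer... */
    pp_destroy(pp);   /* ...because pp_destroy() frees only the pp_t itself */

    /* lex the pre-processed source into the tokenl linked list */
    lexer = lexer_init(psrc);
    lexer_run(lexer);

    /* tear down: token list first (token_destroy walks nxt), then the lexer */
    if (lexer->tokenl) { token_destroy(lexer->tokenl); }
    lexer_destroy(lexer);
    free(psrc);
}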