1.0.0HEAD master

author: Jon Santmyer <jon@jonsantmyer.com> 2025-07-28 19:40:02 -0400
committer: Jon Santmyer <jon@jonsantmyer.com> 2025-07-28 19:40:02 -0400
commit: 86ec41090a49f9576f55eef987689f4767e79c24 (patch)
tree: b3fadffdb7ea7600324646cabb9b5cccafa035b3 /main.c
download: jdgen-86ec41090a49f9576f55eef987689f4767e79c24.tar.gz
jdgen-86ec41090a49f9576f55eef987689f4767e79c24.tar.bz2
jdgen-86ec41090a49f9576f55eef987689f4767e79c24.zip
1 files changed, 549 insertions, 0 deletions
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..4e5bb2d
--- /dev/null
+++ b/main.c
@@ -0,0 +1,549 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <ctype.h>
+#include <string.h>
+#include <unistd.h>
+
+/**@TEXT # jdgen 1.0.0
+ *
+ * A documentation generator based on my own commenting specification.
+ *
+ * Usage:
+ * ```
+ * jdgen [-o OUTFILE] SRCFILE [SRCFILES...]
+ * ```
+ *
+ * If no output file is specified, text is written to stdout,
+ * meaning another valid way to write output would be:
+ * ```
+ * jdgen SRCFILE [SRCFILES...] > OUTFILE
+ * ```
+ *
+ * The following tags are used to emit special text:  
+ * `@TEXT` : Output the following block into the outfile.  
+ * `@FUNC` : Output a description and function prototype.  
+ * `@STRUCT` : Output a description after the struct definition.  
+ * `@ENUM` : Output a description after the enum definition.  
+ * `@PARAM` : Describes a function parameter.  
+ * `@VPARAM` : Describes a variable function parameter.  
+ * `@RETURN` : Describes how a function returns a value.  
+ * */
+
+/**@ENUM Token types
+ * identifiers for comment format specifiers, each introduced by \@
+ * */
+enum
+{
+    TK_TEXT = 0, /* Text specifier. Only the comment block is recorded.*/
+    TK_FUNC, /* Function specifier. Comment block and function prototype are recorded.*/
+    TK_PARAM, /* Description for a parameter in a function prototype. */
+    TK_VPARAM, /* Description for the variadic parameters in a function prototype.*/
+    TK_RETURN, /* Return description for a function prototype.*/
+    TK_ENUM, /* Enum specifier. First line is the name, the rest is the description. Includes the enum definition.*/
+    TK_STRUCT, /* Struct specifier. First line is the formal name, followed by the description. Includes the full struct definition.*/
+};
+typedef unsigned char token_type_t;
+
+/* Keyword written after the at-sign for each token type, indexed by TK_ value. */
+const char *token_typeNames[] = {
+    [TK_TEXT]   = "TEXT",
+    [TK_FUNC]   = "FUNC",
+    [TK_PARAM]  = "PARAM",
+    [TK_VPARAM] = "VPARAM",
+    [TK_RETURN] = "RETURN",
+    [TK_ENUM]   = "ENUM",
+    [TK_STRUCT] = "STRUCT",
+};
+
+/* Number of entries in token_typeNames. */
+const size_t token_typeCount = sizeof token_typeNames / sizeof token_typeNames[0];
+
+/**@STRUCT Comment block
+ * stores the location of one comment (a block comment or a run of line comments) in a given source file.
+ * */
+typedef struct commentBlock
+{
+    FILE *file; /* file that this comment was found in. */
+    size_t start; /* file offset, as reported by ftell, just past the comment opener. */
+    size_t end; /* file offset at the end of the comment. The parsers seek here to read the documented declaration. */
+} comment_block_t;
+
+/**@STRUCT Parsed token
+ * stores data for each comment token.
+ * */
+typedef struct token
+{
+    struct token *next; /* Next token in the token stream. NULL for the last one. */
+    FILE *file; /* File this token is located in. */
+    token_type_t type; /* Token ID (a TK_ value) for this token. */
+    char *proto; /* Function prototype, struct or enum definition, or the parameter name for TK_PARAM. Heap allocated. */
+    char *name; /* Function name. Only set by the FUNC parser. */
+    char text[]; /* Description characters associated with this token. */
+} token_t;
+
+static FILE *docfile; /* Stream the generated markdown is written to. Assigned in main. */
+
+/**@FUNC Prints a formatted error message to stderr and aborts the program.
+ * @PARAM fmt printf-style format string for the message.
+ * @VPARAM values substituted into the format string.
+ * */
+__attribute__((noreturn)) void
+fail(const char *fmt, ...)
+{
+    va_list args;
+
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    abort();
+}
+
+/**@FUNC Extracts the next block of comments from the current position in the 
+ * file stream.
+ * Supports double slash comments and block comments.
+ * Moves the file stream to the end of the comment block it finds.
+ * @PARAM srcfile file to read a comment block from.
+ * @RETURN found comment block, to be freed by the caller. NULL when no further comment is found.
+ * */
+static comment_block_t*
+next_comment_block(FILE *srcfile)
+{
+    /* int, not char: EOF has to stay distinguishable from every byte value. */
+    int c;
+    comment_block_t *comment = calloc(1, sizeof(*comment));
+    if(!comment) fail("Out of memory\n");
+    comment->file = srcfile;
+
+    bool is_block = false;
+
+    while((c = fgetc(srcfile)) != EOF) {
+        if(c != '/') continue;
+
+        c = fgetc(srcfile);
+        if(c == '*') is_block = true;
+        else if(c == '/') is_block = false;
+        else continue;
+
+        comment->start = ftell(srcfile);
+        while((c = fgetc(srcfile)) != EOF) {
+            comment->end = ftell(srcfile);
+            if(is_block) {
+                if(c != '*') continue;
+                /* Step over a run of stars so that a closer written with
+                 * extra stars is still recognised. */
+                do { c = fgetc(srcfile); } while(c == '*');
+                if(c != '/') continue;
+                /* One past the closing slash, the same offset the rest of
+                 * the program has always been handed. */
+                comment->end = ftell(srcfile) + 1;
+                return comment;
+            }else{
+                if(c != '\n') continue;
+                /* A following line that also starts with two slashes
+                 * continues the same comment. */
+                while(isspace(c = fgetc(srcfile))) continue;
+                if(c == '/' && fgetc(srcfile) == '/') continue;
+                /* Exclude the newline that ended the last comment line. */
+                comment->end--;
+                return comment;
+            }
+        }
+    }
+    free(comment);
+    return NULL;
+}
+
+/* Parser for the TEXT tag: copies the remainder of the stripped comment
+ * into a new token and advances *bufferi to the terminating NUL.
+ * The returned token is heap allocated and owned by the caller. */
+static token_t*
+s_parse_token_text(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    size_t textw = strlen(&buffer[*bufferi]);
+    /* One extra zeroed byte keeps the copied text NUL-terminated, which
+     * the evaluator relies on when printing it. */
+    token_t *token = calloc(1, sizeof(token_t) + textw + 1);
+    if(!token) fail("Out of memory\n");
+    token->file = comment->file;
+    token->proto = NULL;
+    token->type = TK_TEXT;
+    memcpy(token->text, &buffer[*bufferi], textw);
+
+    *bufferi += textw;
+    return token;
+}
+
+/* Copies a tag description from src into dest.
+ * Copying stops at the end of src or at the first unescaped at-sign. A
+ * backslash makes the following character literal and is itself dropped.
+ * A backslash that is the very last character is copied as is.
+ * A newline left at the end of the copied text is replaced by a NUL.
+ *
+ * dest needs room for strlen(src) characters. No terminator is written
+ * unless a newline was trimmed, so callers hand in zeroed memory.
+ *
+ * Returns how far the caller should advance in src: the characters stored
+ * plus the backslashes dropped. A trimmed newline is not counted, so the
+ * caller lands on that newline. */
+static size_t
+s_get_token_desc(char *dest, const char *src)
+{
+    size_t writew = 0; /* characters stored in dest */
+    size_t extra = 0;  /* backslashes consumed without being stored */
+    for(size_t i = 0; src[i]; i++) {
+        /* Only treat the backslash as an escape when something follows it.
+         * Otherwise the count would step past the terminator. */
+        if(src[i] == '\\' && src[i + 1]) {
+            dest[writew++] = src[++i];
+            extra++;
+            continue;
+        }
+        if(src[i] == '@') break;
+        dest[writew++] = src[i];
+    }
+    /* Nothing copied means there is no last character to inspect. */
+    if(writew > 0 && dest[writew - 1] == '\n') dest[--writew] = 0;
+    return writew + extra;
+}
+
+/* Parser for the FUNC tag. Copies the description into the token text and
+ * advances *bufferi past it, then reads the source file after the comment
+ * to capture the prototype (everything up to the first semicolon or opening
+ * brace) and the function name (the word before the parameter list).
+ * The file position is left right after the function name. */
+static token_t*
+s_parse_token_func(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    size_t textw = strlen(&buffer[*bufferi]);
+    /* One extra zeroed byte guarantees the description is NUL-terminated. */
+    token_t *token = calloc(1, sizeof(token_t) + textw + 1);
+    if(!token) fail("Out of memory\n");
+    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
+
+    token->file = comment->file;
+    token->type = TK_FUNC;
+
+    /* Locate function prototype. The character is held in an int so that
+     * EOF cannot be mistaken for a byte of the file. */
+    size_t proto_len = 0;
+    fseek(comment->file, comment->end, SEEK_SET);
+    for(int c; (c = fgetc(comment->file)) != EOF && c != ';' && c != '{'; proto_len++);
+    fseek(comment->file, comment->end, SEEK_SET);
+    token->proto = calloc(1, proto_len + 1);
+    if(!token->proto) fail("Out of memory\n");
+    proto_len = fread(token->proto, 1, proto_len, comment->file);
+    token->proto[proto_len] = 0;
+
+    /* Locate function name: scan forward to the opening parenthesis of the
+     * parameter list. A parenthesis that follows an underscore or another
+     * parenthesis is skipped, which steps over an attribute specifier. */
+    size_t name_len = 0;
+    fseek(comment->file, comment->end, SEEK_SET);
+    for(int c, lastc = 0; (c = fgetc(comment->file)) != EOF; lastc = c) {
+        if(c == '(' && lastc != '_' && lastc != '(') break;
+    }
+    /* Walk backwards one character at a time until whitespace is read.
+     * name_len counts that whitespace as well, hence the -1 below. */
+    do { fseek(comment->file, -2, SEEK_CUR); name_len++; } while(!isspace(fgetc(comment->file)));
+    token->name = calloc(1, name_len);
+    if(!token->name) fail("Out of memory\n");
+    name_len = fread(token->name, 1, name_len - 1, comment->file);
+
+    /* Flatten the prototype onto a single line. */
+    for(char *s = token->proto; *s; s++) if(*s == '\n') *s = ' ';
+    return token;
+}
+
+/* Shared parser for the STRUCT and ENUM tags. Copies the description into
+ * the token text and advances *bufferi past it, then reads the definition
+ * that follows the comment in the source file into token->proto.
+ * The definition ends at the first closing brace directly followed by a
+ * semicolon, or otherwise at the first semicolon after a closing brace. */
+static token_t*
+s_parse_token_structlike(comment_block_t *comment, char *buffer, size_t *bufferi, token_type_t type)
+{
+    size_t textw = strlen(&buffer[*bufferi]);
+    /* One extra zeroed byte guarantees the description is NUL-terminated. */
+    token_t *token = calloc(1, sizeof(token_t) + textw + 1);
+    if(!token) fail("Out of memory\n");
+    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
+
+    token->file = comment->file;
+    token->type = type;
+
+    /* Measure the definition. The character is held in an int so that EOF
+     * cannot be mistaken for a byte of the file. */
+    size_t proto_len = 0;
+    fseek(comment->file, comment->end, SEEK_SET);
+
+    bool is_typedef = false;
+    for(int c; (c = fgetc(comment->file)) != EOF; proto_len++) {
+        if(c == '}') {
+            /* Look at the character after the brace and count it too. */
+            c = fgetc(comment->file);
+            proto_len++;
+            if(c == ';') break;
+            /* Something sits between brace and semicolon, keep going
+             * until that semicolon. */
+            is_typedef = true;
+        }
+        if(is_typedef && c == ';') break;
+    }
+    /* Account for the character that ended the loop. */
+    proto_len++;
+
+    fseek(comment->file, comment->end, SEEK_SET);
+    token->proto = calloc(1, proto_len + 1);
+    if(!token->proto) fail("Out of memory\n");
+    /* The file may hold fewer characters than measured when it ended early. */
+    proto_len = fread(token->proto, 1, proto_len, comment->file);
+    token->proto[proto_len] = 0;
+
+    return token;
+}
+
+/* Parser for the STRUCT tag. Delegates to the shared struct-like parser. */
+static token_t*
+s_parse_token_struct(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    token_t *parsed = s_parse_token_structlike(comment, buffer, bufferi, TK_STRUCT);
+    return parsed;
+}
+
+/* Parser for the ENUM tag. Delegates to the shared struct-like parser. */
+static token_t*
+s_parse_token_enum(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    token_t *parsed = s_parse_token_structlike(comment, buffer, bufferi, TK_ENUM);
+    return parsed;
+}
+
+/* Parser for the PARAM tag. The first word after the tag is the parameter
+ * name and is stored in token->proto. The rest, up to the next tag, is the
+ * description and is stored in token->text. Advances *bufferi past both. */
+static token_t*
+s_parse_token_param(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    size_t textw = strlen(&buffer[*bufferi]);
+    /* One extra zeroed byte guarantees the description is NUL-terminated. */
+    token_t *token = calloc(1, sizeof(token_t) + textw + 1);
+    if(!token) fail("Out of memory\n");
+
+    /* Locate variable name: it runs up to the first whitespace, or to the
+     * end of the buffer when nothing follows the name. */
+    const char *name = &buffer[*bufferi];
+    size_t protow = 0;
+    while(name[protow] && !isspace((unsigned char)name[protow])) protow++;
+    token->proto = calloc(protow + 1, 1);
+    if(!token->proto) fail("Out of memory\n");
+    memcpy(token->proto, name, protow);
+    *bufferi += protow;
+    /* Step over the separator, but never over the terminating NUL. */
+    if(buffer[*bufferi]) (*bufferi)++;
+
+    /* Copy param description. */
+    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
+
+    token->file = comment->file;
+    token->type = TK_PARAM;
+
+    return token;
+}
+
+/* Parser for the VPARAM tag. Stores the description that follows the tag
+ * in token->text and advances *bufferi past it. */
+static token_t*
+s_parse_token_vparam(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    size_t textw = strlen(&buffer[*bufferi]);
+    /* One extra zeroed byte guarantees the description is NUL-terminated. */
+    token_t *token = calloc(1, sizeof(token_t) + textw + 1);
+    if(!token) fail("Out of memory\n");
+
+    /* Copy param description. */
+    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
+
+    token->file = comment->file;
+    token->type = TK_VPARAM;
+
+    return token;
+}
+
+/* Parser for the RETURN tag. Stores the description that follows the tag
+ * in token->text and advances *bufferi past it. */
+static token_t*
+s_parse_token_return(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    size_t textw = strlen(&buffer[*bufferi]);
+    /* One extra zeroed byte guarantees the description is NUL-terminated. */
+    token_t *token = calloc(1, sizeof(token_t) + textw + 1);
+    if(!token) fail("Out of memory\n");
+
+    /* Copy return description. */
+    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
+
+    token->file = comment->file;
+    token->type = TK_RETURN;
+
+    return token;
+}
+
+/* Evaluator for TEXT tokens: writes the text followed by a separator line,
+ * frees the token and returns the one that came after it. */
+static token_t*
+s_eval_token_text(comment_block_t *comment, token_t *token)
+{
+    token_t *successor = token->next;
+    (void)comment;
+
+    fprintf(docfile, "%s\n\n***\n", token->text);
+    free(token);
+
+    return successor;
+}
+
+/* Writes text to the output file wrapped in a fenced markdown code block. */
+static void
+s_md_codeblock(const char *text)
+{
+    FILE *out = docfile;
+    fprintf(out, "```\n%s\n```\n", text);
+}
+
+/* Evaluator for FUNC tokens: writes a heading with the function name, then
+ * the description, then the prototype as a code block. Frees the token with
+ * its prototype and name and returns the one that came after it. */
+static token_t*
+s_eval_token_func(comment_block_t *comment, token_t *token)
+{
+    token_t *successor = token->next;
+    (void)comment;
+
+    fprintf(docfile, "#### %s\n\n", token->name);
+    fprintf(docfile, "%s\n\n", token->text);
+    s_md_codeblock(token->proto);
+
+    free(token->name);
+    free(token->proto);
+    free(token);
+
+    return successor;
+}
+
+/* Evaluator used for both ENUM and STRUCT tokens: writes the definition as
+ * a code block, then the description and a separator line. Frees the token
+ * with its definition text and returns the one that came after it. */
+static token_t*
+s_eval_token_enum(comment_block_t *comment, token_t *token)
+{
+    token_t *successor = token->next;
+    (void)comment;
+
+    s_md_codeblock(token->proto);
+    fprintf(docfile, "%s\n\n***\n", token->text);
+
+    free(token->proto);
+    free(token);
+
+    return successor;
+}
+
+/* Evaluator for PARAM tokens: writes the parameter name in code quotes
+ * followed by its description. Frees the token together with the name held
+ * in token->proto and returns the one that came after it. */
+static token_t*
+s_eval_token_param(comment_block_t *comment, token_t *token)
+{
+    (void)comment;
+    token_t *next = token->next;
+    
+    fprintf(docfile, "`%s` : ", token->proto);
+    fprintf(docfile, "%s\n\n", token->text);
+
+    /* The parameter name was allocated by the parser and is owned by the token. */
+    free(token->proto);
+    free(token);
+    return next;
+}
+
+/* Evaluator for VPARAM tokens: writes the description of the variadic
+ * arguments, frees the token and returns the one that came after it. */
+static token_t*
+s_eval_token_vparam(comment_block_t *comment, token_t *token)
+{
+    token_t *successor = token->next;
+    (void)comment;
+
+    fprintf(docfile, "`...` : %s\n\n", token->text);
+    free(token);
+
+    return successor;
+}
+
+/* Evaluator for RETURN tokens: writes the return description, frees the
+ * token and returns the one that came after it. */
+static token_t*
+s_eval_token_return(comment_block_t *comment, token_t *token)
+{
+    token_t *successor = token->next;
+    (void)comment;
+
+    fprintf(docfile, "`return` : %s\n\n", token->text);
+    free(token);
+
+    return successor;
+}
+
+/* Signature shared by every tag parser. */
+typedef token_t *(*token_parser_t)(comment_block_t*, char*, size_t*);
+
+/* Tag parsers, indexed by token type. */
+static token_parser_t tokenParsers[] = {
+    [TK_TEXT]   = s_parse_token_text,
+    [TK_FUNC]   = s_parse_token_func,
+    [TK_PARAM]  = s_parse_token_param,
+    [TK_VPARAM] = s_parse_token_vparam,
+    [TK_RETURN] = s_parse_token_return,
+    [TK_ENUM]   = s_parse_token_enum,
+    [TK_STRUCT] = s_parse_token_struct,
+};
+
+/* Signature shared by every token evaluator. */
+typedef token_t *(*token_evaluator_t)(comment_block_t*, token_t*);
+
+/* Token evaluators, indexed by token type. Structs reuse the enum evaluator. */
+static token_evaluator_t tokenEvaluators[] = {
+    [TK_TEXT]   = s_eval_token_text,
+    [TK_FUNC]   = s_eval_token_func,
+    [TK_PARAM]  = s_eval_token_param,
+    [TK_VPARAM] = s_eval_token_vparam,
+    [TK_RETURN] = s_eval_token_return,
+    [TK_ENUM]   = s_eval_token_enum,
+    [TK_STRUCT] = s_eval_token_enum,
+};
+
+/* Splits a stripped, NUL-terminated comment into a linked list of tokens.
+ * Every at-sign that is directly followed by a known keyword and a space
+ * is handed to the parser for that keyword, which consumes its text and
+ * moves the scan index forward.
+ * Returns the first token of the list, or NULL when no tag was found. */
+token_t*
+tokenize_comment_stripped(comment_block_t *comment, char *buffer)
+{
+    token_t *head = NULL, *tail = NULL;
+    size_t i = 0;
+
+    /* The index is only advanced by hand. After a parser has run it already
+     * points at the first unconsumed character, and stepping once more
+     * could skip the terminator or the at-sign of the next tag. */
+    while(buffer[i]) {
+        if(buffer[i++] != '@') continue;
+
+        /*Look for token keywords. i now indexes the character after the at-sign.*/
+        token_t *token = NULL;
+        for(size_t tki = 0; tki < token_typeCount; tki++) {
+            const char *tokenName = token_typeNames[tki];
+            size_t tokenNameW = strlen(tokenName);
+
+            /* strncmp stops at the terminator, so a buffer shorter than
+             * the keyword simply fails to match. */
+            if(strncmp(&buffer[i], tokenName, tokenNameW)
+                    || buffer[i + tokenNameW] != ' ') continue;
+            if(tokenParsers[tki] == NULL) continue;
+
+            i += tokenNameW + 1;
+            token = tokenParsers[tki](comment, buffer, &i);
+            break;
+        }
+        if(token == NULL) continue;
+
+        if(head == NULL) head = tail = token;
+        else {
+            tail->next = token;
+            tail = token;
+        }
+    }
+    return head;
+}
+
+/**@FUNC Converts a comment block into a stream of tokens.
+ * @PARAM srcfile file to source comment data from.
+ * @PARAM comment data to use to read comment block.
+ * @RETURN first token of the stream. NULL when the comment holds no tags.
+ * */
+token_t*
+tokenize_comment(FILE *srcfile, comment_block_t *comment)
+{
+    size_t commentSize = comment->end - comment->start;
+    /* One spare zeroed byte: the stripped text always has room for its
+     * terminator and the look-ahead below never leaves the buffer. */
+    char *commentBuffer = calloc(commentSize + 1, 1);
+    if(!commentBuffer) fail("Out of memory\n");
+    size_t strippedSize = 0;
+
+    /* Read the raw comment, then put the stream back where it was. */
+    long opos = ftell(srcfile);
+    fseek(srcfile, comment->start, SEEK_SET);
+    size_t readsize = fread(commentBuffer, 1, commentSize, srcfile);
+    fseek(srcfile, opos, SEEK_SET);
+
+    if(commentSize > readsize) commentSize = readsize;
+
+    /* First, strip the comment of comment characters: leading whitespace,
+     * stars and slashes are dropped from every line, the rest of the line
+     * is kept together with its newline. The text is compacted in place. */
+    for(size_t i = 0; i < commentSize; i++) {
+        char c = commentBuffer[i];
+        if(isspace((unsigned char)c) || c == '*' || c == '/') continue;
+        while(i < commentSize && c != '\n') {
+            commentBuffer[strippedSize++] = c;
+            c = commentBuffer[++i];
+            if(c == '\n') commentBuffer[strippedSize++] = c;
+        }
+    }
+
+    /* Drop a comment closer left at the end of the last line, but only
+     * when there are enough characters to look at. */
+    if(strippedSize >= 3 &&
+       commentBuffer[strippedSize - 2] == '/' &&
+       commentBuffer[strippedSize - 3] == '*') strippedSize -= 3;
+
+    commentBuffer[strippedSize] = 0;
+    token_t *tokens = tokenize_comment_stripped(comment, commentBuffer);
+    /* Every parser copies what it keeps, so the scratch buffer can go. */
+    free(commentBuffer);
+    return tokens;
+}
+
+/* Tokenizes one comment and runs every token through the evaluator for its
+ * type. An evaluator frees its token and hands back the next one. A token
+ * without an evaluator is freed here and skipped. */
+void
+process_comment(comment_block_t *comment)
+{
+    token_t *cursor = tokenize_comment(comment->file, comment);
+    while(cursor != NULL) {
+        token_evaluator_t handler = tokenEvaluators[cursor->type];
+        if(handler != NULL) {
+            cursor = handler(comment, cursor);
+            continue;
+        }
+
+        token_t *following = cursor->next;
+        free(cursor);
+        cursor = following;
+    }
+}
+
+/* Program entry point. Parses the o option, then documents every source
+ * file named on the command line, writing to OUTFILE or to stdout.
+ * Returns 0 on success. Every error goes through fail and aborts. */
+int
+main(int argc, char *argv[])
+{
+    /* TODO: Proper file IO.*/
+    char *ovalue = NULL;
+    int opt;
+    while((opt = getopt(argc, argv, "o:")) != -1) {
+        switch(opt) {
+            case 'o':
+                ovalue = optarg;
+                break;
+            case '?':
+                if(optopt == 'o')
+                    fail("Option %c requires an argument.\n", optopt);
+                else if(isprint(optopt))
+                    fail("Unknown option '%c'\n", optopt);
+                else 
+                    fail("Unknown option character '\\x%x'.\n", optopt);
+            default:
+                abort();
+        }
+    }
+
+    /* Check for inputs before touching the output, so that a bad
+     * invocation does not truncate an existing output file. */
+    if(optind >= argc)
+        fail("Missing input files.\n");
+
+    if(ovalue) {
+        docfile = fopen(ovalue, "w");
+        if(!docfile)
+            fail("Failed to open output file '%s' for writing\n", ovalue);
+    }else docfile = stdout;
+
+    for(int i = optind; i < argc; i++) {
+        char *srcstr = argv[i];
+        FILE *srcfile = fopen(srcstr, "r");
+        if(!srcfile) fail("Failed to open source file '%s' for reading\n", srcstr);
+
+        comment_block_t *block = NULL;
+        while((block = next_comment_block(srcfile))) {
+            process_comment(block);
+            free(block);
+        }
+
+        fclose(srcfile);
+    }
+
+    /* Flush explicitly so that a failed write is reported instead of
+     * being lost at exit. */
+    if(fflush(docfile) != 0 || ferror(docfile))
+        fail("Failed to write documentation output\n");
+    if(docfile != stdout) fclose(docfile);
+
+    return 0;
+}
author	Jon Santmyer <jon@jonsantmyer.com>	2025-07-28 19:40:02 -0400
committer	Jon Santmyer <jon@jonsantmyer.com>	2025-07-28 19:40:02 -0400
commit	86ec41090a49f9576f55eef987689f4767e79c24 (patch)
tree	b3fadffdb7ea7600324646cabb9b5cccafa035b3 /main.c
download	jdgen-86ec41090a49f9576f55eef987689f4767e79c24.tar.gz jdgen-86ec41090a49f9576f55eef987689f4767e79c24.tar.bz2 jdgen-86ec41090a49f9576f55eef987689f4767e79c24.zip