diff options
author | Jon Santmyer <jon@jonsantmyer.com> | 2025-07-28 19:40:02 -0400 |
---|---|---|
committer | Jon Santmyer <jon@jonsantmyer.com> | 2025-07-28 19:40:02 -0400 |
commit | 86ec41090a49f9576f55eef987689f4767e79c24 (patch) | |
tree | b3fadffdb7ea7600324646cabb9b5cccafa035b3 /main.c | |
download | jdgen-master.tar.gz jdgen-master.tar.bz2 jdgen-master.zip |
Diffstat (limited to 'main.c')
-rw-r--r-- | main.c | 549 |
1 files changed, 549 insertions, 0 deletions
#define _POSIX_C_SOURCE 200809L /* getopt, optarg, optind and optopt come from POSIX */

#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>
#include <unistd.h>

/**@TEXT # jodgen 1.0.0
 *
 * A documentation generator based on my own commenting specification.
 *
 * Usage:
 * ```
 * jdgen [-o OUTFILE] SRCFILE [SRCFILES...]
 * ```
 *
 * If no output file is specified, text is written to stdout,
 * meaning another valid way to write output would be:
 * ```
 * jdgen SRCFILE [SRCFILES...] > OUTFILE
 * ```
 *
 * The following tags are used to emit special text:
 * `@TEXT` : Output the following block into the outfile.
 * `@FUNC` : Output a description and function prototype.
 * `@STRUCT` : Output a description after the struct definition.
 * `@ENUM` : Output a description after the enum definition.
 * `@PARAM` : Describes a function parameter.
 * `@VPARAM` : Describes a variable function parameter.
 * `@RETURN` : Describes how a function returns a value.
 * */

/**@ENUM Token types
 * identifiers for comment format specifiers, starting with \@
 * */
enum
{
    TK_TEXT = 0, /* Text specifier. Comment block only is recorded.*/
    TK_FUNC, /* Function specifier. Comment block and function prototype is recorded.*/
    TK_PARAM, /* Description for parameter in function prototype. */
    TK_VPARAM, /* Description for variadic parameter in function prototype.*/
    TK_RETURN, /* Return description for function prototype.*/
    TK_ENUM, /* Enum specifier. First line is name, following is description, includes enum values.*/
    TK_STRUCT, /* Struct specifier. First line is formal name, followed by description.
                  includes full struct definition.*/
};
typedef unsigned char token_type_t;

/* Tag spelling for every token type, indexed by the TK_ constants. */
const char *token_typeNames[] = {
    [TK_TEXT] = "TEXT",
    [TK_FUNC] = "FUNC",
    [TK_PARAM] = "PARAM",
    [TK_VPARAM] = "VPARAM",
    [TK_RETURN] = "RETURN",
    [TK_ENUM] = "ENUM",
    [TK_STRUCT] = "STRUCT"
};
const size_t token_typeCount = sizeof(token_typeNames) / sizeof(token_typeNames[0]);

/**@STRUCT Comment block
 * stores data for each block (or block of) comments in a given source file.
 * */
typedef struct commentBlock
{
    FILE *file; /* file that this block comment is home to. */
    size_t start; /* read offset in the file for the start of the block. */
    size_t end; /* read offset in the file for the end of the block. */
} comment_block_t;

/**@STRUCT Parsed token
 * stores data for each comment token.
 * */
typedef struct token
{
    struct token *next; /*Next token in the token stream. */
    FILE *file; /*File this token is located in.*/
    token_type_t type; /*Token ID for this token.*/
    char *proto; /*Function prototype or structure definition or enum definition.*/
    char *name; /* Function name.*/
    char text[]; /*Characters associated with this token.*/
} token_t;

/* Stream that all generated markdown is written to. Set once in main. */
static FILE *docfile;

/**@FUNC Prematurely terminates the program with a specified error message.
 * @PARAM fmt string to format and print.
 * @VPARAM arguments to format text with.
 * */
__attribute__((noreturn)) void
fail(const char *fmt, ...)
{
    va_list ap;
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    abort();
}

/* Zero-initialised allocation that terminates the program instead of
 * returning NULL, so callers never dereference a failed allocation.
 * A request for zero bytes still yields a valid one byte object. */
static void*
s_alloc(size_t size)
{
    void *mem = calloc(1, size ? size : 1);
    if(!mem) fail("Out of memory.\n");
    return mem;
}

/**@FUNC Extracts the next block of comments from the current position in the
 * file stream.
 * Supports double slash comments and block comments.
 * Moves the file stream to the end of the comment block it finds.
 * @PARAM srcfile file to read a comment block from.
 * @RETURN found comment block. NULL upon failure.
 * */
static comment_block_t*
next_comment_block(FILE *srcfile)
{
    /* fgetc returns an int so that EOF stays distinct from every byte value. */
    int c;
    comment_block_t *comment = s_alloc(sizeof *comment);
    comment->file = srcfile;

    while((c = fgetc(srcfile)) != EOF) {
        if(c != '/') continue;

        bool is_block;
        c = fgetc(srcfile);
        if(c == '*') is_block = true;
        else if(c == '/') is_block = false;
        else continue;

        comment->start = (size_t)ftell(srcfile);
        /* Keeps end >= start when the file stops right after the opener. */
        comment->end = comment->start;

        while((c = fgetc(srcfile)) != EOF) {
            comment->end = (size_t)ftell(srcfile);

            if(is_block) {
                if(c != '*') continue;
                c = fgetc(srcfile);
                if(c != '/') {
                    /* Not the terminator. Hand the byte back so that a run
                     * of stars in front of the closing slash still ends the
                     * comment. Pushing back EOF is a harmless no-op. */
                    ungetc(c, srcfile);
                    continue;
                }
                /* end covers the terminator plus the one byte after it.
                 * tokenize_comment relies on that extra byte when it trims
                 * the terminator from the stripped text. */
                comment->end += 2;
                return comment;
            }

            if(c != '\n') continue;

            /* A line comment continues when the next non-blank text is
             * another double slash. Remember where that text starts so it
             * can be handed back when it is something else. */
            long resume;
            do {
                resume = ftell(srcfile);
                c = fgetc(srcfile);
            } while(c != EOF && isspace(c));
            if(c == '/' && fgetc(srcfile) == '/') continue;

            /* Rewind over the look-ahead, otherwise a block comment that
             * directly follows would lose its opener and be skipped. */
            if(resume >= 0) fseek(srcfile, resume, SEEK_SET);
            comment->end--;
            return comment;
        }

        /* End of file. A line comment without a trailing newline is still
         * complete, an unterminated block comment is not. */
        if(!is_block) return comment;
        break;
    }
    free(comment);
    return NULL;
}

/* Allocates a token of the given type with room for textw characters of
 * text plus the terminating NUL. All other fields start out zeroed. */
static token_t*
s_new_token(comment_block_t *comment, token_type_t type, size_t textw)
{
    token_t *token = s_alloc(sizeof *token + textw + 1);
    token->file = comment->file;
    token->type = type;
    return token;
}

/* Copies up to len bytes that start at file offset start into a new
 * NUL-terminated string owned by the caller. The stream is left positioned
 * directly after the bytes that were read. */
static char*
s_read_span(FILE *file, size_t start, size_t len)
{
    char *text = s_alloc(len + 1);
    if(fseek(file, (long)start, SEEK_SET) != 0) return text;
    size_t got = fread(text, 1, len, file);
    text[got] = 0;
    return text;
}

/* TEXT tag: the remainder of the comment is taken verbatim. */
static token_t*
s_parse_token_text(comment_block_t *comment, char *buffer, size_t *bufferi)
{
    size_t textw = strlen(&buffer[*bufferi]);
    token_t *token = s_new_token(comment, TK_TEXT, textw);
    memcpy(token->text, &buffer[*bufferi], textw);

    /* Everything up to the terminator has been consumed. */
    *bufferi += textw;
    return token;
}

/* Copies a description from src into dest, stopping at the end of src or at
 * the next unescaped tag marker. A backslash makes the following character
 * literal. One trailing newline is dropped from the copy.
 * dest must have room for strlen(src) + 1 bytes and is always terminated.
 * Returns how many bytes of src the caller should skip. When a trailing
 * newline was dropped this points at that newline, not past it. */
static size_t
s_get_token_desc(char *dest, const char *src)
{
    size_t readi = 0;
    size_t writew = 0;

    while(src[readi] && src[readi] != '@') {
        /* A backslash at the very end has nothing to escape and is copied
         * as is, which keeps readi from running past the terminator. */
        if(src[readi] == '\\' && src[readi + 1]) readi++;
        dest[writew++] = src[readi++];
    }
    dest[writew] = 0;

    size_t consumed = readi;
    if(writew > 0 && dest[writew - 1] == '\n') {
        dest[--writew] = 0;
        consumed--;
    }
    return consumed;
}

/* FUNC tag: description from the comment, prototype and name from the
 * source text that follows the comment. Leaves the stream positioned right
 * after the function name. */
static token_t*
s_parse_token_func(comment_block_t *comment, char *buffer, size_t *bufferi)
{
    FILE *file = comment->file;
    token_t *token = s_new_token(comment, TK_FUNC, strlen(&buffer[*bufferi]));
    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);

    /* The prototype is everything up to the first semicolon or brace. */
    size_t proto_len = 0;
    fseek(file, (long)comment->end, SEEK_SET);
    for(int c; (c = fgetc(file)) != EOF && c != ';' && c != '{'; proto_len++);
    token->proto = s_read_span(file, comment->end, proto_len);
    for(char *s = token->proto; *s; s++) if(*s == '\n') *s = ' ';

    /* The name ends at the first opening parenthesis that is not part of an
     * attribute, recognised by a preceding underscore or parenthesis. */
    int c, lastc = 0;
    fseek(file, (long)comment->end, SEEK_SET);
    while((c = fgetc(file)) != EOF) {
        if(c == '(' && lastc != '_' && lastc != '(') break;
        lastc = c;
    }
    if(c == EOF) {
        /* No parameter list follows the comment. Use an empty name. */
        token->name = s_alloc(1);
        return token;
    }

    /* Walk back from the parenthesis to the preceding whitespace, without
     * ever leaving the text that follows the comment. */
    long paren = ftell(file) - 1;
    long name_start = paren;
    while(name_start > (long)comment->end) {
        fseek(file, name_start - 1, SEEK_SET);
        int prev = fgetc(file);
        if(prev == EOF || isspace(prev)) break;
        name_start--;
    }
    token->name = s_read_span(file, (size_t)name_start, (size_t)(paren - name_start));
    return token;
}

/* STRUCT and ENUM tags: description from the comment, definition from the
 * source text that follows it. Leaves the stream positioned right after the
 * definition, so comments inside the definition are not scanned again. */
static token_t*
s_parse_token_structlike(comment_block_t *comment, char *buffer, size_t *bufferi, token_type_t type)
{
    FILE *file = comment->file;
    token_t *token = s_new_token(comment, type, strlen(&buffer[*bufferi]));
    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);

    /* The definition ends at the first semicolon outside of all braces.
     * That covers plain definitions, typedefs with a trailing name, nested
     * members and forward declarations alike. */
    size_t proto_len = 0;
    int depth = 0;
    fseek(file, (long)comment->end, SEEK_SET);
    for(int c; (c = fgetc(file)) != EOF; ) {
        proto_len++;
        if(c == '{') depth++;
        else if(c == '}' && depth > 0) depth--;
        else if(c == ';' && depth == 0) break;
    }
    token->proto = s_read_span(file, comment->end, proto_len);

    return token;
}

static token_t*
s_parse_token_struct(comment_block_t *comment, char *buffer, size_t *bufferi)
{ return s_parse_token_structlike(comment, buffer, bufferi, TK_STRUCT); }

static token_t*
s_parse_token_enum(comment_block_t *comment, char *buffer, size_t *bufferi)
{ return s_parse_token_structlike(comment, buffer, bufferi, TK_ENUM); }

/* PARAM tag: first word is the parameter name, the rest its description. */
static token_t*
s_parse_token_param(comment_block_t *comment, char *buffer, size_t *bufferi)
{
    token_t *token = s_new_token(comment, TK_PARAM, strlen(&buffer[*bufferi]));

    /* The name runs up to the next whitespace or the end of the buffer. */
    const char *name = &buffer[*bufferi];
    size_t namew = 0;
    while(name[namew] && !isspace((unsigned char)name[namew])) namew++;
    token->proto = s_alloc(namew + 1);
    memcpy(token->proto, name, namew);
    *bufferi += namew;

    /* Skip the separator, but never step over the terminator. */
    if(buffer[*bufferi]) (*bufferi)++;

    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
    return token;
}

/* VPARAM tag: description only. */
static token_t*
s_parse_token_vparam(comment_block_t *comment, char *buffer, size_t *bufferi)
{
    token_t *token = s_new_token(comment, TK_VPARAM, strlen(&buffer[*bufferi]));
    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
    return token;
}

/* RETURN tag: description only. */
static token_t*
s_parse_token_return(comment_block_t *comment, char *buffer, size_t *bufferi)
{
    token_t *token = s_new_token(comment, TK_RETURN, strlen(&buffer[*bufferi]));
    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
    return token;
}

/* Every evaluator below writes one token to docfile, releases the token and
 * everything it owns, and returns the token that followed it. */

static token_t*
s_eval_token_text(comment_block_t *comment, token_t *token)
{
    (void)comment;
    token_t *next = token->next;

    fprintf(docfile, "%s\n\n***\n", token->text);

    free(token);
    return next;
}

/* Writes text to docfile wrapped in a markdown code fence. */
static void
s_md_codeblock(const char *text)
{
    fprintf(docfile, "```\n%s\n```\n", text);
}

static token_t*
s_eval_token_func(comment_block_t *comment, token_t *token)
{
    (void)comment;
    token_t *next = token->next;

    fprintf(docfile, "#### %s\n\n", token->name);
    fprintf(docfile, "%s\n\n", token->text);
    s_md_codeblock(token->proto);

    free(token->proto);
    free(token->name);
    free(token);
    return next;
}

/* Shared by enum and struct tokens. */
static token_t*
s_eval_token_enum(comment_block_t *comment, token_t *token)
{
    (void)comment;
    token_t *next = token->next;

    s_md_codeblock(token->proto);
    fprintf(docfile, "%s\n\n***\n", token->text);

    free(token->proto);
    free(token);
    return next;
}

static token_t*
s_eval_token_param(comment_block_t *comment, token_t *token)
{
    (void)comment;
    token_t *next = token->next;

    fprintf(docfile, "`%s` : ", token->proto);
    fprintf(docfile, "%s\n\n", token->text);

    /* proto holds the parameter name and is owned by the token. */
    free(token->proto);
    free(token);
    return next;
}

static token_t*
s_eval_token_vparam(comment_block_t *comment, token_t *token)
{
    (void)comment;
    token_t *next = token->next;

    fprintf(docfile, "`...` : %s\n\n", token->text);

    free(token);
    return next;
}

static token_t*
s_eval_token_return(comment_block_t *comment, token_t *token)
{
    (void)comment;
    token_t *next = token->next;

    fprintf(docfile, "`return` : %s\n\n", token->text);

    free(token);
    return next;
}

/* Parser for every token type, indexed by the TK_ constants. */
static token_t *(*tokenParsers[])(comment_block_t*, char*, size_t*) = {
    [TK_TEXT] = s_parse_token_text,
    [TK_FUNC] = s_parse_token_func,
    [TK_PARAM] = s_parse_token_param,
    [TK_VPARAM] = s_parse_token_vparam,
    [TK_RETURN] = s_parse_token_return,
    [TK_ENUM] = s_parse_token_enum,
    [TK_STRUCT] = s_parse_token_struct
};

/* Evaluator for every token type, indexed by the TK_ constants. */
typedef token_t *(*token_evaluator_t)(comment_block_t*, token_t*);
static token_evaluator_t tokenEvaluators[] = {
    [TK_TEXT] = s_eval_token_text,
    [TK_FUNC] = s_eval_token_func,
    [TK_PARAM] = s_eval_token_param,
    [TK_VPARAM] = s_eval_token_vparam,
    [TK_RETURN] = s_eval_token_return,
    [TK_ENUM] = s_eval_token_enum,
    [TK_STRUCT] = s_eval_token_enum
};

/* Splits comment text that has already been stripped of comment decoration
 * into a linked list of tokens, in the order the tags appear. A tag is the
 * marker character, a known tag name and one space. Text in front of the
 * first tag is ignored. Returns the first token, or NULL when the text holds
 * no tag. The tokens do not keep any pointer into buffer. */
token_t*
tokenize_comment_stripped(comment_block_t *comment, char *buffer)
{
    token_t *first = NULL, *last = NULL;
    size_t i = 0;

    while(buffer[i]) {
        if(buffer[i++] != '@') continue;

        size_t remaining = strlen(&buffer[i]);
        /*Look for token keywords.*/

        token_t *token = NULL;
        for(size_t tki = 0; tki < token_typeCount; tki++) {
            const char *tokenName = token_typeNames[tki];
            size_t tokenNameW = strlen(tokenName);

            if(remaining < tokenNameW
                    || strncmp(&buffer[i], tokenName, tokenNameW)
                    || buffer[i + tokenNameW] != ' ') continue;
            if(tokenParsers[tki] == NULL) continue;

            i += tokenNameW + 1;
            token = tokenParsers[tki](comment, buffer, &i);
            break;
        }
        if(token == NULL) continue;

        if(first == NULL) first = token;
        else last->next = token;
        last = token;

        /* The parser leaves i on the first byte it did not consume. That
         * byte is looked at by the next iteration instead of being skipped,
         * so the scan neither runs past the terminator nor misses a tag
         * that directly follows a description. */
    }
    return first;
}

/**@FUNC Converts a comment block into a stream of tokens.
 * @PARAM srcfile file to source comment data from.
 * @PARAM comment data to use to read comment block.
 * */
token_t*
tokenize_comment(FILE *srcfile, comment_block_t *comment)
{
    size_t commentSize = comment->end > comment->start
        ? comment->end - comment->start : 0;
    /* One extra byte so the terminator always fits. */
    char *commentBuffer = s_alloc(commentSize + 1);
    size_t strippedSize = 0;

    long opos = ftell(srcfile);
    fseek(srcfile, (long)comment->start, SEEK_SET);
    size_t readsize = fread(commentBuffer, 1, commentSize, srcfile);
    fseek(srcfile, opos, SEEK_SET);

    if(commentSize > readsize) commentSize = readsize;

    //First, strip the comment of comment characters.
    //The text is compacted in place. The write index never passes the read index.
    size_t i = 0;
    while(i < commentSize) {
        char c = commentBuffer[i];
        if(isspace((unsigned char)c) || c == '*' || c == '/') {
            i++;
            continue;
        }
        //Keep the rest of the line, newline included.
        while(i < commentSize && commentBuffer[i] != '\n')
            commentBuffer[strippedSize++] = commentBuffer[i++];
        if(i < commentSize)
            commentBuffer[strippedSize++] = commentBuffer[i++];
    }

    //Drop a block terminator that shares its line with text, together with
    //the single byte that follows it.
    if(strippedSize >= 3 &&
            commentBuffer[strippedSize - 2] == '/' &&
            commentBuffer[strippedSize - 3] == '*') strippedSize -= 3;

    commentBuffer[strippedSize] = 0;

    token_t *tokens = tokenize_comment_stripped(comment, commentBuffer);
    free(commentBuffer);
    return tokens;
}

/* Tokenizes one comment block and writes every token to docfile. All tokens
 * are released on the way. */
void
process_comment(comment_block_t *comment)
{
    token_t *token = tokenize_comment(comment->file, comment);
    while(token) {
        token_evaluator_t eval = tokenEvaluators[token->type];
        if(!eval) {
            token_t *next = token->next;
            free(token);
            token = next;
            continue;
        }
        token = eval(comment, token);
    }
}

int
main(int argc, char *argv[])
{
    /* TODO: Proper file IO.*/
    char *ovalue = NULL;
    int opt;
    while((opt = getopt(argc, argv, "o:")) != -1) {
        switch(opt) {
            case 'o':
                ovalue = optarg;
                break;
            case '?':
                if(optopt == 'o')
                    fail("Option %c requires an argument.\n", optopt);
                else if(isprint(optopt))
                    fail("Unknown option '%c'\n", optopt);
                else
                    fail("Unknown option character '\\x%x'.\n", optopt);
            default:
                abort();
        }
    }

    /* Check for inputs before the output file is opened, so a usage error
     * does not create or truncate it. */
    if(optind >= argc)
        fail("Missing input files.\n");

    if(ovalue) {
        docfile = fopen(ovalue, "w");
        if(!docfile)
            fail("Failed to open output file '%s' for writing\n", ovalue);
    }else docfile = stdout;

    for(int i = optind; i < argc; i++) {
        char *srcstr = argv[i];
        FILE *srcfile = fopen(srcstr, "r");
        if(!srcfile) fail("Failed to open source file '%s' for reading\n", srcstr);

        comment_block_t *block = NULL;
        while((block = next_comment_block(srcfile))) {
            process_comment(block);
            free(block);
        }

        fclose(srcfile);
    }

    /* Closing flushes buffered output. A failure here means lost text. */
    if(ovalue && fclose(docfile) != 0)
        fail("Failed to write output file '%s'\n", ovalue);

    return 0;
}