summaryrefslogtreecommitdiffstats
path: root/main.c
diff options
context:
space:
mode:
authorJon Santmyer <jon@jonsantmyer.com>2025-07-28 19:40:02 -0400
committerJon Santmyer <jon@jonsantmyer.com>2025-07-28 19:40:02 -0400
commit86ec41090a49f9576f55eef987689f4767e79c24 (patch)
treeb3fadffdb7ea7600324646cabb9b5cccafa035b3 /main.c
downloadjdgen-86ec41090a49f9576f55eef987689f4767e79c24.tar.gz
jdgen-86ec41090a49f9576f55eef987689f4767e79c24.tar.bz2
jdgen-86ec41090a49f9576f55eef987689f4767e79c24.zip
Diffstat (limited to 'main.c')
-rw-r--r--main.c549
1 files changed, 549 insertions, 0 deletions
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..4e5bb2d
--- /dev/null
+++ b/main.c
@@ -0,0 +1,549 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <ctype.h>
+#include <string.h>
+#include <unistd.h>
+
+/**@TEXT # jdgen 1.0.0
+ *
+ * A documentation generator based on my own commenting specification.
+ *
+ * Usage:
+ * ```
+ * jdgen [-o OUTFILE] SRCFILE [SRCFILES...]
+ * ```
+ *
+ * If no output file is specified, text is written to stdout,
+ * meaning another valid way to write output would be:
+ * ```
+ * jdgen SRCFILE [SRCFILES...] > OUTFILE
+ * ```
+ *
+ * The following tags are used to emit special text:
+ * `@TEXT` : Output the following block into the outfile.
+ * `@FUNC` : Output a description and function prototype.
+ * `@STRUCT` : Output a description after the struct definition.
+ * `@ENUM` : Output a description after the enum definition.
+ * `@PARAM` : Describes a function parameter.
+ * `@VPARAM` : Describes a variable function parameter.
+ * `@RETURN` : Describes how a function returns a value.
+ * */
+
+/**@ENUM Token types
+ * identifiers for comment format specifiers, starting with \@
+ * */
+enum
+{
+ TK_TEXT = 0, /* Text specifier. Only the comment block is recorded.*/
+ TK_FUNC, /* Function specifier. Comment block and function prototype are recorded.*/
+ TK_PARAM, /* Description for a parameter in a function prototype. */
+ TK_VPARAM, /* Description for the variadic parameter in a function prototype.*/
+ TK_RETURN, /* Return description for a function prototype.*/
+ TK_ENUM, /* Enum specifier. First line is the name, the rest is the description. Includes the enum values.*/
+ TK_STRUCT, /* Struct specifier. First line is the formal name, followed by the description. Includes the full struct definition.*/
+};
+/* Storage type for the identifiers above; also used to index the per-type tables. */
+typedef unsigned char token_type_t;
+/* Tag keyword for every token type, indexed by its TK_ identifier. */
+const char *token_typeNames[] = {
+    [TK_TEXT]   = "TEXT",
+    [TK_FUNC]   = "FUNC",
+    [TK_PARAM]  = "PARAM",
+    [TK_VPARAM] = "VPARAM",
+    [TK_RETURN] = "RETURN",
+    [TK_ENUM]   = "ENUM",
+    [TK_STRUCT] = "STRUCT"
+};
+/* Number of entries in token_typeNames. */
+const size_t token_typeCount = sizeof(token_typeNames) / sizeof(token_typeNames[0]);
+
+/**@STRUCT Comment block
+ * stores data for each block (or block of) comments in a given source file.
+ * */
+typedef struct commentBlock
+{
+ FILE *file; /* file that this block comment is home to. */
+ size_t start; /* read offset in the file of the first character after the comment opener. */
+ size_t end; /* read offset in the file where the comment text stops; the parsers start reading the declaration that follows from here. */
+} comment_block_t;
+
+/**@STRUCT Parsed token
+ * stores data for each comment token.
+ * */
+typedef struct token
+{
+ struct token *next; /* Next token in the token stream, NULL for the last one. */
+ FILE *file; /* File this token is located in. */
+ token_type_t type; /* Token ID for this token (one of the TK_ values). */
+ char *proto; /* Function prototype, structure definition, enum definition or parameter name. Separately allocated; NULL when the token has none. */
+ char *name; /* Function name. Separately allocated; only filled in for function tokens. */
+ char text[]; /* Characters associated with this token, stored inline after the struct. */
+} token_t;
+
+/* Stream all generated documentation is written to. Assigned in main: the
+ * file given with -o, or stdout when no output file is named. */
+static FILE *docfile;
+
+/**@FUNC Prematurely terminates the program with a specified error message.
+ * The message is written to stderr and the process is then aborted.
+ * @PARAM fmt printf-style string to format and print.
+ * @VPARAM arguments to format text with.
+ * */
+__attribute__((noreturn, __format__(__printf__, 1, 2))) void
+fail(const char *fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    // Every va_start needs its matching va_end, even on a path that never returns.
+    va_end(ap);
+    abort();
+}
+
+/**@FUNC Extracts the next block of comments from the current position in the
+ * file stream.
+ * Supports double slash comments and block comments.
+ * Consecutive double slash lines are merged into a single block.
+ * Moves the file stream to the end of the comment block it finds.
+ * @PARAM srcfile file to read a comment block from.
+ * @RETURN found comment block, to be freed by the caller. NULL upon failure.
+ * */
+static comment_block_t*
+next_comment_block(FILE *srcfile)
+{
+    comment_block_t *comment = calloc(1, sizeof *comment);
+    if(!comment) return NULL;
+    comment->file = srcfile;
+
+    // fgetc returns an int so that EOF stays distinct from every byte value.
+    int c;
+    while((c = fgetc(srcfile)) != EOF) {
+        if(c != '/') continue;
+
+        bool is_block;
+        c = fgetc(srcfile);
+        if(c == '*') is_block = true;
+        else if(c == '/') is_block = false;
+        else continue;
+
+        comment->start = (size_t)ftell(srcfile);
+
+        if(is_block) {
+            // Remember the previous character so a run of stars right before
+            // the closing slash still terminates the comment.
+            int prev = 0;
+            while((c = fgetc(srcfile)) != EOF) {
+                if(prev == '*' && c == '/') {
+                    // The end offset sits one character past the terminator:
+                    // the tokenizer and the declaration parsers both expect
+                    // the character following the comment to be included.
+                    comment->end = (size_t)ftell(srcfile) + 1;
+                    return comment;
+                }
+                prev = c;
+            }
+            // Unterminated block comment: nothing usable was found.
+            break;
+        }
+
+        while((c = fgetc(srcfile)) != EOF) {
+            if(c != '\n') continue;
+
+            long line_end = ftell(srcfile);
+            // Peek at the next line: another double slash continues this block.
+            while(isspace(c = fgetc(srcfile)));
+            if(c == '/' && fgetc(srcfile) == '/') continue;
+
+            // Not a continuation. Rewind so the characters consumed while
+            // peeking (possibly the opener of the next comment) are seen again.
+            fseek(srcfile, line_end, SEEK_SET);
+            comment->end = (size_t)line_end - 1;
+            return comment;
+        }
+
+        // The file ended in the middle of a double slash comment without a
+        // trailing newline; the comment runs up to the end of the file.
+        comment->end = (size_t)ftell(srcfile);
+        return comment;
+    }
+    free(comment);
+    return NULL;
+}
+
+/* Parses the body of a TEXT tag: everything left in the stripped comment.
+ * comment : block the tag was found in (supplies the source file).
+ * buffer  : NUL-terminated stripped comment text.
+ * bufferi : in, index of the first character after the tag keyword;
+ *           out, index of the terminating NUL.
+ * Returns a newly allocated token, or NULL when allocation fails. */
+static token_t*
+s_parse_token_text(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    const char *text = &buffer[*bufferi];
+    size_t textw = strlen(text);
+    *bufferi += textw;
+
+    // One extra byte, zeroed by calloc, terminates the copied text.
+    token_t *token = calloc(1, sizeof *token + textw + 1);
+    if(!token) return NULL;
+
+    token->file = comment->file;
+    token->proto = NULL;
+    token->type = TK_TEXT;
+    memcpy(token->text, text, textw);
+    return token;
+}
+
+/* Copies a tag description from src into dest, stopping at the next unescaped
+ * at sign or at the end of src. A backslash makes the following character
+ * literal and is itself dropped; a backslash that ends the string is copied
+ * as-is. A single trailing newline is removed from the copy.
+ * dest : destination, at least strlen(src) bytes. It is only NUL-terminated
+ *        by this function when a trailing newline is removed, so callers
+ *        must hand in zero-filled storage with room for the terminator.
+ * src  : NUL-terminated text starting at the description.
+ * Returns how far the caller should advance in src: the number of source
+ * characters consumed, minus one when a trailing newline was removed (the
+ * caller then resumes on that newline). */
+static size_t
+s_get_token_desc(char *dest, const char *src)
+{
+    size_t writew = 0;
+    size_t readw = 0;
+    while(src[readw] && src[readw] != '@') {
+        // Only treat the backslash as an escape when something follows it,
+        // so the consumed count can never step over the terminating NUL.
+        if(src[readw] == '\\' && src[readw + 1]) readw++;
+        dest[writew++] = src[readw++];
+    }
+    // Guard against an empty description before looking at the last byte.
+    if(writew > 0 && dest[writew - 1] == '\n') {
+        dest[--writew] = 0;
+        readw--;
+    }
+    return readw;
+}
+
+/* Parses a FUNC tag: the description from the comment plus the prototype and
+ * name of the function that follows the comment in the source file.
+ * comment : block the tag was found in; its file is read starting at its end
+ *           offset and is left positioned at the opening parenthesis of the
+ *           function (or after the prototype when there is none).
+ * buffer  : NUL-terminated stripped comment text.
+ * bufferi : in, index of the first description character; advanced by the
+ *           amount s_get_token_desc reports.
+ * Returns a newly allocated token, or NULL when allocation fails. */
+static token_t*
+s_parse_token_func(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    size_t textw = strlen(&buffer[*bufferi]);
+    // One extra zeroed byte keeps the description NUL-terminated even when it
+    // fills the whole remaining buffer.
+    token_t *token = calloc(1, sizeof *token + textw + 1);
+    if(!token) return NULL;
+    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
+
+    token->file = comment->file;
+    token->type = TK_FUNC;
+
+    //Locate function prototype: everything up to the first ';' or '{'.
+    size_t proto_len = 0;
+    fseek(comment->file, (long)comment->end, SEEK_SET);
+    for(int c; (c = fgetc(comment->file)) != EOF && c != ';' && c != '{'; proto_len++);
+    fseek(comment->file, (long)comment->end, SEEK_SET);
+    token->proto = calloc(1, proto_len + 1);
+    if(!token->proto) {
+        free(token);
+        return NULL;
+    }
+    proto_len = fread(token->proto, 1, proto_len, comment->file);
+    token->proto[proto_len] = 0;
+    for(char *s = token->proto; *s; s++) if(*s == '\n') *s = ' ';
+
+    //Locate function name: the identifier in front of the first parenthesis
+    //that does not belong to an attribute (those follow '_' or '(').
+    size_t paren = 0;
+    char lastc = 0;
+    while(paren < proto_len) {
+        char c = token->proto[paren];
+        if(c == '(' && lastc != '_' && lastc != '(') break;
+        lastc = c;
+        paren++;
+    }
+
+    size_t name_start = 0, name_end = 0;
+    if(paren < proto_len) {
+        name_end = paren;
+        // Allow whitespace between the name and the parenthesis.
+        while(name_end > 0 && isspace((unsigned char)token->proto[name_end - 1])) name_end--;
+        name_start = name_end;
+        // Walk back over identifier characters only, so a pointer star in
+        // front of the name is not taken as part of it.
+        while(name_start > 0) {
+            unsigned char ch = (unsigned char)token->proto[name_start - 1];
+            if(!isalnum(ch) && ch != '_') break;
+            name_start--;
+        }
+        // Resume scanning the file at the opening parenthesis.
+        fseek(comment->file, (long)(comment->end + paren), SEEK_SET);
+    }
+
+    size_t name_len = name_end - name_start;
+    token->name = calloc(1, name_len + 1);
+    if(!token->name) {
+        free(token->proto);
+        free(token);
+        return NULL;
+    }
+    memcpy(token->name, &token->proto[name_start], name_len);
+
+    return token;
+}
+
+/* Parses a STRUCT or ENUM tag: the description from the comment plus the full
+ * definition that follows the comment in the source file.
+ * comment : block the tag was found in; its file is read starting at its end
+ *           offset and is left positioned after the definition.
+ * buffer  : NUL-terminated stripped comment text.
+ * bufferi : in, index of the first description character; advanced by the
+ *           amount s_get_token_desc reports.
+ * type    : token type to record (TK_STRUCT or TK_ENUM).
+ * Returns a newly allocated token, or NULL when allocation fails. */
+static token_t*
+s_parse_token_structlike(comment_block_t *comment, char *buffer, size_t *bufferi, token_type_t type)
+{
+    size_t textw = strlen(&buffer[*bufferi]);
+    // One extra zeroed byte keeps the description NUL-terminated even when it
+    // fills the whole remaining buffer.
+    token_t *token = calloc(1, sizeof *token + textw + 1);
+    if(!token) return NULL;
+    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
+
+    token->file = comment->file;
+    token->type = type;
+
+    //Locate the definition: it ends at the first ';' outside of any braces,
+    //which covers plain definitions, typedef names after the closing brace
+    //and nested struct/union/enum bodies.
+    size_t proto_len = 0;
+    size_t depth = 0;
+    fseek(comment->file, (long)comment->end, SEEK_SET);
+    for(int c; (c = fgetc(comment->file)) != EOF; ) {
+        proto_len++;
+        if(c == '{') depth++;
+        else if(c == '}' && depth > 0) depth--;
+        else if(c == ';' && depth == 0) break;
+    }
+
+    fseek(comment->file, (long)comment->end, SEEK_SET);
+    token->proto = calloc(1, proto_len + 1);
+    if(!token->proto) {
+        free(token);
+        return NULL;
+    }
+    proto_len = fread(token->proto, 1, proto_len, comment->file);
+    token->proto[proto_len] = 0;
+
+    return token;
+}
+
+/* Parser table entry for STRUCT tags; defers to the shared struct-like parser. */
+static token_t*
+s_parse_token_struct(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    token_t *parsed = s_parse_token_structlike(comment, buffer, bufferi, TK_STRUCT);
+    return parsed;
+}
+
+/* Parser table entry for ENUM tags; defers to the shared struct-like parser. */
+static token_t*
+s_parse_token_enum(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    token_t *parsed = s_parse_token_structlike(comment, buffer, bufferi, TK_ENUM);
+    return parsed;
+}
+
+/* Parses a PARAM tag: the parameter name (first word) followed by its
+ * description.
+ * comment : block the tag was found in (supplies the source file).
+ * buffer  : NUL-terminated stripped comment text.
+ * bufferi : in, index of the first character of the parameter name; advanced
+ *           past the name, its separator and the description.
+ * Returns a newly allocated token whose proto holds the parameter name, or
+ * NULL when allocation fails. */
+static token_t*
+s_parse_token_param(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    size_t textw = strlen(&buffer[*bufferi]);
+    token_t *token = calloc(1, sizeof *token + textw + 1);
+    if(!token) return NULL;
+
+    //Locate variable name. Stop at the terminator as well as at whitespace so
+    //a name that ends the comment cannot run the scan off the buffer.
+    size_t protow = 0;
+    while(buffer[*bufferi + protow]
+            && !isspace((unsigned char)buffer[*bufferi + protow])) protow++;
+    token->proto = calloc(protow + 1, 1);
+    if(!token->proto) {
+        free(token);
+        return NULL;
+    }
+    memcpy(token->proto, &buffer[*bufferi], protow);
+    *bufferi += protow;
+    // Skip the separator after the name, but never the terminating NUL.
+    if(buffer[*bufferi]) (*bufferi)++;
+
+    //Copy param description.
+    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
+
+    token->file = comment->file;
+    token->type = TK_PARAM;
+
+    return token;
+}
+
+/* Parses a VPARAM tag: the description of a function's variadic arguments.
+ * comment : block the tag was found in (supplies the source file).
+ * buffer  : NUL-terminated stripped comment text.
+ * bufferi : in, index of the first description character; advanced by the
+ *           amount s_get_token_desc reports.
+ * Returns a newly allocated token, or NULL when allocation fails. */
+static token_t*
+s_parse_token_vparam(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    size_t textw = strlen(&buffer[*bufferi]);
+    // One extra zeroed byte keeps the description NUL-terminated even when it
+    // fills the whole remaining buffer.
+    token_t *token = calloc(1, sizeof *token + textw + 1);
+    if(!token) return NULL;
+
+    //Copy param description.
+    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
+
+    token->file = comment->file;
+    token->type = TK_VPARAM;
+
+    return token;
+}
+
+/* Parses a RETURN tag: the description of a function's return value.
+ * comment : block the tag was found in (supplies the source file).
+ * buffer  : NUL-terminated stripped comment text.
+ * bufferi : in, index of the first description character; advanced by the
+ *           amount s_get_token_desc reports.
+ * Returns a newly allocated token, or NULL when allocation fails. */
+static token_t*
+s_parse_token_return(comment_block_t *comment, char *buffer, size_t *bufferi)
+{
+    size_t textw = strlen(&buffer[*bufferi]);
+    // One extra zeroed byte keeps the description NUL-terminated even when it
+    // fills the whole remaining buffer.
+    token_t *token = calloc(1, sizeof *token + textw + 1);
+    if(!token) return NULL;
+
+    //Copy return description.
+    (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]);
+
+    token->file = comment->file;
+    token->type = TK_RETURN;
+
+    return token;
+}
+
+/* Writes a TEXT token to the documentation: its text followed by a "***"
+ * separator line. Frees the token and returns the one that followed it. */
+static token_t*
+s_eval_token_text(comment_block_t *comment, token_t *token)
+{
+    token_t *successor = token->next;
+    (void)comment;
+
+    fprintf(docfile, "%s\n\n***\n", token->text);
+    free(token);
+
+    return successor;
+}
+
+/* Writes text to the documentation wrapped in a Markdown code fence. */
+static void
+s_md_codeblock(const char *text)
+{
+    FILE *out = docfile;
+    fprintf(out, "```\n%s\n```\n", text);
+}
+
+/* Writes a FUNC token to the documentation: a heading with the function name,
+ * the description, then the prototype as a code block. Frees the token with
+ * its prototype and name, and returns the token that followed it. */
+static token_t*
+s_eval_token_func(comment_block_t *comment, token_t *token)
+{
+    token_t *successor = token->next;
+    char *prototype = token->proto;
+    char *heading = token->name;
+    (void)comment;
+
+    fprintf(docfile, "#### %s\n\n", heading);
+    fprintf(docfile, "%s\n\n", token->text);
+    s_md_codeblock(prototype);
+
+    free(prototype);
+    free(heading);
+    free(token);
+
+    return successor;
+}
+
+/* Writes an ENUM or STRUCT token to the documentation: the definition as a
+ * code block, the description, then a "***" separator line. Frees the token
+ * with its definition text and returns the token that followed it. */
+static token_t*
+s_eval_token_enum(comment_block_t *comment, token_t *token)
+{
+    token_t *successor = token->next;
+    char *definition = token->proto;
+    (void)comment;
+
+    s_md_codeblock(definition);
+    fprintf(docfile, "%s\n\n***\n", token->text);
+
+    free(definition);
+    free(token);
+
+    return successor;
+}
+
+/* Writes a PARAM token to the documentation as the parameter name in code
+ * formatting followed by its description. Frees the token together with the
+ * separately allocated parameter name and returns the token that followed. */
+static token_t*
+s_eval_token_param(comment_block_t *comment, token_t *token)
+{
+    (void)comment;
+    token_t *next = token->next;
+
+    fprintf(docfile, "`%s` : ", token->proto);
+    fprintf(docfile, "%s\n\n", token->text);
+
+    // The parameter name lives in its own allocation; release it with the token.
+    free(token->proto);
+    free(token);
+    return next;
+}
+
+/* Writes a VPARAM token to the documentation as an ellipsis entry with its
+ * description. Frees the token and returns the one that followed it. */
+static token_t*
+s_eval_token_vparam(comment_block_t *comment, token_t *token)
+{
+    token_t *successor = token->next;
+    (void)comment;
+
+    fprintf(docfile, "`...` : %s\n\n", token->text);
+    free(token);
+
+    return successor;
+}
+
+/* Writes a RETURN token to the documentation as a "return" entry with its
+ * description. Frees the token and returns the one that followed it. */
+static token_t*
+s_eval_token_return(comment_block_t *comment, token_t *token)
+{
+    token_t *successor = token->next;
+    (void)comment;
+
+    fprintf(docfile, "`return` : %s\n\n", token->text);
+    free(token);
+
+    return successor;
+}
+
+/* Parser for each token type, indexed by TK_ value. A parser receives the
+ * comment block, the stripped comment text and the index just past the tag
+ * keyword; it advances that index and returns the parsed token. */
+static token_t *(*tokenParsers[])(comment_block_t*, char*, size_t*) = {
+ [TK_TEXT] = s_parse_token_text,
+ [TK_FUNC] = s_parse_token_func,
+ [TK_PARAM] = s_parse_token_param,
+ [TK_VPARAM] = s_parse_token_vparam,
+ [TK_RETURN] = s_parse_token_return,
+ [TK_ENUM] = s_parse_token_enum,
+ [TK_STRUCT] = s_parse_token_struct
+};
+
+/* Evaluator signature: writes one token to the documentation, frees it and
+ * returns the token that followed it in the stream. */
+typedef token_t *(*token_evaluator_t)(comment_block_t*, token_t*);
+/* Evaluator for each token type, indexed by TK_ value. */
+static token_evaluator_t tokenEvaluators[] = {
+ [TK_TEXT] = s_eval_token_text,
+ [TK_FUNC] = s_eval_token_func,
+ [TK_PARAM] = s_eval_token_param,
+ [TK_VPARAM] = s_eval_token_vparam,
+ [TK_RETURN] = s_eval_token_return,
+ [TK_ENUM] = s_eval_token_enum,
+ [TK_STRUCT] = s_eval_token_enum /* structs are written out the same way as enums */
+};
+
+/* Scans stripped comment text for tags (an at sign, a known keyword, then a
+ * space) and hands each one to the parser registered for it.
+ * comment : block the text came from; passed through to the parsers.
+ * buffer  : NUL-terminated stripped comment text.
+ * Returns the first token of the resulting list in source order, or NULL when
+ * the text holds no tags. */
+token_t*
+tokenize_comment_stripped(comment_block_t *comment, char *buffer)
+{
+    token_t *first = NULL, *last = NULL;
+    size_t bufferLen = strlen(buffer);
+
+    // The index is advanced by hand: after a parser returns it already points
+    // at the next character to look at, which may itself start another tag or
+    // be the terminating NUL, so it must not be stepped over.
+    size_t i = 0;
+    while(i < bufferLen) {
+        if(buffer[i++] != '@') continue;
+
+        size_t bufferWidth = bufferLen - i;
+        /*Look for token keywords.*/
+
+        token_t *token = NULL;
+        for(size_t tki = 0; tki < token_typeCount; tki++) {
+            const char *tokenName = token_typeNames[tki];
+            size_t tokenNameW = strlen(tokenName);
+
+            if(bufferWidth < tokenNameW
+                    || strncmp(&buffer[i], tokenName, tokenNameW)
+                    || buffer[i + tokenNameW] != ' ') continue;
+            if(tokenParsers[tki] == NULL) continue;
+
+            i += tokenNameW + 1;
+            token = tokenParsers[tki](comment, buffer, &i);
+            // Never trust a parser to stay inside the text.
+            if(i > bufferLen) i = bufferLen;
+            break;
+        }
+        if(token == NULL) continue;
+
+        token->next = NULL;
+        if(first == NULL) first = token;
+        else last->next = token;
+        last = token;
+    }
+    return first;
+}
+
+/**@FUNC Converts a comment block into a stream of tokens.
+ * The read position of the file is restored before the tokens are parsed.
+ * @PARAM srcfile file to source comment data from.
+ * @PARAM comment data to use to read comment block.
+ * @RETURN first token of the stream. NULL when the block has no tags.
+ * */
+token_t*
+tokenize_comment(FILE *srcfile, comment_block_t *comment)
+{
+    size_t commentSize = comment->end > comment->start
+        ? comment->end - comment->start : 0;
+    // One extra zeroed byte leaves room for the string terminator even when
+    // nothing gets stripped.
+    char *commentBuffer = calloc(commentSize + 1, 1);
+    if(!commentBuffer) return NULL;
+    size_t strippedSize = 0;
+
+    long opos = ftell(srcfile);
+    fseek(srcfile, (long)comment->start, SEEK_SET);
+    size_t readsize = fread(commentBuffer, 1, commentSize, srcfile);
+    fseek(srcfile, opos, SEEK_SET);
+
+    if(commentSize > readsize) commentSize = readsize;
+
+    //First, strip the comment of comment characters. The text is compacted
+    //in place; the write index never overtakes the read index.
+    size_t i = 0;
+    while(i < commentSize) {
+        // Drop the decoration (whitespace, stars, slashes) that leads a line.
+        // Newlines count as whitespace, so blank lines vanish here too.
+        char c = commentBuffer[i];
+        if(isspace((unsigned char)c) || c == '*' || c == '/') {
+            i++;
+            continue;
+        }
+        // Keep the rest of the line, including its newline.
+        while(i < commentSize) {
+            c = commentBuffer[i++];
+            commentBuffer[strippedSize++] = c;
+            if(c == '\n') break;
+        }
+    }
+
+    // A block comment that closes on a line with text leaves its terminator
+    // in the buffer, normally followed by the one character after it.
+    if(strippedSize >= 3
+            && commentBuffer[strippedSize - 2] == '/'
+            && commentBuffer[strippedSize - 3] == '*') strippedSize -= 3;
+    else if(strippedSize >= 2
+            && commentBuffer[strippedSize - 1] == '/'
+            && commentBuffer[strippedSize - 2] == '*') strippedSize -= 2;
+
+    commentBuffer[strippedSize] = 0;
+    token_t *tokens = tokenize_comment_stripped(comment, commentBuffer);
+    // The parsers copy everything they keep, so the scratch text can go.
+    free(commentBuffer);
+    return tokens;
+}
+
+/* Tokenizes one comment block and writes every token to the documentation.
+ * Each evaluator frees its token and hands back the next one; a token type
+ * with no evaluator is freed here and skipped. */
+void
+process_comment(comment_block_t *comment)
+{
+    for(token_t *cursor = tokenize_comment(comment->file, comment); cursor != NULL; ) {
+        token_evaluator_t handler = tokenEvaluators[cursor->type];
+        if(handler) {
+            cursor = handler(comment, cursor);
+        } else {
+            token_t *following = cursor->next;
+            free(cursor);
+            cursor = following;
+        }
+    }
+}
+
+/* Program entry point: parses the command line, opens the output stream and
+ * runs every comment block of every source file through process_comment.
+ * Returns 0 on success; every error goes through fail, which does not return. */
+int
+main(int argc, char *argv[])
+{
+    /* TODO: Proper file IO.*/
+    char *ovalue = NULL;
+    int opt;
+    while((opt = getopt(argc, argv, "o:")) != -1) {
+        switch(opt) {
+            case 'o':
+                ovalue = optarg;
+                break;
+            case '?':
+                // fail never returns, so control cannot run on into default.
+                if(optopt == 'o')
+                    fail("Option %c requires an argument.\n", optopt);
+                else if(isprint(optopt))
+                    fail("Unknown option '%c'\n", optopt);
+                else
+                    fail("Unknown option character '\\x%x'.\n", optopt);
+            default:
+                abort();
+        }
+    }
+
+    // Check for inputs before touching the output, so a bad invocation does
+    // not create or truncate the output file.
+    if(optind >= argc)
+        fail("Missing input files.\n");
+
+    if(ovalue) {
+        docfile = fopen(ovalue, "w");
+        if(!docfile)
+            fail("Failed to open output file '%s' for writing\n", ovalue);
+    }else docfile = stdout;
+
+    for(int i = optind; i < argc; i++) {
+        char *srcstr = argv[i];
+        FILE *srcfile = fopen(srcstr, "r");
+        if(!srcfile) fail("Failed to open source file '%s' for reading\n", srcstr);
+
+        comment_block_t *block = NULL;
+        while((block = next_comment_block(srcfile))) {
+            process_comment(block);
+            free(block);
+        }
+
+        fclose(srcfile);
+    }
+
+    // Buffered output only reaches its destination on flush or close; report
+    // a failure there instead of exiting successfully with truncated output.
+    if(docfile == stdout) {
+        if(fflush(stdout) != 0)
+            fail("Failed to write documentation to stdout\n");
+    }else if(fclose(docfile) != 0)
+        fail("Failed to write output file '%s'\n", ovalue);
+
+    return 0;
+}