#include #include #include #include #include #include #include /**@TEXT # jodgen 1.0.0 * * A documentation generator based on my own commenting specification. * * Usage: * ``` * jdgen [-o OUTFILE] SRCFILE [SRCFILES...] * ``` * * If no output file is specified, text is written to stdout, * meaning another valid way to write output would be: * ``` * jdgen SRCFILE [SRCFILES...] > OUTFILE * ``` * * The following tags are used to emit special text: * `@TEXT` : Output the following block into the outfile. * `@FUNC` : Output a description and function prototype. * `@STRUCT` : Output a description after the struct definition. * `@ENUM` : Output a description after the enum definition. * `@PARAM` : Describes a function parameter. * `@VPARAM` : Describes a variable function parameter. * `@RETURN` : Describes how a function returns a value. * */ /**@ENUM Token types * identifiers for comment format specifiers, starting with \@ * */ enum { TK_TEXT = 0, /* Text specifier. Comment block only is recorded.*/ TK_FUNC, /* Function specifier. Comment block and function prototype is recorded.*/ TK_PARAM, /* Description for parameter in function prototype. */ TK_VPARAM, /* Description for variadic parameter in function prototype.*/ TK_RETURN, /* Return description for function prototype.*/ TK_ENUM, /* Enum specifier. First line is name, following is description, includes enum values.*/ TK_STRUCT, /* Struct sprcifier. First line is formal name, followed by description. includes full struct definition.*/ }; typedef unsigned char token_type_t; const char *token_typeNames[] = { [TK_TEXT] = "TEXT", [TK_FUNC] = "FUNC", [TK_PARAM] = "PARAM", [TK_VPARAM] = "VPARAM", [TK_RETURN] = "RETURN", [TK_ENUM] = "ENUM", [TK_STRUCT] = "STRUCT" }; const size_t token_typeCount = sizeof(token_typeNames) / sizeof(char*); /**@STRUCT Comment block * stores data for each block (or block of) comments in a given source file. * */ typedef struct commentBlock { FILE *file; /* file that this block comment is home to. */ size_t start; /* read offset in the file for the start of the block. */ size_t end; /* read offset in the file for the end of the block. */ } comment_block_t; /**@STRUCT Parsed token * stores data for each comment token. * */ typedef struct token { struct token *next; /*Next token in the token stream. */ FILE *file; /*File this token is located in.*/ token_type_t type; /*Token ID for this token.*/ char *proto; /*Function prototype or structure definition or enum definition.*/ char *name; /* Function name.*/ char text[]; /*Characters associated with this token.*/ } token_t; static FILE *docfile; /**@FUNC Prematurely terminates the program with a specified error message. * @PARAM fmt string to format and print. * @VPARAM arguments to format text with. * */ __attribute__((noreturn)) void fail(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); abort(); } /**@FUNC Extracts the next block of comments from the current position in the * file stream. * Supports double slash comments and block comments. * Moves the file stream to the end of the comment block it finds. * @PARAM srcfile file to read a comment block from. * @RETURN found comment block. NULL upon failure. * */ static comment_block_t* next_comment_block(FILE *srcfile) { char c; comment_block_t *comment = calloc(1, sizeof(comment_block_t)); comment->file = srcfile; bool is_block = false; while((c = fgetc(srcfile)) != EOF) { if(c != '/') continue; c = fgetc(srcfile); if(c == '*') is_block = true; else if(c == '/') is_block = false; else continue; comment->start = ftell(srcfile); while((c = fgetc(srcfile)) != EOF) { comment->end = ftell(srcfile); if(is_block) { if(c != '*') continue; c = fgetc(srcfile); if(c != '/') continue; comment->end += 2; return comment; }else{ if(c != '\n') continue; while(isspace(c = fgetc(srcfile))) continue; if(c == '/' && fgetc(srcfile) == '/') continue; comment->end--; return comment; } } } free(comment); return NULL; } static token_t* s_parse_token_text(comment_block_t *comment, char *buffer, size_t *bufferi) { size_t textw = strlen(&buffer[*bufferi]); token_t *token = calloc(1, sizeof(token_t) + textw); token->file = comment->file; token->proto = NULL; token->type = TK_TEXT; memcpy(token->text, &buffer[*bufferi], textw); while(buffer[*bufferi]) (*bufferi)++; return token; } static size_t s_get_token_desc(char *dest, const char *src) { size_t srcw = strlen(src); size_t writew = 0; size_t extra = 0; for(size_t i = 0; i < srcw; i++) { if(src[i] == '\\') { dest[writew++] = src[++i]; extra++; continue; } if(src[i] == '@') break; dest[writew++] = src[i]; } if(dest[writew - 1] == '\n') dest[--writew] = 0; return writew + extra; } static token_t* s_parse_token_func(comment_block_t *comment, char *buffer, size_t *bufferi) { size_t bufferw = strlen(buffer); size_t textw = bufferw - *bufferi; token_t *token = calloc(1, sizeof(token_t) + textw); (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]); token->file = comment->file; token->type = TK_FUNC; //Locate function prototype. size_t proto_len = 0; fseek(comment->file, comment->end, SEEK_SET); for(char c; (c = fgetc(comment->file)) != EOF && c != ';' && c != '{'; proto_len++); fseek(comment->file, comment->end, SEEK_SET); token->proto = calloc(1, proto_len + 1); proto_len = fread(token->proto, proto_len, 1, comment->file); //Locate function name size_t name_len = 0; fseek(comment->file, comment->end, SEEK_SET); for(char c, lastc = 0; (c = fgetc(comment->file)) != EOF; lastc = c) { if(c == '(' && lastc != '_' && lastc != '(') break; } do { fseek(comment->file, -2, SEEK_CUR); name_len++; } while(!isspace(fgetc(comment->file))); token->name = calloc(1, name_len); name_len = fread(token->name, name_len - 1, 1, comment->file); for(char *s = token->proto; *s; s++) if(*s == '\n') *s = ' '; return token; } static token_t* s_parse_token_structlike(comment_block_t *comment, char *buffer, size_t *bufferi, token_type_t type) { size_t bufferw = strlen(buffer); size_t textw = bufferw - *bufferi; token_t *token = calloc(1, sizeof(token_t) + textw); (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]); token->file = comment->file; token->type = type; //Locate function prototype. size_t proto_len = 0; fseek(comment->file, comment->end, SEEK_SET); bool is_typedef = false; for(char c; (c = fgetc(comment->file)) != EOF; proto_len++) { if(c == '}') { c = fgetc(comment->file); proto_len++; if(c == ';') break; is_typedef = true; } if(is_typedef && c == ';') break; } proto_len++; fseek(comment->file, comment->end, SEEK_SET); token->proto = calloc(1, proto_len + 1); proto_len = fread(token->proto, proto_len, 1, comment->file); return token; } static token_t* s_parse_token_struct(comment_block_t *comment, char *buffer, size_t *bufferi) { return s_parse_token_structlike(comment, buffer, bufferi, TK_STRUCT); } static token_t* s_parse_token_enum(comment_block_t *comment, char *buffer, size_t *bufferi) { return s_parse_token_structlike(comment, buffer, bufferi, TK_ENUM); } static token_t* s_parse_token_param(comment_block_t *comment, char *buffer, size_t *bufferi) { size_t bufferw = strlen(buffer); size_t textw = bufferw - *bufferi; token_t *token = calloc(1, sizeof(token_t) + textw); //Locate variable name. size_t protow = 0; for(; !isspace(buffer[*bufferi + protow]); protow++); token->proto = calloc(protow + 1, 1); memcpy(token->proto, &buffer[*bufferi], protow); *bufferi += protow + 1; //Copy param description. (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]); token->file = comment->file; token->type = TK_PARAM; return token; } static token_t* s_parse_token_vparam(comment_block_t *comment, char *buffer, size_t *bufferi) { size_t bufferw = strlen(buffer); size_t textw = bufferw - *bufferi; token_t *token = calloc(1, sizeof(token_t) + textw); //Copy param description. (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]); token->file = comment->file; token->type = TK_VPARAM; return token; } static token_t* s_parse_token_return(comment_block_t *comment, char *buffer, size_t *bufferi) { size_t bufferw = strlen(buffer); size_t textw = bufferw - *bufferi; token_t *token = calloc(1, sizeof(token_t) + textw); //Copy param description. (*bufferi) += s_get_token_desc(token->text, &buffer[*bufferi]); token->file = comment->file; token->type = TK_RETURN; return token; } static token_t* s_eval_token_text(comment_block_t *comment, token_t *token) { (void)comment; token_t *next = token->next; fprintf(docfile, "%s\n\n***\n", token->text); free(token); return next; } static void s_md_codeblock(const char *text) { fprintf(docfile, "```\n%s\n```\n", text); } static token_t* s_eval_token_func(comment_block_t *comment, token_t *token) { (void)comment; token_t *next = token->next; fprintf(docfile, "#### %s\n\n", token->name); fprintf(docfile, "%s\n\n", token->text); s_md_codeblock(token->proto); free(token->proto); free(token->name); free(token); return next; } static token_t* s_eval_token_enum(comment_block_t *comment, token_t *token) { (void)comment; token_t *next = token->next; s_md_codeblock(token->proto); fprintf(docfile, "%s\n\n***\n", token->text); free(token->proto); free(token); return next; } static token_t* s_eval_token_param(comment_block_t *comment, token_t *token) { (void)comment; token_t *next = token->next; fprintf(docfile, "`%s` : ", token->proto); fprintf(docfile, "%s\n\n", token->text); free(token); return next; } static token_t* s_eval_token_vparam(comment_block_t *comment, token_t *token) { (void)comment; token_t *next = token->next; fprintf(docfile, "`...` : %s\n\n", token->text); free(token); return next; } static token_t* s_eval_token_return(comment_block_t *comment, token_t *token) { (void)comment; token_t *next = token->next; fprintf(docfile, "`return` : %s\n\n", token->text); free(token); return next; } static token_t *(*tokenParsers[])(comment_block_t*, char*, size_t*) = { [TK_TEXT] = s_parse_token_text, [TK_FUNC] = s_parse_token_func, [TK_PARAM] = s_parse_token_param, [TK_VPARAM] = s_parse_token_vparam, [TK_RETURN] = s_parse_token_return, [TK_ENUM] = s_parse_token_enum, [TK_STRUCT] = s_parse_token_struct }; typedef token_t *(*token_evaluator_t)(comment_block_t*, token_t*); static token_evaluator_t tokenEvaluators[] = { [TK_TEXT] = s_eval_token_text, [TK_FUNC] = s_eval_token_func, [TK_PARAM] = s_eval_token_param, [TK_VPARAM] = s_eval_token_vparam, [TK_RETURN] = s_eval_token_return, [TK_ENUM] = s_eval_token_enum, [TK_STRUCT] = s_eval_token_enum }; token_t* tokenize_comment_stripped(comment_block_t *comment, char *buffer) { token_t *tokenTail = NULL, *tokenHead = NULL; for(size_t i = 0; buffer[i]; i++) { if(buffer[i] != '@') continue; size_t bufferWidth = strlen(&buffer[++i]); /*Look for token keywords.*/ token_t *token = NULL; for(size_t tki = 0; tki < token_typeCount; tki++) { const char *tokenName = token_typeNames[tki]; size_t tokenNameW = strlen(tokenName); if(bufferWidth < tokenNameW || strncmp(&buffer[i], tokenName, tokenNameW) || buffer[i + tokenNameW] != ' ') continue; if(tokenParsers[tki] == NULL) continue; i += tokenNameW + 1; token = tokenParsers[tki](comment, buffer, &i); break; } if(token == NULL) continue; if(tokenTail == NULL) tokenTail = tokenHead = token; else { tokenHead->next = token; tokenHead = token; } } return tokenTail; } /**@FUNC Converts a comment block into a stream of tokens. * @PARAM srcfile file to source comment data from. * @PARAM comment data to use to read comment block. * */ token_t* tokenize_comment(FILE *srcfile, comment_block_t *comment) { size_t commentSize = comment->end - comment->start; char *commentBuffer = calloc(commentSize, 1); size_t strippedSize = 0; long opos = ftell(srcfile); fseek(srcfile, comment->start, SEEK_SET); size_t readsize = fread(commentBuffer, 1, commentSize, srcfile); fseek(srcfile, opos, SEEK_SET); if(commentSize > readsize) commentSize = readsize; //First, strip the comment of comment characters. for(size_t i = 0; i < commentSize; i++) { char c = commentBuffer[i]; if(isspace(c) || c == '*' || c == '/') continue; while(c != '\n' && i < commentSize) { commentBuffer[strippedSize++] = c; c = commentBuffer[++i]; if(c == '\n') commentBuffer[strippedSize++] = c; if(i >= commentSize) break; } } if(commentBuffer[strippedSize - 2] == '/' && commentBuffer[strippedSize - 3] == '*') strippedSize -= 3; commentBuffer[strippedSize] = 0; return tokenize_comment_stripped(comment, commentBuffer); } void process_comment(comment_block_t *comment) { token_t *token = tokenize_comment(comment->file, comment); while(token) { token_evaluator_t eval = tokenEvaluators[token->type]; if(!eval) { token_t *next = token->next; free(token); token = next; continue; } token = eval(comment, token); } } int main(int argc, char *argv[]) { /* TODO: Proper file IO.*/ char *ovalue = NULL; int opt; while((opt = getopt(argc, argv, "o:")) != -1) { switch(opt) { case 'o': ovalue = optarg; break; case '?': if(optopt == 'o') fail("Option %c requires an argument.\n", optopt); else if(isprint(optopt)) fail("Unknown option '%c'\n", optopt); else fail("Unknown option character '\\x%x'.\n", optopt); default: abort(); } } if(ovalue) { docfile = fopen(ovalue, "w"); if(!docfile) fail("Failed to open output file '%s' for writing\n", ovalue); }else docfile = stdout; int nsrc = 0; for(int i = optind; i < argc; i++) { char *srcstr = argv[i]; FILE *srcfile = fopen(srcstr, "r"); if(!srcfile) fail("Failed to open source file '%s' for reading\n", srcstr); comment_block_t *block = NULL; while((block = next_comment_block(srcfile))) { process_comment(block); free(block); } fclose(srcfile); nsrc++; } if(nsrc == 0) fail("Missing input files.\n"); return 0; }