X-Git-Url: https://git.cameronkatri.com/cgit.git/blobdiff_plain/16a3d2779ccd56bf7954d98da547247d8796544b..c0dfaf1c281d0697ce43131343d7a9f170a61ff9:/parsing.c diff --git a/parsing.c b/parsing.c index 2c05c09..658621d 100644 --- a/parsing.c +++ b/parsing.c @@ -8,130 +8,6 @@ #include "cgit.h" -int next_char(FILE *f) -{ - int c = fgetc(f); - if (c=='\r') { - c = fgetc(f); - if (c!='\n') { - ungetc(c, f); - c = '\r'; - } - } - return c; -} - -void skip_line(FILE *f) -{ - int c; - - while((c=next_char(f)) && c!='\n' && c!=EOF) - ; -} - -int read_config_line(FILE *f, char *line, const char **value, int bufsize) -{ - int i = 0, isname = 0; - - *value = NULL; - while(i 8) - return -1; - if (!(f = fopen(filename, "r"))) - return -1; - nesting++; - while((len = read_config_line(f, line, &value, sizeof(line))) > 0) - (*fn)(line, value); - nesting--; - fclose(f); - return 0; -} - -char *convert_query_hexchar(char *txt) -{ - int d1, d2; - if (strlen(txt) < 3) { - *txt = '\0'; - return txt-1; - } - d1 = hextoint(*(txt+1)); - d2 = hextoint(*(txt+2)); - if (d1<0 || d2<0) { - strcpy(txt, txt+3); - return txt-1; - } else { - *txt = d1 * 16 + d2; - strcpy(txt+1, txt+3); - return txt; - } -} - -int cgit_parse_query(char *txt, configfn fn) -{ - char *t, *value = NULL, c; - - if (!txt) - return 0; - - t = txt = xstrdup(txt); - - while((c=*t) != '\0') { - if (c=='=') { - *t = '\0'; - value = t+1; - } else if (c=='+') { - *t = ' '; - } else if (c=='%') { - t = convert_query_hexchar(t); - } else if (c=='&') { - *t = '\0'; - (*fn)(txt, value); - txt = t+1; - value = NULL; - } - t++; - } - if (t!=txt) - (*fn)(txt, value); - return 0; -} - /* * url syntax: [repo ['/' cmd [ '/' path]]] * repo: any valid repo url, may contain '/' @@ -143,53 +19,115 @@ void cgit_parse_url(const char *url) { char *cmd, *p; - cgit_repo = NULL; + ctx.repo = NULL; if (!url || url[0] == '\0') return; - cgit_repo = cgit_get_repoinfo(url); - if (cgit_repo) { - cgit_query_repo = cgit_repo->url; + ctx.repo = cgit_get_repoinfo(url); + if (ctx.repo) { + ctx.qry.repo = ctx.repo->url; return; } cmd = strchr(url, '/'); - while (!cgit_repo && cmd) { + while (!ctx.repo && cmd) { cmd[0] = '\0'; - cgit_repo = cgit_get_repoinfo(url); - if (cgit_repo == NULL) { + ctx.repo = cgit_get_repoinfo(url); + if (ctx.repo == NULL) { cmd[0] = '/'; cmd = strchr(cmd + 1, '/'); continue; } - cgit_query_repo = cgit_repo->url; + ctx.qry.repo = ctx.repo->url; p = strchr(cmd + 1, '/'); if (p) { p[0] = '\0'; if (p[1]) - cgit_query_path = trim_end(p + 1, '/'); + ctx.qry.path = trim_end(p + 1, '/'); } - cgit_cmd = cgit_get_cmd_index(cmd + 1); - cgit_query_page = xstrdup(cmd + 1); + if (cmd[1]) + ctx.qry.page = xstrdup(cmd + 1); return; } } -char *substr(const char *head, const char *tail) +static char *substr(const char *head, const char *tail) { char *buf; + if (tail < head) + return xstrdup(""); buf = xmalloc(tail - head + 1); strncpy(buf, head, tail - head); buf[tail - head] = '\0'; return buf; } +static char *parse_user(char *t, char **name, char **email, unsigned long *date) +{ + char *p = t; + int mode = 1; + + while (p && *p) { + if (mode == 1 && *p == '<') { + *name = substr(t, p - 1); + t = p; + mode++; + } else if (mode == 1 && *p == '\n') { + *name = substr(t, p); + p++; + break; + } else if (mode == 2 && *p == '>') { + *email = substr(t, p + 1); + t = p; + mode++; + } else if (mode == 2 && *p == '\n') { + *email = substr(t, p); + p++; + break; + } else if (mode == 3 && isdigit(*p)) { + *date = atol(p); + mode++; + } else if (*p == '\n') { + p++; + break; + } + p++; + } + return p; +} + +#ifdef NO_ICONV +#define reencode(a, b, c) +#else +static const char *reencode(char **txt, const char *src_enc, const char *dst_enc) +{ + char *tmp; + + if (!txt) + return NULL; + + if (!*txt || !src_enc || !dst_enc) + return *txt; + + /* no encoding needed if src_enc equals dst_enc */ + if (!strcasecmp(src_enc, dst_enc)) + return *txt; + + tmp = reencode_string(*txt, dst_enc, src_enc); + if (tmp) { + free(*txt); + *txt = tmp; + } + return *txt; +} +#endif + struct commitinfo *cgit_parse_commit(struct commit *commit) { struct commitinfo *ret; - char *p = commit->buffer, *t = commit->buffer; + char *p = commit->buffer, *t; ret = xmalloc(sizeof(*ret)); ret->commit = commit; @@ -199,6 +137,7 @@ struct commitinfo *cgit_parse_commit(struct commit *commit) ret->committer_email = NULL; ret->subject = NULL; ret->msg = NULL; + ret->msg_encoding = NULL; if (p == NULL) return ret; @@ -211,44 +150,64 @@ struct commitinfo *cgit_parse_commit(struct commit *commit) while (!strncmp(p, "parent ", 7)) p += 48; // "parent " + hex[40] + "\n" - if (!strncmp(p, "author ", 7)) { - p += 7; - t = strchr(p, '<') - 1; - ret->author = substr(p, t); - p = t; - t = strchr(t, '>') + 1; - ret->author_email = substr(p, t); - ret->author_date = atol(++t); - p = strchr(t, '\n') + 1; + if (p && !strncmp(p, "author ", 7)) { + p = parse_user(p + 7, &ret->author, &ret->author_email, + &ret->author_date); } - if (!strncmp(p, "committer ", 9)) { + if (p && !strncmp(p, "committer ", 9)) { + p = parse_user(p + 9, &ret->committer, &ret->committer_email, + &ret->committer_date); + } + + if (p && !strncmp(p, "encoding ", 9)) { p += 9; - t = strchr(p, '<') - 1; - ret->committer = substr(p, t); - p = t; - t = strchr(t, '>') + 1; - ret->committer_email = substr(p, t); - ret->committer_date = atol(++t); - p = strchr(t, '\n') + 1; + t = strchr(p, '\n'); + if (t) { + ret->msg_encoding = substr(p, t + 1); + p = t + 1; + } } - while (*p == '\n') - p = strchr(p, '\n') + 1; + /* if no special encoding is found, assume UTF-8 */ + if (!ret->msg_encoding) + ret->msg_encoding = xstrdup("UTF-8"); + + // skip unknown header fields + while (p && *p && (*p != '\n')) { + p = strchr(p, '\n'); + if (p) + p++; + } + + // skip empty lines between headers and message + while (p && *p == '\n') + p++; + + if (!p) + return ret; t = strchr(p, '\n'); if (t) { - if (*t == '\0') - ret->subject = strdup("** empty **"); - else - ret->subject = substr(p, t); + ret->subject = substr(p, t); p = t + 1; - while (*p == '\n') - p = strchr(p, '\n') + 1; - ret->msg = p; + while (p && *p == '\n') { + p = strchr(p, '\n'); + if (p) + p++; + } + if (p) + ret->msg = xstrdup(p); } else - ret->subject = substr(p, p+strlen(p)); + ret->subject = xstrdup(p); + + reencode(&ret->author, ret->msg_encoding, PAGE_ENCODING); + reencode(&ret->author_email, ret->msg_encoding, PAGE_ENCODING); + reencode(&ret->committer, ret->msg_encoding, PAGE_ENCODING); + reencode(&ret->committer_email, ret->msg_encoding, PAGE_ENCODING); + reencode(&ret->subject, ret->msg_encoding, PAGE_ENCODING); + reencode(&ret->msg, ret->msg_encoding, PAGE_ENCODING); return ret; } @@ -259,7 +218,7 @@ struct taginfo *cgit_parse_tag(struct tag *tag) void *data; enum object_type type; unsigned long size; - char *p, *t; + char *p; struct taginfo *ret; data = read_sha1_file(tag->object.sha1, &type, &size); @@ -281,19 +240,19 @@ struct taginfo *cgit_parse_tag(struct tag *tag) break; if (!strncmp(p, "tagger ", 7)) { - p += 7; - t = strchr(p, '<') - 1; - ret->tagger = substr(p, t); - p = t; - t = strchr(t, '>') + 1; - ret->tagger_email = substr(p, t); - ret->tagger_date = atol(++t); + p = parse_user(p + 7, &ret->tagger, &ret->tagger_email, + &ret->tagger_date); + } else { + p = strchr(p, '\n'); + if (p) + p++; } - p = strchr(p, '\n') + 1; } - while (p && (*p == '\n')) - p = strchr(p, '\n') + 1; + // skip empty lines between headers and message + while (p && *p == '\n') + p++; + if (p && *p) ret->msg = xstrdup(p); free(data);