Reencode author and committer
[cgit.git] / parsing.c
index 74a248449f35fb2a5331f571d3bea30a2806acab..f37c49d261765636cfef652faec3be8baf6d2894 100644 (file)
--- a/parsing.c
+++ b/parsing.c
@@ -8,130 +8,6 @@
 
 #include "cgit.h"
 
-int next_char(FILE *f)
-{
-       int c = fgetc(f);
-       if (c=='\r') {
-               c = fgetc(f);
-               if (c!='\n') {
-                       ungetc(c, f);
-                       c = '\r';
-               }
-       }
-       return c;
-}
-
-void skip_line(FILE *f)
-{
-       int c;
-
-       while((c=next_char(f)) && c!='\n' && c!=EOF)
-               ;
-}
-
-int read_config_line(FILE *f, char *line, const char **value, int bufsize)
-{
-       int i = 0, isname = 0;
-
-       *value = NULL;
-       while(i<bufsize-1) {
-               int c = next_char(f);
-               if (!isname && (c=='#' || c==';')) {
-                       skip_line(f);
-                       continue;
-               }
-               if (!isname && isspace(c))
-                       continue;
-
-               if (c=='=' && !*value) {
-                       line[i] = 0;
-                       *value = &line[i+1];
-               } else if (c=='\n' && !isname) {
-                       i = 0;
-                       continue;
-               } else if (c=='\n' || c==EOF) {
-                       line[i] = 0;
-                       break;
-               } else {
-                       line[i]=c;
-               }
-               isname = 1;
-               i++;
-       }
-       line[i+1] = 0;
-       return i;
-}
-
-int cgit_read_config(const char *filename, configfn fn)
-{
-       static int nesting;
-       int len;
-       char line[256];
-       const char *value;
-       FILE *f;
-
-       /* cancel deeply nested include-commands */
-       if (nesting > 8)
-               return -1;
-       if (!(f = fopen(filename, "r")))
-               return -1;
-       nesting++;
-       while((len = read_config_line(f, line, &value, sizeof(line))) > 0)
-               (*fn)(line, value);
-       nesting--;
-       fclose(f);
-       return 0;
-}
-
-char *convert_query_hexchar(char *txt)
-{
-       int d1, d2;
-       if (strlen(txt) < 3) {
-               *txt = '\0';
-               return txt-1;
-       }
-       d1 = hextoint(*(txt+1));
-       d2 = hextoint(*(txt+2));
-       if (d1<0 || d2<0) {
-               strcpy(txt, txt+3);
-               return txt-1;
-       } else {
-               *txt = d1 * 16 + d2;
-               strcpy(txt+1, txt+3);
-               return txt;
-       }
-}
-
-int cgit_parse_query(char *txt, configfn fn)
-{
-       char *t, *value = NULL, c;
-
-       if (!txt)
-               return 0;
-
-       t = txt = xstrdup(txt);
-
-       while((c=*t) != '\0') {
-               if (c=='=') {
-                       *t = '\0';
-                       value = t+1;
-               } else if (c=='+') {
-                       *t = ' ';
-               } else if (c=='%') {
-                       t = convert_query_hexchar(t);
-               } else if (c=='&') {
-                       *t = '\0';
-                       (*fn)(txt, value);
-                       txt = t+1;
-                       value = NULL;
-               }
-               t++;
-       }
-       if (t!=txt)
-               (*fn)(txt, value);
-       return 0;
-}
-
 /*
  * url syntax: [repo ['/' cmd [ '/' path]]]
  *   repo: any valid repo url, may contain '/'
@@ -143,35 +19,35 @@ void cgit_parse_url(const char *url)
 {
        char *cmd, *p;
 
-       cgit_repo = NULL;
+       ctx.repo = NULL;
        if (!url || url[0] == '\0')
                return;
 
-       cgit_repo = cgit_get_repoinfo(url);
-       if (cgit_repo) {
-               cgit_query_repo = cgit_repo->url;
+       ctx.repo = cgit_get_repoinfo(url);
+       if (ctx.repo) {
+               ctx.qry.repo = ctx.repo->url;
                return;
        }
 
        cmd = strchr(url, '/');
-       while (!cgit_repo && cmd) {
+       while (!ctx.repo && cmd) {
                cmd[0] = '\0';
-               cgit_repo = cgit_get_repoinfo(url);
-               if (cgit_repo == NULL) {
+               ctx.repo = cgit_get_repoinfo(url);
+               if (ctx.repo == NULL) {
                        cmd[0] = '/';
                        cmd = strchr(cmd + 1, '/');
                        continue;
                }
 
-               cgit_query_repo = cgit_repo->url;
+               ctx.qry.repo = ctx.repo->url;
                p = strchr(cmd + 1, '/');
                if (p) {
                        p[0] = '\0';
                        if (p[1])
-                               cgit_query_path = xstrdup(p + 1);
+                               ctx.qry.path = trim_end(p + 1, '/');
                }
-               cgit_cmd = cgit_get_cmd_index(cmd + 1);
-               cgit_query_page = xstrdup(cmd + 1);
+               if (cmd[1])
+                       ctx.qry.page = xstrdup(cmd + 1);
                return;
        }
 }
@@ -186,6 +62,59 @@ char *substr(const char *head, const char *tail)
        return buf;
 }
 
+/*
+ * Parse a "name <email> date" line at t into *name, *email and *date (each
+ * set only if present); returns a pointer just past its '\n', else to the NUL.
+ */
+char *parse_user(char *t, char **name, char **email, unsigned long *date)
+{
+       char *p = t;
+       int mode = 1;   /* 1: name, 2: email, 3: date, 4: rest of line */
+
+       for (; p && *p; p++) {
+               if (*p == '\n') {
+                       /* line ended early: store the field in progress */
+                       if (mode == 1)
+                               *name = substr(t, p);
+                       else if (mode == 2)
+                               *email = substr(t, p);
+                       return p + 1;
+               }
+               if (mode == 1 && *p == '<') {
+                       *name = substr(t, p - 1);
+                       t = p;
+                       mode = 2;
+               } else if (mode == 2 && *p == '>') {
+                       *email = substr(t, p + 1);
+                       t = p;
+                       mode = 3;
+               } else if (mode == 3 && isdigit((unsigned char)*p)) {
+                       *date = strtoul(p, NULL, 10);
+                       mode = 4;
+               }
+       }
+       return p;
+}
+
+#ifdef NO_ICONV
+#define reencode(a, b, c)
+#else
+/* Replace *txt, freeing the old one, with reencode_string()'s result if any. */
+const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
+{
+       char *tmp;
+
+       if (!txt || !*txt || !src_enc || !dst_enc)
+               return txt ? *txt : NULL;       /* txt itself may be NULL */
+       tmp = reencode_string(*txt, src_enc, dst_enc);
+       if (tmp) {
+               free(*txt);
+               *txt = tmp;
+       }
+       return *txt;
+}
+#endif
+
 struct commitinfo *cgit_parse_commit(struct commit *commit)
 {
        struct commitinfo *ret;
@@ -199,6 +128,7 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
        ret->committer_email = NULL;
        ret->subject = NULL;
        ret->msg = NULL;
+       ret->msg_encoding = NULL;
 
        if (p == NULL)
                return ret;
@@ -211,44 +141,62 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
        while (!strncmp(p, "parent ", 7))
                p += 48; // "parent " + hex[40] + "\n"
 
-       if (!strncmp(p, "author ", 7)) {
-               p += 7;
-               t = strchr(p, '<') - 1;
-               ret->author = substr(p, t);
-               p = t;
-               t = strchr(t, '>') + 1;
-               ret->author_email = substr(p, t);
-               ret->author_date = atol(++t);
-               p = strchr(t, '\n') + 1;
+       if (p && !strncmp(p, "author ", 7)) {
+               p = parse_user(p + 7, &ret->author, &ret->author_email,
+                       &ret->author_date);
+       }
+
+       if (p && !strncmp(p, "committer ", 9)) {
+               p = parse_user(p + 9, &ret->committer, &ret->committer_email,
+                       &ret->committer_date);
        }
 
-       if (!strncmp(p, "committer ", 9)) {
+       if (p && !strncmp(p, "encoding ", 9)) {
                p += 9;
-               t = strchr(p, '<') - 1;
-               ret->committer = substr(p, t);
-               p = t;
-               t = strchr(t, '>') + 1;
-               ret->committer_email = substr(p, t);
-               ret->committer_date = atol(++t);
-               p = strchr(t, '\n') + 1;
+               t = strchr(p, '\n');
+               if (t) {
+                       ret->msg_encoding = substr(p, t + 1);
+                       p = t + 1;
+               }
        }
 
-       while (*p == '\n')
-               p = strchr(p, '\n') + 1;
+       // skip unknown header fields
+       while (p && *p && (*p != '\n')) {
+               p = strchr(p, '\n');
+               if (p)
+                       p++;
+       }
+
+       // skip empty lines between headers and message
+       while (p && *p == '\n')
+               p++;
+
+       if (!p)
+               return ret;
 
        t = strchr(p, '\n');
        if (t) {
-               if (*t == '\0')
-                       ret->subject = strdup("** empty **");
-               else
-                       ret->subject = substr(p, t);
+               ret->subject = substr(p, t);
                p = t + 1;
 
-               while (*p == '\n')
-                       p = strchr(p, '\n') + 1;
-               ret->msg = p;
+               while (p && *p == '\n') {
+                       p = strchr(p, '\n');
+                       if (p)
+                               p++;
+               }
+               if (p)
+                       ret->msg = xstrdup(p);
        } else
-               ret->subject = substr(p, p+strlen(p));
+               ret->subject = xstrdup(p);
+
+       if (ret->msg_encoding) {
+               reencode(&ret->author, PAGE_ENCODING, ret->msg_encoding);
+               reencode(&ret->author_email, PAGE_ENCODING, ret->msg_encoding);
+               reencode(&ret->committer, PAGE_ENCODING, ret->msg_encoding);
+               reencode(&ret->committer_email, PAGE_ENCODING, ret->msg_encoding);
+               reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding);
+               reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding);
+       }
 
        return ret;
 }
@@ -259,7 +207,7 @@ struct taginfo *cgit_parse_tag(struct tag *tag)
        void *data;
        enum object_type type;
        unsigned long size;
-       char *p, *t;
+       char *p;
        struct taginfo *ret;
 
        data = read_sha1_file(tag->object.sha1, &type, &size);
@@ -281,19 +229,19 @@ struct taginfo *cgit_parse_tag(struct tag *tag)
                        break;
 
                if (!strncmp(p, "tagger ", 7)) {
-                       p += 7;
-                       t = strchr(p, '<') - 1;
-                       ret->tagger = substr(p, t);
-                       p = t;
-                       t = strchr(t, '>') + 1;
-                       ret->tagger_email = substr(p, t);
-                       ret->tagger_date = atol(++t);
+                       p = parse_user(p + 7, &ret->tagger, &ret->tagger_email,
+                               &ret->tagger_date);
+               } else {
+                       p = strchr(p, '\n');
+                       if (p)
+                               p++;
                }
-               p = strchr(p, '\n') + 1;
        }
 
-       while (p && (*p == '\n'))
-               p = strchr(p, '\n') + 1;
+       // skip empty lines between headers and message
+       while (p && *p == '\n')
+               p++;
+
        if (p && *p)
                ret->msg = xstrdup(p);
        free(data);