git.cameronkatri.com Git - cgit.git/blobdiff - parsing.c
tests: handle paths with whitespace
[cgit.git] / parsing.c
index 66e8b3db21fd4369f62f3ba8b0029276ea9e9cd5..602e3de799469ebe8900ee3928638fcac7e850df 100644 (file)
--- a/parsing.c
+++ b/parsing.c
@@ -62,10 +62,70 @@ char *substr(const char *head, const char *tail)
        return buf;
 }
 
+char *parse_user(char *t, char **name, char **email, unsigned long *date)
+{
+       char *p = t;
+       int mode = 1;
+
+       while (p && *p) {
+               if (mode == 1 && *p == '<') {
+                       *name = substr(t, p - 1);
+                       t = p;
+                       mode++;
+               } else if (mode == 1 && *p == '\n') {
+                       *name = substr(t, p);
+                       p++;
+                       break;
+               } else if (mode == 2 && *p == '>') {
+                       *email = substr(t, p + 1);
+                       t = p;
+                       mode++;
+               } else if (mode == 2 && *p == '\n') {
+                       *email = substr(t, p);
+                       p++;
+                       break;
+               } else if (mode == 3 && isdigit(*p)) {
+                       *date = atol(p);
+                       mode++;
+               } else if (*p == '\n') {
+                       p++;
+                       break;
+               }
+               p++;
+       }
+       return p;
+}
+
+#ifdef NO_ICONV
+#define reencode(a, b, c)
+#else
+const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
+{
+       char *tmp;
+
+       if (!txt)
+               return NULL;
+
+       if (!*txt || !src_enc || !dst_enc)
+               return *txt;
+
+       /* no encoding needed if src_enc equals dst_enc */
+       if(!strcasecmp(src_enc, dst_enc))
+               return *txt;
+
+       tmp = reencode_string(*txt, dst_enc, src_enc);
+       if (tmp) {
+               free(*txt);
+               *txt = tmp;
+       }
+       return *txt;
+}
+#endif
+
 struct commitinfo *cgit_parse_commit(struct commit *commit)
 {
        struct commitinfo *ret;
-       char *p = commit->buffer, *t = commit->buffer;
+       char *p = commit->buffer, *t;
 
        ret = xmalloc(sizeof(*ret));
        ret->commit = commit;
@@ -88,71 +148,64 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
        while (!strncmp(p, "parent ", 7))
                p += 48; // "parent " + hex[40] + "\n"
 
-       if (!strncmp(p, "author ", 7)) {
-               p += 7;
-               t = strchr(p, '<') - 1;
-               ret->author = substr(p, t);
-               p = t;
-               t = strchr(t, '>') + 1;
-               ret->author_email = substr(p, t);
-               ret->author_date = atol(t+1);
-               p = strchr(t, '\n') + 1;
+       if (p && !strncmp(p, "author ", 7)) {
+               p = parse_user(p + 7, &ret->author, &ret->author_email,
+                       &ret->author_date);
        }
 
-       if (!strncmp(p, "committer ", 9)) {
-               p += 9;
-               t = strchr(p, '<') - 1;
-               ret->committer = substr(p, t);
-               p = t;
-               t = strchr(t, '>') + 1;
-               ret->committer_email = substr(p, t);
-               ret->committer_date = atol(t+1);
-               p = strchr(t, '\n') + 1;
+       if (p && !strncmp(p, "committer ", 9)) {
+               p = parse_user(p + 9, &ret->committer, &ret->committer_email,
+                       &ret->committer_date);
        }
 
-       if (!strncmp(p, "encoding ", 9)) {
+       if (p && !strncmp(p, "encoding ", 9)) {
                p += 9;
-               t = strchr(p, '\n') + 1;
-               ret->msg_encoding = substr(p, t);
-               p = t;
-       } else
-               ret->msg_encoding = xstrdup(PAGE_ENCODING);
+               t = strchr(p, '\n');
+               if (t) {
+                       ret->msg_encoding = substr(p, t + 1);
+                       p = t + 1;
+               }
+       }
 
-       while (*p && (*p != '\n'))
-               p = strchr(p, '\n') + 1; // skip unknown header fields
+       /* if no special encoding is found, assume UTF-8 */
+       if(!ret->msg_encoding)
+               ret->msg_encoding = xstrdup("UTF-8");
+
+       // skip unknown header fields
+       while (p && *p && (*p != '\n')) {
+               p = strchr(p, '\n');
+               if (p)
+                       p++;
+       }
 
-       while (*p == '\n')
-               p = strchr(p, '\n') + 1;
+       // skip empty lines between headers and message
+       while (p && *p == '\n')
+               p++;
+
+       if (!p)
+               return ret;
 
        t = strchr(p, '\n');
        if (t) {
-               if (*t == '\0')
-                       ret->subject = "** empty **";
-               else
-                       ret->subject = substr(p, t);
+               ret->subject = substr(p, t);
                p = t + 1;
 
-               while (*p == '\n')
-                       p = strchr(p, '\n') + 1;
-               ret->msg = xstrdup(p);
-       } else
-               ret->subject = substr(p, p+strlen(p));
-
-       if(strcmp(ret->msg_encoding, PAGE_ENCODING)) {
-               t = reencode_string(ret->subject, PAGE_ENCODING,
-                                   ret->msg_encoding);
-               if(t) {
-                       free(ret->subject);
-                       ret->subject = t;
+               while (p && *p == '\n') {
+                       p = strchr(p, '\n');
+                       if (p)
+                               p++;
                }
+               if (p)
+                       ret->msg = xstrdup(p);
+       } else
+               ret->subject = xstrdup(p);
 
-               t = reencode_string(ret->msg, PAGE_ENCODING,
-                                   ret->msg_encoding);
-               if(t) {
-                       free(ret->msg);
-                       ret->msg = t;
-               }
-       }
+       reencode(&ret->author, ret->msg_encoding, PAGE_ENCODING);
+       reencode(&ret->author_email, ret->msg_encoding, PAGE_ENCODING);
+       reencode(&ret->committer, ret->msg_encoding, PAGE_ENCODING);
+       reencode(&ret->committer_email, ret->msg_encoding, PAGE_ENCODING);
+       reencode(&ret->subject, ret->msg_encoding, PAGE_ENCODING);
+       reencode(&ret->msg, ret->msg_encoding, PAGE_ENCODING);
 
        return ret;
 }
@@ -163,7 +216,7 @@ struct taginfo *cgit_parse_tag(struct tag *tag)
        void *data;
        enum object_type type;
        unsigned long size;
-       char *p, *t;
+       char *p;
        struct taginfo *ret;
 
        data = read_sha1_file(tag->object.sha1, &type, &size);
@@ -185,22 +238,19 @@ struct taginfo *cgit_parse_tag(struct tag *tag)
                        break;
 
                if (!strncmp(p, "tagger ", 7)) {
-                       p += 7;
-                       t = strchr(p, '<') - 1;
-                       ret->tagger = substr(p, t);
-                       p = t;
-                       t = strchr(t, '>') + 1;
-                       ret->tagger_email = substr(p, t);
-                       ret->tagger_date = atol(t+1);
+                       p = parse_user(p + 7, &ret->tagger, &ret->tagger_email,
+                               &ret->tagger_date);
+               } else {
+                       p = strchr(p, '\n');
+                       if (p)
+                               p++;
                }
-               p = strchr(p, '\n') + 1;
        }
 
-       while (p && *p && (*p != '\n'))
-               p = strchr(p, '\n') + 1; // skip unknown tag fields
+       // skip empty lines between headers and message
+       while (p && *p == '\n')
+               p++;
 
-       while (p && (*p == '\n'))
-               p = strchr(p, '\n') + 1;
        if (p && *p)
                ret->msg = xstrdup(p);
        free(data);