X-Git-Url: https://git.cameronkatri.com/cgit.git/blobdiff_plain/e0572c39f78b4f88c706a49a60e211302b8e015c..fac4da38a074a831e2ef2476dde23a6bc073cc28:/parsing.c

diff --git a/parsing.c b/parsing.c
index 5093b8b..9b7efb3 100644
--- a/parsing.c
+++ b/parsing.c
@@ -8,130 +8,6 @@
 
 #include "cgit.h"
 
-int next_char(FILE *f)
-{
-	int c = fgetc(f);
-	if (c=='\r') {
-		c = fgetc(f);
-		if (c!='\n') {
-			ungetc(c, f);
-			c = '\r';
-		}
-	}
-	return c;
-}
-
-void skip_line(FILE *f)
-{
-	int c;
-
-	while((c=next_char(f)) && c!='\n' && c!=EOF)
-		;
-}
-
-int read_config_line(FILE *f, char *line, const char **value, int bufsize)
-{
-	int i = 0, isname = 0;
-
-	*value = NULL;
-	while(i<bufsize-1) {
-		int c = next_char(f);
-		if (!isname && (c=='#' || c==';')) {
-			skip_line(f);
-			continue;
-		}
-		if (!isname && isspace(c))
-			continue;
-
-		if (c=='=' && !*value) {
-			line[i] = 0;
-			*value = &line[i+1];
-		} else if (c=='\n' && !isname) {
-			i = 0;
-			continue;
-		} else if (c=='\n' || c==EOF) {
-			line[i] = 0;
-			break;
-		} else {
-			line[i]=c;
-		}
-		isname = 1;
-		i++;
-	}
-	line[i+1] = 0;
-	return i;
-}
-
-int cgit_read_config(const char *filename, configfn fn)
-{
-	static int nesting;
-	int len;
-	char line[256];
-	const char *value;
-	FILE *f;
-
-	/* cancel deeply nested include-commands */
-	if (nesting > 8)
-		return -1;
-	if (!(f = fopen(filename, "r")))
-		return -1;
-	nesting++;
-	while((len = read_config_line(f, line, &value, sizeof(line))) > 0)
-		(*fn)(line, value);
-	nesting--;
-	fclose(f);
-	return 0;
-}
-
-char *convert_query_hexchar(char *txt)
-{
-	int d1, d2;
-	if (strlen(txt) < 3) {
-		*txt = '\0';
-		return txt-1;
-	}
-	d1 = hextoint(*(txt+1));
-	d2 = hextoint(*(txt+2));
-	if (d1<0 || d2<0) {
-		strcpy(txt, txt+3);
-		return txt-1;
-	} else {
-		*txt = d1 * 16 + d2;
-		strcpy(txt+1, txt+3);
-		return txt;
-	}
-}
-
-int cgit_parse_query(char *txt, configfn fn)
-{
-	char *t, *value = NULL, c;
-
-	if (!txt)
-		return 0;
-
-	t = txt = xstrdup(txt);
-
-	while((c=*t) != '\0') {
-		if (c=='=') {
-			*t = '\0';
-			value = t+1;
-		} else if (c=='+') {
-			*t = ' ';
-		} else if (c=='%') {
-			t = convert_query_hexchar(t);
-		} else if (c=='&') {
-			*t = '\0';
-			(*fn)(txt, value);
-			txt = t+1;
-			value = NULL;
-		}
-		t++;
-	}
-	if (t!=txt)
-		(*fn)(txt, value);
-	return 0;
-}
-
 /*
  * url syntax: [repo ['/' cmd [ '/' path]]]
  *   repo: any valid repo url, may contain '/'
@@ -143,35 +19,35 @@ void cgit_parse_url(const char *url)
 {
 	char *cmd, *p;
 
-	cgit_repo = NULL;
+	ctx.repo = NULL;
 	if (!url || url[0] == '\0')
 		return;
 
-	cgit_repo = cgit_get_repoinfo(url);
-	if (cgit_repo) {
-		cgit_query_repo = cgit_repo->url;
+	ctx.repo = cgit_get_repoinfo(url);
+	if (ctx.repo) {
+		ctx.qry.repo = ctx.repo->url;
 		return;
 	}
 
 	cmd = strchr(url, '/');
-	while (!cgit_repo && cmd) {
+	while (!ctx.repo && cmd) {
 		cmd[0] = '\0';
-		cgit_repo = cgit_get_repoinfo(url);
-		if (cgit_repo == NULL) {
+		ctx.repo = cgit_get_repoinfo(url);
+		if (ctx.repo == NULL) {
 			cmd[0] = '/';
 			cmd = strchr(cmd + 1, '/');
 			continue;
 		}
 
-		cgit_query_repo = cgit_repo->url;
+		ctx.qry.repo = ctx.repo->url;
 		p = strchr(cmd + 1, '/');
 		if (p) {
 			p[0] = '\0';
 			if (p[1])
-				cgit_query_path = trim_end(p + 1, '/');
+				ctx.qry.path = trim_end(p + 1, '/');
 		}
-		cgit_cmd = cgit_get_cmd_index(cmd + 1);
-		cgit_query_page = xstrdup(cmd + 1);
+		if (cmd[1])
+			ctx.qry.page = xstrdup(cmd + 1);
 		return;
 	}
 }
@@ -180,16 +56,78 @@ char *substr(const char *head, const char *tail)
 {
 	char *buf;
 
+	if (tail < head)
+		return xstrdup("");
 	buf = xmalloc(tail - head + 1);
 	strncpy(buf, head, tail - head);
 	buf[tail - head] = '\0';
 	return buf;
 }
 
+/*
+ * Parse "Name <email> timestamp" up to end of line. *name and *email receive
+ * copies made by substr() (email keeps its '<' and '>'), *date the first number
+ * after '>'; absent parts are not written. Returns past '\n', or at the NUL.
+ */
+char *parse_user(char *t, char **name, char **email, unsigned long *date)
+{
+	char *p = t;
+	int mode = 1;	/* 1: in name, 2: in email, 3: want date, 4: date seen */
+	for (; p && *p; p++) {
+		if (*p == '\n') {
+			if (mode == 1)
+				*name = substr(t, p);
+			else if (mode == 2)
+				*email = substr(t, p);
+			return p + 1;
+		}
+		if (mode == 1 && *p == '<') {
+			/* drop the separating space only if it is really there */
+			*name = substr(t, (p > t && p[-1] == ' ') ? p - 1 : p);
+			t = p;
+			mode = 2;
+		} else if (mode == 2 && *p == '>') {
+			*email = substr(t, p + 1);
+			t = p;
+			mode = 3;
+		} else if (mode == 3 && isdigit((unsigned char)*p)) {
+			*date = strtoul(p, NULL, 10);
+			mode = 4;
+		}
+	}
+	return p;
+}
+
+#ifdef NO_ICONV
+#define reencode(a, b, c)
+#else
+/*
+ * Re-encode *txt from src_enc to dst_enc, replacing *txt (and freeing the old
+ * buffer) when reencode_string() yields a result. Returns *txt, or NULL if
+ * txt itself is NULL; *txt is left as-is when nothing was converted.
+ */
+const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
+{
+	char *converted;
+
+	if (!txt)
+		return NULL;
+	/* nothing to do: no text, unknown encoding, or encodings match */
+	if (!*txt || !src_enc || !dst_enc || !strcasecmp(src_enc, dst_enc))
+		return *txt;
+
+	converted = reencode_string(*txt, dst_enc, src_enc);
+	if (!converted)
+		return *txt;
+	free(*txt);
+	*txt = converted;
+	return *txt;
+}
+#endif
+
 struct commitinfo *cgit_parse_commit(struct commit *commit)
 {
 	struct commitinfo *ret;
-	char *p = commit->buffer, *t = commit->buffer;
+	char *p = commit->buffer, *t;
 
 	ret = xmalloc(sizeof(*ret));
 	ret->commit = commit;
@@ -212,71 +150,64 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
 	while (!strncmp(p, "parent ", 7))
 		p += 48; // "parent " + hex[40] + "\n"
 
-	if (!strncmp(p, "author ", 7)) {
-		p += 7;
-		t = strchr(p, '<') - 1;
-		ret->author = substr(p, t);
-		p = t;
-		t = strchr(t, '>') + 1;
-		ret->author_email = substr(p, t);
-		ret->author_date = atol(t+1);
-		p = strchr(t, '\n') + 1;
+	if (p && !strncmp(p, "author ", 7)) {
+		p = parse_user(p + 7, &ret->author, &ret->author_email,
+			&ret->author_date);
 	}
 
-	if (!strncmp(p, "committer ", 9)) {
-		p += 9;
-		t = strchr(p, '<') - 1;
-		ret->committer = substr(p, t);
-		p = t;
-		t = strchr(t, '>') + 1;
-		ret->committer_email = substr(p, t);
-		ret->committer_date = atol(t+1);
-		p = strchr(t, '\n') + 1;
+	if (p && !strncmp(p, "committer ", 9)) {
+		p = parse_user(p + 9, &ret->committer, &ret->committer_email,
+			&ret->committer_date);
 	}
 
-	if (!strncmp(p, "encoding ", 9)) {
+	if (p && !strncmp(p, "encoding ", 9)) {
 		p += 9;
-		t = strchr(p, '\n') + 1;
-		ret->msg_encoding = substr(p, t);
-		p = t;
-	} else
-		ret->msg_encoding = xstrdup(PAGE_ENCODING);
+		t = strchr(p, '\n');
+		if (t) {
+			ret->msg_encoding = substr(p, t + 1);
+			p = t + 1;
+		}
+	}
+
+	/* if no special encoding is found, assume UTF-8 */
+	if (!ret->msg_encoding)
+		ret->msg_encoding = xstrdup("UTF-8");
 
-	while (*p && (*p != '\n'))
-		p = strchr(p, '\n') + 1; // skip unknown header fields
+	// skip unknown header fields
+	while (p && *p && (*p != '\n')) {
+		p = strchr(p, '\n');
+		if (p)
+			p++;
+	}
+
+	// skip empty lines between headers and message
+	while (p && *p == '\n')
+		p++;
 
-	while (*p == '\n')
-		p = strchr(p, '\n') + 1;
+	if (!p)
+		return ret;
 
 	t = strchr(p, '\n');
 	if (t) {
-		if (*t == '\0')
-			ret->subject = "** empty **";
-		else
-			ret->subject = substr(p, t);
+		ret->subject = substr(p, t);
 		p = t + 1;
 
-		while (*p == '\n')
-			p = strchr(p, '\n') + 1;
-		ret->msg = xstrdup(p);
-	} else
-		ret->subject = substr(p, p+strlen(p));
-
-	if(strcmp(ret->msg_encoding, PAGE_ENCODING)) {
-		t = reencode_string(ret->subject, PAGE_ENCODING,
-				    ret->msg_encoding);
-		if(t) {
-			free(ret->subject);
-			ret->subject = t;
+		while (p && *p == '\n') {
+			p = strchr(p, '\n');
+			if (p)
+				p++;
 		}
+		if (p)
+			ret->msg = xstrdup(p);
+	} else
+		ret->subject = xstrdup(p);
 
-		t = reencode_string(ret->msg, PAGE_ENCODING,
-				    ret->msg_encoding);
-		if(t) {
-			free(ret->msg);
-			ret->msg = t;
-		}
-	}
+	reencode(&ret->author, ret->msg_encoding, PAGE_ENCODING);
+	reencode(&ret->author_email, ret->msg_encoding, PAGE_ENCODING);
+	reencode(&ret->committer, ret->msg_encoding, PAGE_ENCODING);
+	reencode(&ret->committer_email, ret->msg_encoding, PAGE_ENCODING);
+	reencode(&ret->subject, ret->msg_encoding, PAGE_ENCODING);
+	reencode(&ret->msg, ret->msg_encoding, PAGE_ENCODING);
 
 	return ret;
 }
@@ -287,7 +218,7 @@ struct taginfo *cgit_parse_tag(struct tag *tag)
 	void *data;
 	enum object_type type;
 	unsigned long size;
-	char *p, *t;
+	char *p;
 	struct taginfo *ret;
 
 	data = read_sha1_file(tag->object.sha1, &type, &size);
@@ -309,22 +240,19 @@ struct taginfo *cgit_parse_tag(struct tag *tag)
 			break;
 
 		if (!strncmp(p, "tagger ", 7)) {
-			p += 7;
-			t = strchr(p, '<') - 1;
-			ret->tagger = substr(p, t);
-			p = t;
-			t = strchr(t, '>') + 1;
-			ret->tagger_email = substr(p, t);
-			ret->tagger_date = atol(t+1);
+			p = parse_user(p + 7, &ret->tagger, &ret->tagger_email,
+				&ret->tagger_date);
+		} else {
+			p = strchr(p, '\n');
+			if (p)
+				p++;
 		}
-		p = strchr(p, '\n') + 1;
 	}
 
-	while (p && *p && (*p != '\n'))
-		p = strchr(p, '\n') + 1; // skip unknown tag fields
+	// skip empty lines between headers and message
+	while (p && *p == '\n')
+		p++;
 
-	while (p && (*p == '\n'))
-		p = strchr(p, '\n') + 1;
 	if (p && *p)
 		ret->msg = xstrdup(p);
 	free(data);