"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
format log messages more efficiently
To:
gameoftrees@openbsd.org
Date:
Sun, 1 Aug 2021 15:17:53 +0200

Download raw body.

Thread
The Git repository at https://github.com/adhn/nanobox-pkgsrc-lite
contains the entire git log of pkgsrc from netbsd embedded in a
single 100MB commit message.

This repo exposes several problems in our code. The first one is that
asprintf() is called for every line while preparing a log message for
display, which causes many unnecessary temporary allocations.

The patch below fixes this by rewriting got_object_commit_get_logmsg()
such that only one memory allocation is made when creating a pretty
version of a log message.

ok?

diff aa8b5dd032c8cba930e5be67a90069a95e0001b8 /home/stsp/src/got
blob - 011bee15a8060f553f213c02b686ba3734dcbeca
file + lib/object_parse.c
--- lib/object_parse.c
+++ lib/object_parse.c
@@ -439,55 +439,48 @@ const struct got_error *
 got_object_commit_get_logmsg(char **logmsg, struct got_commit_object *commit)
 {
 	const struct got_error *err = NULL;
-	char *msg0, *msg, *line, *s;
+	const char *src;
+	char *dst;
 	size_t len;
-	int headers = 1;
 
-	*logmsg = NULL;
+	len = strlen(commit->logmsg);
+	*logmsg = malloc(len + 2); /* leave room for a trailing \n and \0 */
+	if (*logmsg == NULL)
+		return got_error_from_errno("malloc");
 
-	msg0 = strdup(commit->logmsg);
-	if (msg0 == NULL)
-		return got_error_from_errno("strdup");
+	/*
+	 * Strip out unusual headers. Headers are separated from the commit
+	 * message body by a single empty line.
+	 */
+	src = commit->logmsg;
+	dst = *logmsg;
+	while (*src != '\0' && *src != '\n') {
+		int copy_header = 1, eol = 0;
+		if (strncmp(src, GOT_COMMIT_LABEL_TREE,
+		    strlen(GOT_COMMIT_LABEL_TREE)) != 0 &&
+		    strncmp(src, GOT_COMMIT_LABEL_AUTHOR,
+		    strlen(GOT_COMMIT_LABEL_AUTHOR)) != 0 &&
+		    strncmp(src, GOT_COMMIT_LABEL_PARENT,
+		    strlen(GOT_COMMIT_LABEL_PARENT)) != 0 &&
+		    strncmp(src, GOT_COMMIT_LABEL_COMMITTER,
+		    strlen(GOT_COMMIT_LABEL_COMMITTER)) != 0)
+			copy_header = 0;
 
-	/* Copy log message line by line to strip out unusual headers... */
-	msg = msg0;
-	do {
-		if ((line = strsep(&msg, "\n")) == NULL)
-			break;
-
-		if (headers == 1) {
-			if (line[0] != '\0' &&
-			    strncmp(line, GOT_COMMIT_LABEL_TREE,
-			        strlen(GOT_COMMIT_LABEL_TREE)) != 0 &&
-			    strncmp(line, GOT_COMMIT_LABEL_AUTHOR,
-			        strlen(GOT_COMMIT_LABEL_AUTHOR)) != 0 &&
-			    strncmp(line, GOT_COMMIT_LABEL_PARENT,
-			        strlen(GOT_COMMIT_LABEL_PARENT)) != 0 &&
-			    strncmp(line, GOT_COMMIT_LABEL_COMMITTER,
-			        strlen(GOT_COMMIT_LABEL_COMMITTER)) != 0)
-				continue;
-
-			if (line[0] == '\0')
-				headers = 0;
+		while (*src != '\0' && !eol) {
+			if (copy_header) {
+				*dst = *src;
+				dst++;
+			}
+			if (*src == '\n')
+				eol = 1;
+			src++;
 		}
+	}
+	*dst = '\0';
 
-		if (asprintf(&s, "%s%s\n",
-		    *logmsg ? *logmsg : "", line) == -1) {
-			err = got_error_from_errno("asprintf");
-			goto done;
-		}
-		free(*logmsg);
-		*logmsg = s;
-
-	} while (line);
-
-	if (*logmsg == NULL) {
-		/* log message does not contain \n */
-		*logmsg = strdup(commit->logmsg);
-		if (*logmsg == NULL) {
-			err = got_error_from_errno("strdup");
-			goto done;
-		}
+	if (strlcat(*logmsg, src, len + 1) >= len + 1) {
+		err = got_error(GOT_ERR_NO_SPACE);
+		goto done;
 	}
 
 	/* Trim redundant trailing whitespace. */
@@ -497,8 +490,13 @@ got_object_commit_get_logmsg(char **logmsg, struct got
 		(*logmsg)[len - 1] = '\0';
 		len--;
 	}
+
+	/* Append a trailing newline if missing. */
+	if (len > 0 && (*logmsg)[len - 1] != '\n') {
+		(*logmsg)[len] = '\n';
+		(*logmsg)[len + 1] = '\0';
+	}
 done:
-	free(msg0);
 	if (err) {
 		free(*logmsg);
 		*logmsg = NULL;