From: Stefan Sperling Subject: format log messages more efficiently To: gameoftrees@openbsd.org Date: Sun, 1 Aug 2021 15:17:53 +0200 The Git repository at https://github.com/adhn/nanobox-pkgsrc-lite contains the entire git log of pkgsrc from netbsd embedded in a single 100MB commit message. This repo exposes several problems in our code. The first one is that asprintf() is called for every line while preparing a log message for display, which causes many unnecessary temporary allocations. The patch below fixes this by rewriting got_object_commit_get_logmsg() such that only one memory allocation is made when creating a pretty version of a log message. ok? diff aa8b5dd032c8cba930e5be67a90069a95e0001b8 /home/stsp/src/got blob - 011bee15a8060f553f213c02b686ba3734dcbeca file + lib/object_parse.c --- lib/object_parse.c +++ lib/object_parse.c @@ -439,55 +439,48 @@ const struct got_error * got_object_commit_get_logmsg(char **logmsg, struct got_commit_object *commit) { const struct got_error *err = NULL; - char *msg0, *msg, *line, *s; + const char *src; + char *dst; size_t len; - int headers = 1; - *logmsg = NULL; + len = strlen(commit->logmsg); + *logmsg = malloc(len + 2); /* leave room for a trailing \n and \0 */ + if (*logmsg == NULL) + return got_error_from_errno("malloc"); - msg0 = strdup(commit->logmsg); - if (msg0 == NULL) - return got_error_from_errno("strdup"); + /* + * Strip out unusual headers. Headers are separated from the commit + * message body by a single empty line. + */ + src = commit->logmsg; + dst = *logmsg; + while (*src != '\0' && *src != '\n') { + int copy_header = 1, eol = 0; + if (strncmp(src, GOT_COMMIT_LABEL_TREE, + strlen(GOT_COMMIT_LABEL_TREE)) != 0 && + strncmp(src, GOT_COMMIT_LABEL_AUTHOR, + strlen(GOT_COMMIT_LABEL_AUTHOR)) != 0 && + strncmp(src, GOT_COMMIT_LABEL_PARENT, + strlen(GOT_COMMIT_LABEL_PARENT)) != 0 && + strncmp(src, GOT_COMMIT_LABEL_COMMITTER, + strlen(GOT_COMMIT_LABEL_COMMITTER)) != 0) + copy_header = 0; - /* Copy log message line by line to strip out unusual headers... */ - msg = msg0; - do { - if ((line = strsep(&msg, "\n")) == NULL) - break; - - if (headers == 1) { - if (line[0] != '\0' && - strncmp(line, GOT_COMMIT_LABEL_TREE, - strlen(GOT_COMMIT_LABEL_TREE)) != 0 && - strncmp(line, GOT_COMMIT_LABEL_AUTHOR, - strlen(GOT_COMMIT_LABEL_AUTHOR)) != 0 && - strncmp(line, GOT_COMMIT_LABEL_PARENT, - strlen(GOT_COMMIT_LABEL_PARENT)) != 0 && - strncmp(line, GOT_COMMIT_LABEL_COMMITTER, - strlen(GOT_COMMIT_LABEL_COMMITTER)) != 0) - continue; - - if (line[0] == '\0') - headers = 0; + while (*src != '\0' && !eol) { + if (copy_header) { + *dst = *src; + dst++; + } + if (*src == '\n') + eol = 1; + src++; } + } + *dst = '\0'; - if (asprintf(&s, "%s%s\n", - *logmsg ? *logmsg : "", line) == -1) { - err = got_error_from_errno("asprintf"); - goto done; - } - free(*logmsg); - *logmsg = s; - - } while (line); - - if (*logmsg == NULL) { - /* log message does not contain \n */ - *logmsg = strdup(commit->logmsg); - if (*logmsg == NULL) { - err = got_error_from_errno("strdup"); - goto done; - } + if (strlcat(*logmsg, src, len + 1) >= len + 1) { + err = got_error(GOT_ERR_NO_SPACE); + goto done; } /* Trim redundant trailing whitespace. */ @@ -497,8 +490,13 @@ got_object_commit_get_logmsg(char **logmsg, struct got (*logmsg)[len - 1] = '\0'; len--; } + + /* Append a trailing newline if missing. */ + if (len > 0 && (*logmsg)[len - 1] != '\n') { + (*logmsg)[len] = '\n'; + (*logmsg)[len + 1] = '\0'; + } done: - free(msg0); if (err) { free(*logmsg); *logmsg = NULL;