From: Stefan Sperling Subject: diff_output_lines() performance fix To: gameoftrees@openbsd.org Cc: neels@hofmeyr.de Date: Wed, 7 Jul 2021 15:37:04 +0200 The diff_output_lines() function currently calls fprintf() on each character contained in a line of diff text. Buffering bytes and writing out a line at a time improves performance of 'got diff' and 'tog diff'. To avoid dynamic allocation overhead this patch uses a stack buffer and splits very long lines across several fprintf() calls. The test suite caught an issue with embedded NUL bytes which must be displayed by 'got diff -a'. Embedded NUL bytes are incompatible with the call fprintf(dest, "%s\n", buf); But this is an edge case. We can simply write NUL bytes out directly. ok? As usual, I would apply this patch to the diff.git repository first and then refer to that commit's ID in a follow-up commit to got.git. diff c2cf8015b6da213404a3d87a14ff228d1ca99c50 946cef407c5cd78b4062504b4730ec1c38dad531 blob - daf8b8cb069e52f434eb062cf50fbf0d99b33ecd blob + 87214405330d85a6d6640dc6015f345f543cbfcf --- lib/diff_output.c +++ lib/diff_output.c @@ -55,6 +55,8 @@ get_atom_byte(int *ch, struct diff_atom *atom, off_t o return 0; } +#define DIFF_OUTPUT_BUF_SIZE 512 + int diff_output_lines(struct diff_output_info *outinfo, FILE *dest, const char *prefix, struct diff_atom *start_atom, @@ -71,12 +73,16 @@ diff_output_lines(struct diff_output_info *outinfo, FI foreach_diff_atom(atom, start_atom, count) { off_t outlen = 0; - int i, ch; + int i, ch, nbuf = 0; unsigned int len = atom->len; - rc = fprintf(dest, "%s", prefix); - if (rc < 0) - return errno; - outlen += rc; + unsigned char buf[DIFF_OUTPUT_BUF_SIZE + 1]; + size_t n; + + n = strlcpy(buf, prefix, sizeof(buf)); + if (n >= sizeof(buf)) + return ENOBUFS; + nbuf += n; + if (len) { rc = get_atom_byte(&ch, atom, len - 1); if (rc) @@ -96,12 +102,26 @@ diff_output_lines(struct diff_output_info *outinfo, FI rc = get_atom_byte(&ch, atom, i); if (rc) return rc; - rc = fprintf(dest, "%c", (unsigned char)ch); - if (rc < 0) - return errno; - outlen += rc; + if (nbuf >= DIFF_OUTPUT_BUF_SIZE || + (nbuf > 0 && ch == '\0')) { + buf[nbuf] = '\0'; + rc = fprintf(dest, "%s", buf); + if (rc < 0) + return errno; + outlen += rc; + nbuf = 0; + } + + /* Don't buffer embedded NUL bytes in binary files. */ + if (ch == '\0') { + if (fputc(ch, dest) == EOF) + return errno; + outlen++; + } else + buf[nbuf++] = ch; } - rc = fprintf(dest, "\n"); + buf[nbuf] = '\0'; + rc = fprintf(dest, "%s\n", buf); if (rc < 0) return errno; outlen += rc;