Download raw body.
diff_output_lines() performance fix
The diff_output_lines() function currently calls fprintf() on
each character contained in a line of diff text.
Buffering bytes and writing out a line at a time improves
performance of 'got diff' and 'tog diff'.
To avoid dynamic allocation overhead this patch uses a stack buffer
and splits very long lines across several fprintf() calls.
The test suite caught an issue with embedded NUL bytes which must
be displayed by 'got diff -a'. Embedded NUL bytes are incompatible
with the call fprintf(dest, "%s\n", buf);
But this is an edge case. We can simply write NUL bytes out directly.
ok?
As usual, I would apply this patch to the diff.git repository first
and then refer to that commit's ID in a follow-up commit to got.git.
diff c2cf8015b6da213404a3d87a14ff228d1ca99c50 946cef407c5cd78b4062504b4730ec1c38dad531
blob - daf8b8cb069e52f434eb062cf50fbf0d99b33ecd
blob + 87214405330d85a6d6640dc6015f345f543cbfcf
--- lib/diff_output.c
+++ lib/diff_output.c
@@ -55,6 +55,8 @@ get_atom_byte(int *ch, struct diff_atom *atom, off_t o
return 0;
}
+#define DIFF_OUTPUT_BUF_SIZE 512
+
int
diff_output_lines(struct diff_output_info *outinfo, FILE *dest,
const char *prefix, struct diff_atom *start_atom,
@@ -71,12 +73,16 @@ diff_output_lines(struct diff_output_info *outinfo, FI
foreach_diff_atom(atom, start_atom, count) {
off_t outlen = 0;
- int i, ch;
+ int i, ch, nbuf = 0;
unsigned int len = atom->len;
- rc = fprintf(dest, "%s", prefix);
- if (rc < 0)
- return errno;
- outlen += rc;
+ unsigned char buf[DIFF_OUTPUT_BUF_SIZE + 1];
+ size_t n;
+
+ n = strlcpy(buf, prefix, sizeof(buf));
+ if (n >= sizeof(buf))
+ return ENOBUFS;
+ nbuf += n;
+
if (len) {
rc = get_atom_byte(&ch, atom, len - 1);
if (rc)
@@ -96,12 +102,26 @@ diff_output_lines(struct diff_output_info *outinfo, FI
rc = get_atom_byte(&ch, atom, i);
if (rc)
return rc;
- rc = fprintf(dest, "%c", (unsigned char)ch);
- if (rc < 0)
- return errno;
- outlen += rc;
+ if (nbuf >= DIFF_OUTPUT_BUF_SIZE ||
+ (nbuf > 0 && ch == '\0')) {
+ buf[nbuf] = '\0';
+ rc = fprintf(dest, "%s", buf);
+ if (rc < 0)
+ return errno;
+ outlen += rc;
+ nbuf = 0;
+ }
+
+ /* Don't buffer embedded NUL bytes in binary files. */
+ if (ch == '\0') {
+ if (fputc(ch, dest) == EOF)
+ return errno;
+ outlen++;
+ } else
+ buf[nbuf++] = ch;
}
- rc = fprintf(dest, "\n");
+ buf[nbuf] = '\0';
+ rc = fprintf(dest, "%s\n", buf);
if (rc < 0)
return errno;
outlen += rc;
diff_output_lines() performance fix