Download raw body.
diff: limit search effort for function prototypes
On Wed, Dec 09, 2020 at 10:45:52PM +0100, Christian Weisgerber wrote: > Stefan Sperling: > > > > GNU diff caches the last function prototype and the line number > > > from which it started searching. > > > > Yes, we could. This is a great idea. Implemented below. This approach > > is also much faster than my previous patch. > > > > So with this diff we only search the 'left' version of the file for > > prototypes. But that's probably how other diff tools do it as well. > > Yes, GNU diff does this as well. > > > ok? > > Basically, yes. > > That's a malloc/free for each prototype. Would a static buffer in > the state be better? Sure. See below. It is slightly faster on OpenBSD because it reduces free(3) checking overhead. > > --- lib/diff_output_unidiff.c > > +++ lib/diff_output_unidiff.c > > if (left_len == 1 && right_len == 1) { > > rc = fprintf(dest, "@@ -%d +%d @@%s%s\n", > > left_start, right_start, > > - prototype ? " " : "", > > - prototype ? : ""); > > + state->prototype ? " " : "", > > + state->prototype ? : ""); > > That is not C. I had to look it up: "x ? : y" is a GCC extension. > Something for a separate clean-up commit, I guess. This is implicitly fixed by using a static buffer instead of a pointer: state->prototype[0] ? state->prototype : ""); I believe the diff code is using this extension in more places. Something to tidy up later. diff 86b603da3068dce115470492279dc6f86f17f60b /home/stsp/src/diff blob - 699cdbdee8d7c7fa45ac1a2cf93547d0a2c9fdc8 file + lib/diff_internal.h --- lib/diff_internal.h +++ lib/diff_internal.h @@ -173,7 +173,9 @@ int diff_output_lines(struct diff_output_info *output_ int diff_output_trailing_newline_msg(struct diff_output_info *outinfo, FILE *dest, const struct diff_chunk *c); -int diff_output_match_function_prototype(char **prototype, +#define DIFF_FUNCTION_CONTEXT_SIZE 55 +int diff_output_match_function_prototype(char *prototype, size_t prototype_size, + int *last_prototype_idx, const struct diff_result *result, const struct diff_chunk_context *cc); blob - e286c6225a8b2095cd86a33a23e4c4031b3f3a0c file + lib/diff_output.c --- lib/diff_output.c +++ lib/diff_output.c @@ -236,68 +236,67 @@ diff_output_trailing_newline_msg(struct diff_output_in } static bool -is_function_prototype(const char *buf) +is_function_prototype(unsigned char ch) { - return isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$'; + return (isalpha(ch) || ch == '_' || ch == '$'); } -#define FUNCTION_CONTEXT_SIZE 55 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) int -diff_output_match_function_prototype(char **prototype, - const struct diff_result *result, +diff_output_match_function_prototype(char *prototype, size_t prototype_size, + int *last_prototype_idx, const struct diff_result *result, const struct diff_chunk_context *cc) { struct diff_atom *start_atom, *atom; const struct diff_data *data; - unsigned char buf[FUNCTION_CONTEXT_SIZE]; + unsigned char buf[DIFF_FUNCTION_CONTEXT_SIZE]; char *state = NULL; - int rc, i; + int rc, i, ch; - *prototype = NULL; - if (result->left->atoms.len > 0 && cc->left.start > 0) { data = result->left; start_atom = &data->atoms.head[cc->left.start - 1]; - } else if (result->right->atoms.len > 0 && cc->right.start > 0) { - data = result->right; - start_atom = &data->atoms.head[cc->right.start - 1]; } else return DIFF_RC_OK; diff_data_foreach_atom_backwards_from(start_atom, atom, data) { - for (i = 0; i < atom->len && i < sizeof(buf) - 1; i++) { - unsigned int ch; + int atom_idx = diff_atom_root_idx(data, atom); + if (atom_idx < *last_prototype_idx) + break; + rc = get_atom_byte(&ch, atom, 0); + if (rc) + return rc; + buf[0] = (unsigned char)ch; + if (!is_function_prototype(buf[0])) + continue; + for (i = 1; i < atom->len && i < sizeof(buf) - 1; i++) { rc = get_atom_byte(&ch, atom, i); if (rc) return rc; if (ch == '\n') break; - buf[i] = ch; + buf[i] = (unsigned char)ch; } buf[i] = '\0'; - if (is_function_prototype(buf)) { - if (begins_with(buf, "private:")) { - if (!state) - state = " (private)"; - } else if (begins_with(buf, "protected:")) { - if (!state) - state = " (protected)"; - } else if (begins_with(buf, "public:")) { - if (!state) - state = " (public)"; - } else { - if (state) /* don't care about truncation */ - strlcat(buf, state, sizeof(buf)); - *prototype = strdup(buf); - if (*prototype == NULL) - return ENOMEM; - return DIFF_RC_OK; - } + if (begins_with(buf, "private:")) { + if (!state) + state = " (private)"; + } else if (begins_with(buf, "protected:")) { + if (!state) + state = " (protected)"; + } else if (begins_with(buf, "public:")) { + if (!state) + state = " (public)"; + } else { + if (state) /* don't care about truncation */ + strlcat(buf, state, sizeof(buf)); + strlcpy(prototype, buf, prototype_size); + break; } } + *last_prototype_idx = diff_atom_root_idx(data, start_atom); return DIFF_RC_OK; } blob - 0c30eeafe6f5d012f49287395a528b8490816ccd file + lib/diff_output_unidiff.c --- lib/diff_output_unidiff.c +++ lib/diff_output_unidiff.c @@ -20,6 +20,7 @@ #include <stdint.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include <assert.h> #include <arraylist.h> @@ -189,6 +190,8 @@ diff_chunk_context_load_change(struct diff_chunk_conte struct diff_output_unidiff_state { bool header_printed; + char prototype[DIFF_FUNCTION_CONTEXT_SIZE]; + int last_prototype_idx; }; struct diff_output_unidiff_state * @@ -206,6 +209,8 @@ void diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state) { state->header_printed = false; + memset(state->prototype, 0, sizeof(state->prototype)); + state->last_prototype_idx = 0; } void @@ -224,7 +229,6 @@ output_unidiff_chunk(struct diff_output_info *outinfo, { int rc, left_start, left_len, right_start, right_len; off_t outoff = 0, *offp; - char *prototype = NULL; if (diff_range_empty(&cc->left) && diff_range_empty(&cc->right)) return DIFF_RC_OK; @@ -279,7 +283,8 @@ output_unidiff_chunk(struct diff_output_info *outinfo, right_start = cc->right.start + 1; if (show_function_prototypes) { - rc = diff_output_match_function_prototype(&prototype, + rc = diff_output_match_function_prototype(state->prototype, + sizeof(state->prototype), &state->last_prototype_idx, result, cc); if (rc) return rc; @@ -288,25 +293,24 @@ output_unidiff_chunk(struct diff_output_info *outinfo, if (left_len == 1 && right_len == 1) { rc = fprintf(dest, "@@ -%d +%d @@%s%s\n", left_start, right_start, - prototype ? " " : "", - prototype ? : ""); + state->prototype[0] ? " " : "", + state->prototype[0] ? state->prototype : ""); } else if (left_len == 1 && right_len != 1) { rc = fprintf(dest, "@@ -%d +%d,%d @@%s%s\n", left_start, right_start, right_len, - prototype ? " " : "", - prototype ? : ""); + state->prototype[0] ? " " : "", + state->prototype[0] ? state->prototype : ""); } else if (left_len != 1 && right_len == 1) { rc = fprintf(dest, "@@ -%d,%d +%d @@%s%s\n", left_start, left_len, right_start, - prototype ? " " : "", - prototype ? : ""); + state->prototype[0] ? " " : "", + state->prototype[0] ? state->prototype : ""); } else { rc = fprintf(dest, "@@ -%d,%d +%d,%d @@%s%s\n", left_start, left_len, right_start, right_len, - prototype ? " " : "", - prototype ? : ""); + state->prototype[0] ? " " : "", + state->prototype[0] ? state->prototype : ""); } - free(prototype); if (rc < 0) return errno; if (outinfo) {
diff: limit search effort for function prototypes