From: Stefan Sperling Subject: diff: limit search effort for function prototypes To: gameoftrees@openbsd.org Date: Mon, 7 Dec 2020 15:00:24 +0100 When I ported code for the '-p' option from diff(1) to our new diff code, I did not add an upper bound for the number of lines scanned upwards in the search for a function prototype. This can lead to excessive run-time of the new diff code if display of function prototypes is enabled (which Got enables by default). I found a problematic case in the Git repository of the Tor project: git clone --bare https://git.torproject.org/tor.git cd tor.git && got log -l1 -p -c 9c83cd1993b6ed98301020e5a170df9d5427a1e6 Diffing this commit currently takes a very long time. This limits the upwards search to 1000 lines, and searches just the first byte of each line being scanned. The patch can be applied to both diff.git and got.git. ok? diff 86b603da3068dce115470492279dc6f86f17f60b /home/stsp/src/diff blob - e286c6225a8b2095cd86a33a23e4c4031b3f3a0c file + lib/diff_output.c --- lib/diff_output.c +++ lib/diff_output.c @@ -236,11 +236,12 @@ diff_output_trailing_newline_msg(struct diff_output_in } static bool -is_function_prototype(const char *buf) +is_function_prototype(unsigned char ch) { - return isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$'; + return (isalpha(ch) || ch == '_' || ch == '$'); } +#define FUNCTION_CONTEXT_SEARCH_MAX 1000 #define FUNCTION_CONTEXT_SIZE 55 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) @@ -253,7 +254,7 @@ diff_output_match_function_prototype(char **prototype, const struct diff_data *data; unsigned char buf[FUNCTION_CONTEXT_SIZE]; char *state = NULL; - int rc, i; + int rc, i, ch, nsearched = 0; *prototype = NULL; @@ -267,34 +268,39 @@ diff_output_match_function_prototype(char **prototype, return DIFF_RC_OK; diff_data_foreach_atom_backwards_from(start_atom, atom, data) { - for (i = 0; i < atom->len && i < sizeof(buf) - 1; i++) { - unsigned int ch; + if (nsearched++ > FUNCTION_CONTEXT_SEARCH_MAX) + break; + rc = get_atom_byte(&ch, atom, 0); + if (rc) + return rc; + buf[0] = (unsigned char)ch; + if (!is_function_prototype(buf[0])) + continue; + for (i = 1; i < atom->len && i < sizeof(buf) - 1; i++) { rc = get_atom_byte(&ch, atom, i); if (rc) return rc; if (ch == '\n') break; - buf[i] = ch; + buf[i] = (unsigned char)ch; } buf[i] = '\0'; - if (is_function_prototype(buf)) { - if (begins_with(buf, "private:")) { - if (!state) - state = " (private)"; - } else if (begins_with(buf, "protected:")) { - if (!state) - state = " (protected)"; - } else if (begins_with(buf, "public:")) { - if (!state) - state = " (public)"; - } else { - if (state) /* don't care about truncation */ - strlcat(buf, state, sizeof(buf)); - *prototype = strdup(buf); - if (*prototype == NULL) - return ENOMEM; - return DIFF_RC_OK; - } + if (begins_with(buf, "private:")) { + if (!state) + state = " (private)"; + } else if (begins_with(buf, "protected:")) { + if (!state) + state = " (protected)"; + } else if (begins_with(buf, "public:")) { + if (!state) + state = " (public)"; + } else { + if (state) /* don't care about truncation */ + strlcat(buf, state, sizeof(buf)); + *prototype = strdup(buf); + if (*prototype == NULL) + return ENOMEM; + return DIFF_RC_OK; } }