"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
diff: limit search effort for function prototypes
To:
gameoftrees@openbsd.org
Date:
Mon, 7 Dec 2020 15:00:24 +0100

Download raw body.

Thread
When I ported code for the '-p' option from diff(1) to our new diff code,
I did not add an upper bound for the number of lines scanned upwards in
the search for a function prototype.

This can lead to excessive run-time of the new diff code if display
of function prototypes is enabled (which Got enables by default).

I found a problematic case in the Git repository of the Tor project:

  git clone --bare https://git.torproject.org/tor.git
  cd tor.git && got log -l1 -p -c 9c83cd1993b6ed98301020e5a170df9d5427a1e6

Diffing this commit currently takes a very long time.

This limits the upwards search to 1000 lines, and searches just the
first byte of each line being scanned.

The patch can be applied to both diff.git and got.git.

ok?

diff 86b603da3068dce115470492279dc6f86f17f60b /home/stsp/src/diff
blob - e286c6225a8b2095cd86a33a23e4c4031b3f3a0c
file + lib/diff_output.c
--- lib/diff_output.c
+++ lib/diff_output.c
@@ -236,11 +236,12 @@ diff_output_trailing_newline_msg(struct diff_output_in
 }
 
 static bool
-is_function_prototype(const char *buf)
+is_function_prototype(unsigned char ch)
 {
-	return isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$';
+	return (isalpha(ch) || ch == '_' || ch == '$');
 }
 
+#define FUNCTION_CONTEXT_SEARCH_MAX	1000
 #define FUNCTION_CONTEXT_SIZE	55
 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
 
@@ -253,7 +254,7 @@ diff_output_match_function_prototype(char **prototype,
 	const struct diff_data *data;
 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
 	char *state = NULL;
-	int rc, i;
+	int rc, i, ch, nsearched = 0;
 
 	*prototype = NULL;
 
@@ -267,34 +268,39 @@ diff_output_match_function_prototype(char **prototype,
 		return DIFF_RC_OK;
 
 	diff_data_foreach_atom_backwards_from(start_atom, atom, data) {
-		for (i = 0; i < atom->len && i < sizeof(buf) - 1; i++) {
-			unsigned int ch;
+		if (nsearched++ > FUNCTION_CONTEXT_SEARCH_MAX)
+			break;
+		rc = get_atom_byte(&ch, atom, 0);
+		if (rc)
+			return rc;
+		buf[0] = (unsigned char)ch;
+		if (!is_function_prototype(buf[0]))
+			continue;
+		for (i = 1; i < atom->len && i < sizeof(buf) - 1; i++) {
 			rc = get_atom_byte(&ch, atom, i);
 			if (rc)
 				return rc;
 			if (ch == '\n')
 				break;
-			buf[i] = ch;
+			buf[i] = (unsigned char)ch;
 		}
 		buf[i] = '\0';
-		if (is_function_prototype(buf)) {
-			if (begins_with(buf, "private:")) {
-				if (!state)
-					state = " (private)";
-			} else if (begins_with(buf, "protected:")) {
-				if (!state)
-					state = " (protected)";
-			} else if (begins_with(buf, "public:")) {
-				if (!state)
-					state = " (public)";
-			} else {
-				if (state)  /* don't care about truncation */
-					strlcat(buf, state, sizeof(buf));
-				*prototype = strdup(buf);
-				if (*prototype == NULL)
-					return ENOMEM;
-				return DIFF_RC_OK;
-			}
+		if (begins_with(buf, "private:")) {
+			if (!state)
+				state = " (private)";
+		} else if (begins_with(buf, "protected:")) {
+			if (!state)
+				state = " (protected)";
+		} else if (begins_with(buf, "public:")) {
+			if (!state)
+				state = " (public)";
+		} else {
+			if (state)  /* don't care about truncation */
+				strlcat(buf, state, sizeof(buf));
+			*prototype = strdup(buf);
+			if (*prototype == NULL)
+				return ENOMEM;
+			return DIFF_RC_OK;
 		}
 	}