"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: diff: limit search effort for function prototypes
To:
Christian Weisgerber <naddy@mips.inka.de>
Cc:
gameoftrees@openbsd.org
Date:
Wed, 9 Dec 2020 23:22:52 +0100

Download raw body.

Thread
On Wed, Dec 09, 2020 at 10:45:52PM +0100, Christian Weisgerber wrote:
> Stefan Sperling:
> 
> > > GNU diff caches the last function prototype and the line number
> > > from which it started searching.
> > 
> > Yes, we could. This is a great idea. Implemented below. This approach
> > is also much faster than my previous patch.
> > 
> > So with this diff we only search the 'left' version of the file for
> > prototypes. But that's probably how other diff tools do it as well.
> 
> Yes, GNU diff does this as well.
> 
> > ok?
> 
> Basically, yes.
> 
> That's a malloc/free for each prototype.  Would a static buffer in
> the state be better?

Sure. See below. It is slightly faster on OpenBSD because it reduces
free(3) checking overhead.

> > --- lib/diff_output_unidiff.c
> > +++ lib/diff_output_unidiff.c
> >  	if (left_len == 1 && right_len == 1) {
> >  		rc = fprintf(dest, "@@ -%d +%d @@%s%s\n",
> >  			left_start, right_start,
> > -			prototype ? " " : "",
> > -			prototype ? : "");
> > +			state->prototype ? " " : "",
> > +			state->prototype ? : "");
> 
> That is not C.  I had to look it up: "x ? : y" is a GCC extension.
> Something for a separate clean-up commit, I guess.

This is implicitly fixed by using a static buffer instead of a pointer:

	state->prototype[0] ? state->prototype : "");

I believe the diff code is using this extension in more places.
Something to tidy up later.

diff 86b603da3068dce115470492279dc6f86f17f60b /home/stsp/src/diff
blob - 699cdbdee8d7c7fa45ac1a2cf93547d0a2c9fdc8
file + lib/diff_internal.h
--- lib/diff_internal.h
+++ lib/diff_internal.h
@@ -173,7 +173,9 @@ int diff_output_lines(struct diff_output_info *output_
 int diff_output_trailing_newline_msg(struct diff_output_info *outinfo,
 				     FILE *dest,
 				     const struct diff_chunk *c);
-int diff_output_match_function_prototype(char **prototype,
+#define DIFF_FUNCTION_CONTEXT_SIZE	55
+int diff_output_match_function_prototype(char *prototype, size_t prototype_size,
+					 int *last_prototype_idx,
 					 const struct diff_result *result,
 					 const struct diff_chunk_context *cc);
 
blob - e286c6225a8b2095cd86a33a23e4c4031b3f3a0c
file + lib/diff_output.c
--- lib/diff_output.c
+++ lib/diff_output.c
@@ -236,68 +236,67 @@ diff_output_trailing_newline_msg(struct diff_output_in
 }
 
 static bool
-is_function_prototype(const char *buf)
+is_function_prototype(unsigned char ch)
 {
-	return isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$';
+	return (isalpha(ch) || ch == '_' || ch == '$');
 }
 
-#define FUNCTION_CONTEXT_SIZE	55
 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
 
 int
-diff_output_match_function_prototype(char **prototype,
-    const struct diff_result *result,
+diff_output_match_function_prototype(char *prototype, size_t prototype_size,
+    int *last_prototype_idx, const struct diff_result *result,
     const struct diff_chunk_context *cc)
 {
 	struct diff_atom *start_atom, *atom;
 	const struct diff_data *data;
-	unsigned char buf[FUNCTION_CONTEXT_SIZE];
+	unsigned char buf[DIFF_FUNCTION_CONTEXT_SIZE];
 	char *state = NULL;
-	int rc, i;
+	int rc, i, ch;
 
-	*prototype = NULL;
-
 	if (result->left->atoms.len > 0 && cc->left.start > 0) {
 		data = result->left;
 		start_atom = &data->atoms.head[cc->left.start - 1];
-	} else if (result->right->atoms.len > 0 && cc->right.start > 0) {
-		data = result->right;
-		start_atom = &data->atoms.head[cc->right.start - 1];
 	} else
 		return DIFF_RC_OK;
 
 	diff_data_foreach_atom_backwards_from(start_atom, atom, data) {
-		for (i = 0; i < atom->len && i < sizeof(buf) - 1; i++) {
-			unsigned int ch;
+		int atom_idx = diff_atom_root_idx(data, atom);
+		if (atom_idx < *last_prototype_idx)
+			break;
+		rc = get_atom_byte(&ch, atom, 0);
+		if (rc)
+			return rc;
+		buf[0] = (unsigned char)ch;
+		if (!is_function_prototype(buf[0]))
+			continue;
+		for (i = 1; i < atom->len && i < sizeof(buf) - 1; i++) {
 			rc = get_atom_byte(&ch, atom, i);
 			if (rc)
 				return rc;
 			if (ch == '\n')
 				break;
-			buf[i] = ch;
+			buf[i] = (unsigned char)ch;
 		}
 		buf[i] = '\0';
-		if (is_function_prototype(buf)) {
-			if (begins_with(buf, "private:")) {
-				if (!state)
-					state = " (private)";
-			} else if (begins_with(buf, "protected:")) {
-				if (!state)
-					state = " (protected)";
-			} else if (begins_with(buf, "public:")) {
-				if (!state)
-					state = " (public)";
-			} else {
-				if (state)  /* don't care about truncation */
-					strlcat(buf, state, sizeof(buf));
-				*prototype = strdup(buf);
-				if (*prototype == NULL)
-					return ENOMEM;
-				return DIFF_RC_OK;
-			}
+		if (begins_with(buf, "private:")) {
+			if (!state)
+				state = " (private)";
+		} else if (begins_with(buf, "protected:")) {
+			if (!state)
+				state = " (protected)";
+		} else if (begins_with(buf, "public:")) {
+			if (!state)
+				state = " (public)";
+		} else {
+			if (state)  /* don't care about truncation */
+				strlcat(buf, state, sizeof(buf));
+			strlcpy(prototype, buf, prototype_size);
+			break;
 		}
 	}
 
+	*last_prototype_idx = diff_atom_root_idx(data, start_atom);
 	return DIFF_RC_OK;
 }
 
blob - 0c30eeafe6f5d012f49287395a528b8490816ccd
file + lib/diff_output_unidiff.c
--- lib/diff_output_unidiff.c
+++ lib/diff_output_unidiff.c
@@ -20,6 +20,7 @@
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <assert.h>
 
 #include <arraylist.h>
@@ -189,6 +190,8 @@ diff_chunk_context_load_change(struct diff_chunk_conte
 
 struct diff_output_unidiff_state {
 	bool header_printed;
+	char prototype[DIFF_FUNCTION_CONTEXT_SIZE];
+	int last_prototype_idx;
 };
 
 struct diff_output_unidiff_state *
@@ -206,6 +209,8 @@ void
 diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state)
 {
 	state->header_printed = false;
+	memset(state->prototype, 0, sizeof(state->prototype));
+	state->last_prototype_idx = 0;
 }
 
 void
@@ -224,7 +229,6 @@ output_unidiff_chunk(struct diff_output_info *outinfo,
 {
 	int rc, left_start, left_len, right_start, right_len;
 	off_t outoff = 0, *offp;
-	char *prototype = NULL;
 
 	if (diff_range_empty(&cc->left) && diff_range_empty(&cc->right))
 		return DIFF_RC_OK;
@@ -279,7 +283,8 @@ output_unidiff_chunk(struct diff_output_info *outinfo,
 		right_start = cc->right.start + 1;
 
 	if (show_function_prototypes) {
-		rc = diff_output_match_function_prototype(&prototype,
+		rc = diff_output_match_function_prototype(state->prototype,
+		    sizeof(state->prototype), &state->last_prototype_idx,
 		    result, cc);
 		if (rc)
 			return rc;
@@ -288,25 +293,24 @@ output_unidiff_chunk(struct diff_output_info *outinfo,
 	if (left_len == 1 && right_len == 1) {
 		rc = fprintf(dest, "@@ -%d +%d @@%s%s\n",
 			left_start, right_start,
-			prototype ? " " : "",
-			prototype ? : "");
+			state->prototype[0] ? " " : "",
+			state->prototype[0] ? state->prototype : "");
 	} else if (left_len == 1 && right_len != 1) {
 		rc = fprintf(dest, "@@ -%d +%d,%d @@%s%s\n",
 			left_start, right_start, right_len,
-			prototype ? " " : "",
-			prototype ? : "");
+			state->prototype[0] ? " " : "",
+			state->prototype[0] ? state->prototype : "");
 	} else if (left_len != 1 && right_len == 1) {
 		rc = fprintf(dest, "@@ -%d,%d +%d @@%s%s\n",
 			left_start, left_len, right_start,
-			prototype ? " " : "",
-			prototype ? : "");
+			state->prototype[0] ? " " : "",
+			state->prototype[0] ? state->prototype : "");
 	} else {
 		rc = fprintf(dest, "@@ -%d,%d +%d,%d @@%s%s\n",
 			left_start, left_len, right_start, right_len,
-			prototype ? " " : "",
-			prototype ? : "");
+			state->prototype[0] ? " " : "",
+			state->prototype[0] ? state->prototype : "");
 	}
-	free(prototype);
 	if (rc < 0)
 		return errno;
 	if (outinfo) {