From: Mark Jamsek Subject: Re: diff: Function names appearing inside diff context To: Tom Jones Cc: gameoftrees@openbsd.org Date: Fri, 23 Sep 2022 23:52:31 +1000 On 22-09-23 11:25PM, Mark Jamsek wrote: > On 22-09-21 04:58PM, Tom Jones wrote: > > Hi, > > > > There is an issue that if a function name appears within the context it > > isn't discovered correctly and we instead get the previously declared > > function in the file. > > > > I am not sure how to fix this. I can't quite grasp why the range for the > > left side contains the context already. I have included a patch that > > resolves a single case, but I think I am missing how to discover the > > correct offset. > > > > The patch also includes tests that exercise this case. gdiff exhibits > > this error, but openbsd diff doesn't seem to. > > > > I'd love some advice on how to construct the start of the search > > correctly. > > > > - Tom > > The below diff does this; we start scanning for the enclosing function > at the first changed line. We could arguably scan from the line > immediately preceding the first changed line, but that would miss the > case where the line with the function name is changed. This works with > your test case using various numbers of context lines, but if you have > other test cases, further testing would be good! I don't think it can happen, tbh, because we'll fail this check first and bail early instead: if (result->left->atoms.len > 0 && cc->left.start > 0) { data = result->left; start_atom = &data->atoms.head[idx]; } else return DIFF_RC_OK; but to be safe and make sure we don't read OOB, we should MIN the start index with this change from the previous diff: - int rc, i, ch, idx = cc->left.start + (ncontext ? ncontext : 0); + int rc, i, ch, idx; + idx = MIN(result->left->atoms.len - 1, + cc->left.start + (ncontext ? ncontext : 0)); + Complete diff: diff refs/heads/main refs/heads/fix/diffp commit - 2ad87fffe4c7553e1593ff3f3ece274cf3a48931 commit + dc49f134351deb21cfeb7cbe220b80b20480847e blob - 396f264aae799c02ce62d47c6762b7dc5bd05331 blob + be72763dedcb12e2baabb6fda64e06d2fcf5f598 --- lib/diff_internal.h +++ lib/diff_internal.h @@ -177,7 +177,8 @@ int diff_output_match_function_prototype(char *prototy int diff_output_match_function_prototype(char *prototype, size_t prototype_size, int *last_prototype_idx, const struct diff_result *result, - const struct diff_chunk_context *cc); + const struct diff_chunk_context *cc, + unsigned int ncontext); struct diff_output_info *diff_output_info_alloc(void); blob - 3047221aa5a15e4fc2a5dea8267e97864da21264 blob + f69d508451ad4771f8aa855adba6ed4d17f06e92 --- lib/diff_output.c +++ lib/diff_output.c @@ -270,17 +270,20 @@ diff_output_match_function_prototype(char *prototype, int diff_output_match_function_prototype(char *prototype, size_t prototype_size, int *last_prototype_idx, const struct diff_result *result, - const struct diff_chunk_context *cc) + const struct diff_chunk_context *cc, unsigned int ncontext) { struct diff_atom *start_atom, *atom; const struct diff_data *data; unsigned char buf[DIFF_FUNCTION_CONTEXT_SIZE]; const char *state = NULL; - int rc, i, ch; + int rc, i, ch, idx; + idx = MIN(result->left->atoms.len - 1, + cc->left.start + (ncontext ? ncontext : 0)); + if (result->left->atoms.len > 0 && cc->left.start > 0) { data = result->left; - start_atom = &data->atoms.head[cc->left.start - 1]; + start_atom = &data->atoms.head[idx]; } else return DIFF_RC_OK; blob - 18bff74063081228c1c525cce2b35ad26a527bf8 blob + 74b22eb8207ab2c5f154a22229051900a3d7b1ce --- lib/diff_output_unidiff.c +++ lib/diff_output_unidiff.c @@ -225,7 +225,7 @@ output_unidiff_chunk(struct diff_output_info *outinfo, const struct diff_input_info *info, const struct diff_result *result, bool print_header, bool show_function_prototypes, - const struct diff_chunk_context *cc) + const struct diff_chunk_context *cc, unsigned int ncontext) { int rc, left_start, left_len, right_start, right_len; off_t outoff = 0, *offp; @@ -292,7 +292,7 @@ output_unidiff_chunk(struct diff_output_info *outinfo, if (show_function_prototypes) { rc = diff_output_match_function_prototype(state->prototype, sizeof(state->prototype), &state->last_prototype_idx, - result, cc); + result, cc, ncontext); if (rc) return rc; } @@ -416,7 +416,7 @@ diff_output_unidiff_chunk(struct diff_output_info **ou } return output_unidiff_chunk(outinfo, dest, state, info, - result, false, show_function_prototypes, cc); + result, false, show_function_prototypes, cc, 0); } int @@ -569,7 +569,7 @@ diff_output_unidiff(struct diff_output_info **output_i " print left %d-%d right %d-%d\n", cc.left.start, cc.left.end, cc.right.start, cc.right.end); output_unidiff_chunk(outinfo, dest, state, info, result, - true, show_function_prototypes, &cc); + true, show_function_prototypes, &cc, context_lines); cc = next; debug("new unprinted chunk is left %d-%d right %d-%d\n", cc.left.start, cc.left.end, cc.right.start, cc.right.end); @@ -577,7 +577,7 @@ diff_output_unidiff(struct diff_output_info **output_i if (!diff_chunk_context_empty(&cc)) output_unidiff_chunk(outinfo, dest, state, info, result, - true, show_function_prototypes, &cc); + true, show_function_prototypes, &cc, context_lines); diff_output_unidiff_state_free(state); return DIFF_RC_OK; } -- Mark Jamsek GPG: F2FF 13DE 6A06 C471 CA80 E6E2 2930 DC66 86EE CF68