From: Mark Jamsek Subject: Re: merge chokes, creates bogus conflicts To: Christian Weisgerber , gameoftrees@openbsd.org Date: Sun, 19 Feb 2023 18:38:41 +1100 On 23-02-18 04:45PM, Stefan Sperling wrote: > On Sun, Feb 19, 2023 at 01:57:23AM +1100, Mark Jamsek wrote: > > Updated diff incorporates all your suggestions. Thanks for the > > improvements :) > > Thanks! ok by me. We can add the diff_result performance tweak > on top once you or myself find time to implement it. The below diff implements this optimisation by using diff_result and bypassing the somewhat expensive diff format and output path. It also fixes a bug in the previous revision where the "from" file (f1) was always empty because the blob was consumed by the caller and needed a got_object_blob_rewind() call. So we were effectively adding the whole file thus making the process that much slower! Also, that update.sh change that I spent far too much time debating myself over has now reverted from "M foo" back to "C foo"--and I think this is actually right: In the test, we delete "foo" in the previous commit, so it actually is a new conflict. The previous approach of parsing the text diff for added lines turned the report from "C foo" to "M foo" because it wasn't forcing ASCII, thus the diff didn't have any '+' lines, only the "Binary files ... differ" line (i.e., no conflict markers). If we set the 'force_text_diff' flag in the previous revision, it does indeed report as a conflict. Thus this diff is not only an optimisation but also fixes two bugs :) diffstat /home/mark/src/got M lib/worktree.c | 72+ 45- M regress/cmdline/update.sh | 1+ 1- 2 files changed, 73 insertions(+), 46 deletions(-) diff /home/mark/src/got commit - e5b3f6fb20301629598555a3a5be9845c20cf42f path + /home/mark/src/got blob - 055da929b846e6b27b801620463ae896ca9faf0a file + lib/worktree.c --- lib/worktree.c +++ lib/worktree.c @@ -1513,6 +1513,8 @@ done: return err; } +static const struct got_error *skip_one_line(FILE *); + /* * Upgrade STATUS_MODIFY to STATUS_CONFLICT if a * conflict marker is found in newly added lines only. @@ -1522,77 +1524,102 @@ get_modified_file_content_status(unsigned char *status struct got_blob_object *blob, const char *path, struct stat *sb, FILE *ondisk_file) { - const struct got_error *err = NULL; + const struct got_error *err, *free_err; const char *markers[3] = { GOT_DIFF_CONFLICT_MARKER_BEGIN, GOT_DIFF_CONFLICT_MARKER_SEP, GOT_DIFF_CONFLICT_MARKER_END }; - FILE *f = NULL, *f1 = NULL; - int i = 0; + FILE *f1 = NULL; + struct got_diffreg_result *diffreg_result = NULL; + struct diff_result *r; + int nchunks_parsed, n, i = 0, ln = 0; char *line = NULL; size_t linesize = 0; ssize_t linelen; - off_t sz1 = 0; - if (*status == GOT_STATUS_MODIFY) { - f = got_opentemp(); - if (f == NULL) - return got_error_from_errno("got_opentemp"); + if (*status != GOT_STATUS_MODIFY) + return NULL; - f1 = got_opentemp(); - if (f1 == NULL) { - err = got_error_from_errno("got_opentemp"); + f1 = got_opentemp(); + if (f1 == NULL) + return got_error_from_errno("got_opentemp"); + + if (blob) { + got_object_blob_rewind(blob); + err = got_object_blob_dump_to_file(NULL, NULL, NULL, f1, blob); + if (err) goto done; + } + + err = got_diff_files(&diffreg_result, f1, 1, NULL, ondisk_file, + 1, NULL, 0, 0, 1, NULL, GOT_DIFF_ALGORITHM_MYERS); + if (err) + goto done; + + if (fseek(ondisk_file, 0L, SEEK_SET) == -1) { + err = got_ferror(ondisk_file, GOT_ERR_IO); + goto done; + } + + r = diffreg_result->result; + + for (n = 0; n < r->chunks.len; n += nchunks_parsed) { + struct diff_chunk *c; + struct diff_chunk_context cc = {}; + int clc, crc; + + /* + * We can optimise a little by advancing straight + * to the next chunk if this one has no added lines. + */ + c = diff_chunk_get(r, n); + clc = diff_chunk_get_left_count(c); + crc = diff_chunk_get_right_count(c); + + if (!crc && clc) { + nchunks_parsed = 1; + continue; /* removed lines */ } - if (blob) { - err = got_object_blob_dump_to_file(&sz1, NULL, NULL, - f1, blob); + diff_chunk_context_load_change(&cc, &nchunks_parsed, r, n, 0); + + while (ln < cc.right.start) { + err = skip_one_line(ondisk_file); if (err) goto done; + ++ln; } - err = got_diff_blob_file(blob, f1, sz1, NULL, ondisk_file, 1, - sb, path, GOT_DIFF_ALGORITHM_MYERS, 0, 0, 0, NULL, f); - if (err) - goto done; - - if (fflush(f) == EOF) { - err = got_error_from_errno("fflush"); - goto done; - } - if (fseeko(f, 0L, SEEK_SET) == -1) { - err = got_error_from_errno("fseek"); - goto done; - } - } - - while (*status == GOT_STATUS_MODIFY) { - linelen = getline(&line, &linesize, f); - if (linelen == -1) { - if (feof(f)) + while (ln < cc.right.end) { + linelen = getline(&line, &linesize, ondisk_file); + if (linelen == -1) { + if (feof(ondisk_file)) + break; + err = got_ferror(ondisk_file, GOT_ERR_IO); break; - err = got_ferror(f, GOT_ERR_IO); - break; - } + } - if (*line == '+' && - strncmp(line + 1, markers[i], strlen(markers[i])) == 0) { - if (strcmp(markers[i], GOT_DIFF_CONFLICT_MARKER_END) - == 0) - *status = GOT_STATUS_CONFLICT; - else - i++; + if (line && strncmp(line, markers[i], + strlen(markers[i])) == 0) { + if (strcmp(markers[i], + GOT_DIFF_CONFLICT_MARKER_END) == 0) { + *status = GOT_STATUS_CONFLICT; + goto done; + } else + i++; + } + ++ln; } } done: free(line); - if (f != NULL && fclose(f) == EOF && err == NULL) - err = got_error_from_errno("fclose"); if (f1 != NULL && fclose(f1) == EOF && err == NULL) err = got_error_from_errno("fclose"); + free_err = got_diffreg_result_free(diffreg_result); + if (err == NULL) + err = free_err; return err; } blob - 1ab6bfe75bae74735e4040c6d98ea1dea7cd9dc3 file + regress/cmdline/update.sh --- regress/cmdline/update.sh +++ regress/cmdline/update.sh @@ -2970,7 +2970,7 @@ test_update_binary_file() { fi (cd $testroot/wt && got status > $testroot/stdout) - echo 'M foo' > $testroot/stdout.expected + echo 'C foo' > $testroot/stdout.expected echo -n '? ' >> $testroot/stdout.expected (cd $testroot/wt && ls foo-1-* >> $testroot/stdout.expected) echo -n '? ' >> $testroot/stdout.expected -- Mark Jamsek GPG: F2FF 13DE 6A06 C471 CA80 E6E2 2930 DC66 86EE CF68