From: Mark Jamsek Subject: expose chunk offsets in the diff API To: Game of Trees Date: Mon, 20 Feb 2023 00:05:53 +1100 As discussed with stsp, if we expose each chunk's offset, we can more efficiently seek to where we need to start parsing lines rather than skipping each line till we get there. The below diff makes this change. This also requires tightening the check for chunks with only added lines so we don't grab the offset to unchanged lines (i.e., right_count == left_count). On that note, should we also expose the diff_chunk_type() routine? Regress is obviously unaffected by this change. diffstat /home/mark/src/got M lib/diff_output.h | 2+ 0- M lib/diff_output_unidiff.c | 12+ 0- M lib/worktree.c | 10+ 14- 3 files changed, 24 insertions(+), 14 deletions(-) diff /home/mark/src/got commit - 53e553e8ded01524f60c018b2266acc39af30669 path + /home/mark/src/got blob - e2f352bf17421cfdc1dd349b9740f53d91599b2e file + lib/diff_output.h --- lib/diff_output.h +++ lib/diff_output.h @@ -74,6 +74,8 @@ struct diff_chunk *diff_chunk_get(const struct diff_re int diff_chunk_get_right_end(const struct diff_chunk *c, const struct diff_result *r, int context_lines); +off_t diff_chunk_get_left_start_pos(const struct diff_chunk *c); +off_t diff_chunk_get_right_start_pos(const struct diff_chunk *c); struct diff_chunk *diff_chunk_get(const struct diff_result *r, int chunk_idx); int diff_chunk_get_left_count(struct diff_chunk *c); int diff_chunk_get_right_count(struct diff_chunk *c); blob - b20dabf6658691770d21bfe3c1227ae815223474 file + lib/diff_output_unidiff.c --- lib/diff_output_unidiff.c +++ lib/diff_output_unidiff.c @@ -30,6 +30,18 @@ bool #include "diff_internal.h" #include "diff_debug.h" +off_t +diff_chunk_get_left_start_pos(const struct diff_chunk *c) +{ + return c->left_start->pos; +} + +off_t +diff_chunk_get_right_start_pos(const struct diff_chunk *c) +{ + return c->right_start->pos; +} + bool diff_chunk_context_empty(const struct diff_chunk_context *cc) { blob - 2d75c470b84a9c0f07ca7ccd39595267ab24fd49 file + lib/worktree.c --- lib/worktree.c +++ lib/worktree.c @@ -1557,16 +1557,12 @@ get_modified_file_content_status(unsigned char *status if (err) goto done; - if (fseek(ondisk_file, 0L, SEEK_SET) == -1) { - err = got_ferror(ondisk_file, GOT_ERR_IO); - goto done; - } - r = diffreg_result->result; for (n = 0; n < r->chunks.len; n += nchunks_parsed) { struct diff_chunk *c; struct diff_chunk_context cc = {}; + off_t pos; int clc, crc; /* @@ -1577,20 +1573,20 @@ get_modified_file_content_status(unsigned char *status clc = diff_chunk_get_left_count(c); crc = diff_chunk_get_right_count(c); - if (!crc && clc) { + if (!crc || crc == clc) { nchunks_parsed = 1; - continue; /* removed lines */ + continue; /* removed or unchanged lines */ } + pos = diff_chunk_get_right_start_pos(c); + if (fseek(ondisk_file, pos, SEEK_SET) == -1) { + err = got_ferror(ondisk_file, GOT_ERR_IO); + goto done; + } + diff_chunk_context_load_change(&cc, &nchunks_parsed, r, n, 0); + ln = cc.right.start; - while (ln < cc.right.start) { - err = skip_one_line(ondisk_file); - if (err) - goto done; - ++ln; - } - while (ln < cc.right.end) { linelen = getline(&line, &linesize, ondisk_file); if (linelen == -1) { -- Mark Jamsek GPG: F2FF 13DE 6A06 C471 CA80 E6E2 2930 DC66 86EE CF68