Download raw body.
expose chunk offsets in the diff API
On 23-02-20 12:05AM, Mark Jamsek wrote:
> As discussed with stsp, if we expose each chunk's offset, we can more
> efficiently seek to where we need to start parsing lines rather than
> skipping each line till we get there. The below diff makes this change.
I completely forgot to explain that atom->pos is indeed set whether
mmap'd or not--it is just that atom->at is _only_ set when mmap'd.
This is done in the atomize routines in diff_atomize_text.c.
Perhaps the comment in the diff_main.h header could be more precise?
> This also requires tightening the check for chunks with only added lines
> so we don't grab the offset to unchanged lines (i.e., right_count ==
> left_count). On that note, should we also expose the diff_chunk_type()
> routine?
>
> Regress is obviously unaffected by this change.
>
> diffstat /home/mark/src/got
> M lib/diff_output.h | 2+ 0-
> M lib/diff_output_unidiff.c | 12+ 0-
> M lib/worktree.c | 10+ 14-
>
> 3 files changed, 24 insertions(+), 14 deletions(-)
>
> diff /home/mark/src/got
> commit - 53e553e8ded01524f60c018b2266acc39af30669
> path + /home/mark/src/got
> blob - e2f352bf17421cfdc1dd349b9740f53d91599b2e
> file + lib/diff_output.h
> --- lib/diff_output.h
> +++ lib/diff_output.h
> @@ -74,6 +74,8 @@ struct diff_chunk *diff_chunk_get(const struct diff_re
> int diff_chunk_get_right_end(const struct diff_chunk *c,
> const struct diff_result *r,
> int context_lines);
> +off_t diff_chunk_get_left_start_pos(const struct diff_chunk *c);
> +off_t diff_chunk_get_right_start_pos(const struct diff_chunk *c);
> struct diff_chunk *diff_chunk_get(const struct diff_result *r, int chunk_idx);
> int diff_chunk_get_left_count(struct diff_chunk *c);
> int diff_chunk_get_right_count(struct diff_chunk *c);
> blob - b20dabf6658691770d21bfe3c1227ae815223474
> file + lib/diff_output_unidiff.c
> --- lib/diff_output_unidiff.c
> +++ lib/diff_output_unidiff.c
> @@ -30,6 +30,18 @@ bool
> #include "diff_internal.h"
> #include "diff_debug.h"
>
> +off_t
> +diff_chunk_get_left_start_pos(const struct diff_chunk *c)
> +{
> + return c->left_start->pos;
> +}
> +
> +off_t
> +diff_chunk_get_right_start_pos(const struct diff_chunk *c)
> +{
> + return c->right_start->pos;
> +}
> +
> bool
> diff_chunk_context_empty(const struct diff_chunk_context *cc)
> {
> blob - 2d75c470b84a9c0f07ca7ccd39595267ab24fd49
> file + lib/worktree.c
> --- lib/worktree.c
> +++ lib/worktree.c
> @@ -1557,16 +1557,12 @@ get_modified_file_content_status(unsigned char *status
> if (err)
> goto done;
>
> - if (fseek(ondisk_file, 0L, SEEK_SET) == -1) {
> - err = got_ferror(ondisk_file, GOT_ERR_IO);
> - goto done;
> - }
> -
> r = diffreg_result->result;
>
> for (n = 0; n < r->chunks.len; n += nchunks_parsed) {
> struct diff_chunk *c;
> struct diff_chunk_context cc = {};
> + off_t pos;
> int clc, crc;
>
> /*
> @@ -1577,20 +1573,20 @@ get_modified_file_content_status(unsigned char *status
> clc = diff_chunk_get_left_count(c);
> crc = diff_chunk_get_right_count(c);
>
> - if (!crc && clc) {
> + if (!crc || crc == clc) {
> nchunks_parsed = 1;
> - continue; /* removed lines */
> + continue; /* removed or unchanged lines */
> }
>
> + pos = diff_chunk_get_right_start_pos(c);
> + if (fseek(ondisk_file, pos, SEEK_SET) == -1) {
> + err = got_ferror(ondisk_file, GOT_ERR_IO);
> + goto done;
> + }
> +
> diff_chunk_context_load_change(&cc, &nchunks_parsed, r, n, 0);
> + ln = cc.right.start;
>
> - while (ln < cc.right.start) {
> - err = skip_one_line(ondisk_file);
> - if (err)
> - goto done;
> - ++ln;
> - }
> -
> while (ln < cc.right.end) {
> linelen = getline(&line, &linesize, ondisk_file);
> if (linelen == -1) {
>
> --
> Mark Jamsek <fnc.bsdbox.org|got.bsdbox.org>
> GPG: F2FF 13DE 6A06 C471 CA80 E6E2 2930 DC66 86EE CF68
--
Mark Jamsek <fnc.bsdbox.org|got.bsdbox.org>
GPG: F2FF 13DE 6A06 C471 CA80 E6E2 2930 DC66 86EE CF68
expose chunk offsets in the diff API