"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Mark Jamsek <mark@jamsek.com>
Subject:
Re: expose chunk offsets in the diff API
To:
Game of Trees <gameoftrees@openbsd.org>
Date:
Mon, 20 Feb 2023 00:18:00 +1100

Download raw body.

Thread
On 23-02-20 12:05AM, Mark Jamsek wrote:
> As discussed with stsp, if we expose each chunk's offset, we can more
> efficiently seek to where we need to start parsing lines rather than
> skipping each line till we get there. The below diff makes this change.

I completely forgot to explain that atom->pos is indeed set whether
mmap'd or not--it is just that atom->at is _only_ set when mmap'd.
This is done in the atomize routines in diff_atomize_text.c.
Perhaps the comment in the diff_main.h header could be more precise?

> This also requires tightening the check for chunks with only added lines
> so we don't grab the offset to unchanged lines (i.e., right_count ==
> left_count). On that note, should we also expose the diff_chunk_type()
> routine?
> 
> Regress is obviously unaffected by this change.
> 
> diffstat /home/mark/src/got
>  M  lib/diff_output.h          |   2+   0-
>  M  lib/diff_output_unidiff.c  |  12+   0-
>  M  lib/worktree.c             |  10+  14-
> 
> 3 files changed, 24 insertions(+), 14 deletions(-)
> 
> diff /home/mark/src/got
> commit - 53e553e8ded01524f60c018b2266acc39af30669
> path + /home/mark/src/got
> blob - e2f352bf17421cfdc1dd349b9740f53d91599b2e
> file + lib/diff_output.h
> --- lib/diff_output.h
> +++ lib/diff_output.h
> @@ -74,6 +74,8 @@ struct diff_chunk *diff_chunk_get(const struct diff_re
>  int diff_chunk_get_right_end(const struct diff_chunk *c,
>  			     const struct diff_result *r,
>  			     int context_lines);
> +off_t diff_chunk_get_left_start_pos(const struct diff_chunk *c);
> +off_t diff_chunk_get_right_start_pos(const struct diff_chunk *c);
>  struct diff_chunk *diff_chunk_get(const struct diff_result *r, int chunk_idx);
>  int diff_chunk_get_left_count(struct diff_chunk *c);
>  int diff_chunk_get_right_count(struct diff_chunk *c);
> blob - b20dabf6658691770d21bfe3c1227ae815223474
> file + lib/diff_output_unidiff.c
> --- lib/diff_output_unidiff.c
> +++ lib/diff_output_unidiff.c
> @@ -30,6 +30,18 @@ bool
>  #include "diff_internal.h"
>  #include "diff_debug.h"
>  
> +off_t
> +diff_chunk_get_left_start_pos(const struct diff_chunk *c)
> +{
> +	return c->left_start->pos;
> +}
> +
> +off_t
> +diff_chunk_get_right_start_pos(const struct diff_chunk *c)
> +{
> +	return c->right_start->pos;
> +}
> +
>  bool
>  diff_chunk_context_empty(const struct diff_chunk_context *cc)
>  {
> blob - 2d75c470b84a9c0f07ca7ccd39595267ab24fd49
> file + lib/worktree.c
> --- lib/worktree.c
> +++ lib/worktree.c
> @@ -1557,16 +1557,12 @@ get_modified_file_content_status(unsigned char *status
>  	if (err)
>  		goto done;
>  
> -	if (fseek(ondisk_file, 0L, SEEK_SET) == -1) {
> -		err = got_ferror(ondisk_file, GOT_ERR_IO);
> -		goto done;
> -	}
> -
>  	r = diffreg_result->result;
>  
>  	for (n = 0; n < r->chunks.len; n += nchunks_parsed) {
>  		struct diff_chunk *c;
>  		struct diff_chunk_context cc = {};
> +		off_t pos;
>  		int clc, crc;
>  
>  		/*
> @@ -1577,20 +1573,20 @@ get_modified_file_content_status(unsigned char *status
>  		clc = diff_chunk_get_left_count(c);
>  		crc = diff_chunk_get_right_count(c);
>  
> -		if (!crc && clc) {
> +		if (!crc || crc == clc) {
>  			nchunks_parsed = 1;
> -			continue;  /* removed lines */
> +			continue;  /* removed or unchanged lines */
>  		}
>  
> +		pos = diff_chunk_get_right_start_pos(c);
> +		if (fseek(ondisk_file, pos, SEEK_SET) == -1) {
> +			err = got_ferror(ondisk_file, GOT_ERR_IO);
> +			goto done;
> +		}
> +
>  		diff_chunk_context_load_change(&cc, &nchunks_parsed, r, n, 0);
> +		ln = cc.right.start;
>  
> -		while (ln < cc.right.start) {
> -			err = skip_one_line(ondisk_file);
> -			if (err)
> -				goto done;
> -			++ln;
> -		}
> -
>  		while (ln < cc.right.end) {
>  			linelen = getline(&line, &linesize, ondisk_file);
>  			if (linelen == -1) {
> 
> -- 
> Mark Jamsek <fnc.bsdbox.org|got.bsdbox.org>
> GPG: F2FF 13DE 6A06 C471 CA80  E6E2 2930 DC66 86EE CF68

-- 
Mark Jamsek <fnc.bsdbox.org|got.bsdbox.org>
GPG: F2FF 13DE 6A06 C471 CA80  E6E2 2930 DC66 86EE CF68