"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Mark Jamsek <mark@jamsek.com>
Subject:
expose chunk offsets in the diff API
To:
Game of Trees <gameoftrees@openbsd.org>
Date:
Mon, 20 Feb 2023 00:05:53 +1100

Download raw body.

Thread
As discussed with stsp, if we expose each chunk's byte offset, we can seek
directly to where we need to start parsing lines rather than skipping
line by line until we get there. The diff below makes this change.

This also requires tightening the check for chunks with only added lines
so that we don't grab the offset of unchanged lines (i.e., right_count ==
left_count). On that note, should we also expose the diff_chunk_type()
routine?

Regress is obviously unaffected by this change.

diffstat /home/mark/src/got
 M  lib/diff_output.h          |   2+   0-
 M  lib/diff_output_unidiff.c  |  12+   0-
 M  lib/worktree.c             |  10+  14-

3 files changed, 24 insertions(+), 14 deletions(-)

diff /home/mark/src/got
commit - 53e553e8ded01524f60c018b2266acc39af30669
path + /home/mark/src/got
blob - e2f352bf17421cfdc1dd349b9740f53d91599b2e
file + lib/diff_output.h
--- lib/diff_output.h
+++ lib/diff_output.h
@@ -74,6 +74,8 @@ struct diff_chunk *diff_chunk_get(const struct diff_re
 int diff_chunk_get_right_end(const struct diff_chunk *c,
 			     const struct diff_result *r,
 			     int context_lines);
+off_t diff_chunk_get_left_start_pos(const struct diff_chunk *c);
+off_t diff_chunk_get_right_start_pos(const struct diff_chunk *c);
 struct diff_chunk *diff_chunk_get(const struct diff_result *r, int chunk_idx);
 int diff_chunk_get_left_count(struct diff_chunk *c);
 int diff_chunk_get_right_count(struct diff_chunk *c);
blob - b20dabf6658691770d21bfe3c1227ae815223474
file + lib/diff_output_unidiff.c
--- lib/diff_output_unidiff.c
+++ lib/diff_output_unidiff.c
@@ -30,6 +30,18 @@ bool
 #include "diff_internal.h"
 #include "diff_debug.h"
 
+off_t
+diff_chunk_get_left_start_pos(const struct diff_chunk *c)
+{
+	return c->left_start->pos;
+}
+
+off_t
+diff_chunk_get_right_start_pos(const struct diff_chunk *c)
+{
+	return c->right_start->pos;
+}
+
 bool
 diff_chunk_context_empty(const struct diff_chunk_context *cc)
 {
blob - 2d75c470b84a9c0f07ca7ccd39595267ab24fd49
file + lib/worktree.c
--- lib/worktree.c
+++ lib/worktree.c
@@ -1557,16 +1557,12 @@ get_modified_file_content_status(unsigned char *status
 	if (err)
 		goto done;
 
-	if (fseek(ondisk_file, 0L, SEEK_SET) == -1) {
-		err = got_ferror(ondisk_file, GOT_ERR_IO);
-		goto done;
-	}
-
 	r = diffreg_result->result;
 
 	for (n = 0; n < r->chunks.len; n += nchunks_parsed) {
 		struct diff_chunk *c;
 		struct diff_chunk_context cc = {};
+		off_t pos;
 		int clc, crc;
 
 		/*
@@ -1577,20 +1573,20 @@ get_modified_file_content_status(unsigned char *status
 		clc = diff_chunk_get_left_count(c);
 		crc = diff_chunk_get_right_count(c);
 
-		if (!crc && clc) {
+		if (!crc || crc == clc) {
 			nchunks_parsed = 1;
-			continue;  /* removed lines */
+			continue;  /* removed or unchanged lines */
 		}
 
+		pos = diff_chunk_get_right_start_pos(c);
+		if (fseek(ondisk_file, pos, SEEK_SET) == -1) {
+			err = got_ferror(ondisk_file, GOT_ERR_IO);
+			goto done;
+		}
+
 		diff_chunk_context_load_change(&cc, &nchunks_parsed, r, n, 0);
+		ln = cc.right.start;
 
-		while (ln < cc.right.start) {
-			err = skip_one_line(ondisk_file);
-			if (err)
-				goto done;
-			++ln;
-		}
-
 		while (ln < cc.right.end) {
 			linelen = getline(&line, &linesize, ondisk_file);
 			if (linelen == -1) {

-- 
Mark Jamsek <fnc.bsdbox.org|got.bsdbox.org>
GPG: F2FF 13DE 6A06 C471 CA80  E6E2 2930 DC66 86EE CF68