Download raw body.
expose chunk offsets in the diff API
On 23-02-19 06:34PM, Stefan Sperling wrote:
> On Mon, Feb 20, 2023 at 12:18:00AM +1100, Mark Jamsek wrote:
> > On 23-02-20 12:05AM, Mark Jamsek wrote:
> > > As discussed with stsp, if we expose each chunk's offset, we can more
> > > efficiently seek to where we need to start parsing lines rather than
> > > skipping each line till we get there. The below diff makes this change.
> >
> > I completely forgot to explain that atom->pos is indeed set whether
> > mmap'd or not--it is just that atom->at is _only_ set when mmap'd.
> > This is done in the atomize routines in diff_atomize_text.c.
> > Perhaps the comment in the diff_main.h header could be more precise?
>
> Indeed. The code is good but the comment is misleading. We should
> fix the comment in diff.git and eventually sync over the change.
>
> > > This also requires tightening the check for chunks with only added lines
> > > so we don't grab the offset to unchanged lines (i.e., right_count ==
> > > left_count). On that note, should we also expose the diff_chunk_type()
> > > routine?
>
> I would say yes. Having this in the public diff APi will be useful as
> we expose more of the internals to improve caller efficiency.
Thanks, I think so too! The below diff amends the misleading comment,
makes diff_chunk_type() public, and replaces the handrolled chunk type
deduction with it in get_modified_file_content_status().
If ok, I'll commit the diff bits to diff.git and sync them over before
committing the worktree.c change.
diffstat /home/mark/src/got
M lib/diff_internal.h | 0+ 24-
M lib/diff_main.c | 16+ 0-
M lib/diff_main.h | 12+ 2-
M lib/worktree.c | 1+ 4-
4 files changed, 29 insertions(+), 30 deletions(-)
diff /home/mark/src/got
commit - 2262237761cd6c8c89a4d4263a8d9c05a7a81d70
path + /home/mark/src/got
blob - 7c2bfddb8f77a9026ec782026f3775a07e004f5e
file + lib/diff_internal.h
--- lib/diff_internal.h
+++ lib/diff_internal.h
@@ -96,30 +96,6 @@ enum diff_chunk_type {
#define DIFF_RESULT_ALLOC_BLOCKSIZE 128
-enum diff_chunk_type {
- CHUNK_EMPTY,
- CHUNK_PLUS,
- CHUNK_MINUS,
- CHUNK_SAME,
- CHUNK_ERROR,
-};
-
-static inline enum diff_chunk_type
-diff_chunk_type(const struct diff_chunk *chunk)
-{
- if (!chunk->left_count && !chunk->right_count)
- return CHUNK_EMPTY;
- if (!chunk->solved)
- return CHUNK_ERROR;
- if (!chunk->right_count)
- return CHUNK_MINUS;
- if (!chunk->left_count)
- return CHUNK_PLUS;
- if (chunk->left_count != chunk->right_count)
- return CHUNK_ERROR;
- return CHUNK_SAME;
-}
-
struct diff_chunk_context;
bool
blob - 25d476df152bc9f65ca65f47590adcfc8ef6ae48
file + lib/diff_main.c
--- lib/diff_main.c
+++ lib/diff_main.c
@@ -35,6 +35,22 @@ static int
#include "diff_internal.h"
#include "diff_debug.h"
+inline enum diff_chunk_type
+diff_chunk_type(const struct diff_chunk *chunk)
+{
+ if (!chunk->left_count && !chunk->right_count)
+ return CHUNK_EMPTY;
+ if (!chunk->solved)
+ return CHUNK_ERROR;
+ if (!chunk->right_count)
+ return CHUNK_MINUS;
+ if (!chunk->left_count)
+ return CHUNK_PLUS;
+ if (chunk->left_count != chunk->right_count)
+ return CHUNK_ERROR;
+ return CHUNK_SAME;
+}
+
static int
read_at(FILE *f, off_t at_pos, unsigned char *buf, size_t len)
{
blob - fa9dc982956eab298f65b4b9ffca114a46b50b76
file + lib/diff_main.h
--- lib/diff_main.h
+++ lib/diff_main.h
@@ -28,8 +28,8 @@ struct diff_atom {
struct diff_atom {
struct diff_data *root; /* back pointer to root diff data */
- off_t pos; /* if not memory-mapped */
- const uint8_t *at; /* if memory-mapped */
+ off_t pos; /* set whether memory-mapped or not */
+ const uint8_t *at; /* only set if memory-mapped */
off_t len;
/* This hash is just a very cheap speed up for finding *mismatching*
@@ -142,6 +142,16 @@ struct diff_state;
diff_chunk_arraylist_t chunks;
};
+enum diff_chunk_type {
+ CHUNK_EMPTY,
+ CHUNK_PLUS,
+ CHUNK_MINUS,
+ CHUNK_SAME,
+ CHUNK_ERROR,
+};
+
+enum diff_chunk_type diff_chunk_type(const struct diff_chunk *c);
+
struct diff_state;
/* Signature of a utility function to divide a file into diff atoms.
blob - 010ea00d11c12ab19a0251310a86b1a87bc8a3e9
file + lib/worktree.c
--- lib/worktree.c
+++ lib/worktree.c
@@ -1561,17 +1561,14 @@ get_modified_file_content_status(unsigned char *status
struct diff_chunk *c;
struct diff_chunk_context cc = {};
off_t pos;
- int clc, crc;
/*
* We can optimise a little by advancing straight
* to the next chunk if this one has no added lines.
*/
c = diff_chunk_get(r, n);
- clc = diff_chunk_get_left_count(c);
- crc = diff_chunk_get_right_count(c);
- if (!crc || crc == clc) {
+ if (diff_chunk_type(c) != CHUNK_PLUS) {
nchunks_parsed = 1;
continue; /* removed or unchanged lines */
}
--
Mark Jamsek <fnc.bsdbox.org|got.bsdbox.org>
GPG: F2FF 13DE 6A06 C471 CA80 E6E2 2930 DC66 86EE CF68
expose chunk offsets in the diff API