"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: add a diff cache to tog
To:
gameoftrees@openbsd.org
Date:
Sun, 1 Aug 2021 20:11:21 +0200

Download raw body.

Thread
On Sun, Aug 01, 2021 at 07:42:57PM +0200, Stefan Sperling wrote:
> This patch adds a diff cache to tog.
> 
> For very large changes computing the list of changed paths and text diffs
> can take quite a while. If we keep a couple of temporary files open which
> contain generated diff output we can avoid reproducing a diff from scratch
> if the user wants to see a diff again which we have already cached.
> The cache is keyed on object IDs and diff options.
> 
> The crazy nanobox-pkgsrc-lite.git repo is an obvious way test the benefits.
> But the effect can be noticable even on the OpenBSD src repository when
> browsing commits which touch pcidevs or the drm graphics drivers.
> 
> Does this patch make sense or is it introducing too much complexity?

Fixed version that doesn't crash on commits which only add or delete
files, such as the 'Initial import of NetBSD' commit in OpenBSD.

diff 773b2b4fbe075847b2f3465a54356d1d3992a9d5 987e0a673285446abc3a73b04d53c7410f1151e7
blob - ea3fba4da025916e37becf4e0cca9487bb3aded6
blob + 248da1efe12b305a2ba5b59f18dd69da9ad342ed
--- tog/tog.c
+++ tog/tog.c
@@ -17,6 +17,7 @@
 #include <sys/queue.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
+#include <sys/resource.h>
 
 #include <ctype.h>
 #include <errno.h>
@@ -299,6 +300,144 @@ struct tog_diff_view_state {
 	struct tog_view *log_view;
 };
 
+#define TOG_DIFF_CACHE_SIZE_MAX 64
+
+struct tog_diff_cache_entry {
+	struct got_object_id *id1, *id2;
+	FILE *f;
+	int diff_context;
+	int ignore_whitespace;
+	int force_text_diff;
+	size_t nlines;
+	off_t *line_offsets;
+};
+static struct tog_diff_cache_entry tog_diff_cache[TOG_DIFF_CACHE_SIZE_MAX];
+static int tog_diff_cache_size;
+static int tog_diff_cache_nelem;
+
+struct tog_diff_cache_entry *
+get_cached_diff(struct tog_diff_view_state *s)
+{
+	struct tog_diff_cache_entry *entry;
+	int i;
+
+	for (i = 0; i < tog_diff_cache_nelem; i++) {
+		entry = &tog_diff_cache[i];
+		if (s->id1 && entry->id1) {
+			if (got_object_id_cmp(s->id1, entry->id1) != 0)
+				continue;
+		} else if (s->id1 || entry->id1)
+			continue;
+		if (s->id2 && entry->id2) {
+			if (got_object_id_cmp(s->id2, entry->id2) != 0)
+				continue;
+		} else if (s->id2 || entry->id2)
+			continue;
+		if (s->diff_context == entry->diff_context &&
+		    s->ignore_whitespace == entry->ignore_whitespace &&
+		    s->force_text_diff == entry->force_text_diff)
+			return entry;
+	}
+
+	return NULL;
+}
+
+static const struct got_error *
+close_diff_cache_entry(struct tog_diff_cache_entry *entry)
+{
+	const struct got_error *err = NULL;
+
+	free(entry->id1);
+	free(entry->id2);
+	free(entry->line_offsets);
+	if (entry->f && fclose(entry->f) == EOF)
+		err = got_error_from_errno("fclose");
+	memset(entry, 0, sizeof(*entry));
+	return err;
+}
+
+static void
+tog_close_diffs(void)
+{
+	const struct got_error *err;
+	int i;
+
+	for (i = 0; i < tog_diff_cache_nelem; i++) {
+		err = close_diff_cache_entry(&tog_diff_cache[i]);
+		if (err) {
+			fprintf(stderr,
+			    "%s: error closing diff cache entry %d: %s",
+			    getprogname(), i, err->msg);
+		}
+	}
+}
+
+static const struct got_error *
+init_diff_cache_entry(struct tog_diff_cache_entry *entry,
+    struct tog_diff_view_state *s)
+{
+	const struct got_error *err;
+
+	if (s->id1) {
+		entry->id1 = got_object_id_dup(s->id1);
+		if (entry->id1 == NULL)
+			return got_error_from_errno("got_object_id_dup");
+	} else
+		entry->id1 = NULL;
+	if (s->id2) {
+		entry->id2 = got_object_id_dup(s->id2);
+		if (entry->id2 == NULL) {
+			err = got_error_from_errno("got_object_id_dup");
+			free(entry->id1);
+			entry->id1 = NULL;
+			return err;
+		}
+	} else 
+		s->id2 = NULL;
+	entry->diff_context = s->diff_context;
+	entry->ignore_whitespace = s->ignore_whitespace;
+	entry->force_text_diff = s->force_text_diff;
+	entry->nlines = s->nlines;
+	entry->line_offsets = s->line_offsets;
+	entry->f = s->f;	
+	return NULL;
+}
+
+static const struct got_error *
+cache_diff(struct tog_diff_view_state *s)
+{
+	const struct got_error *err;
+	struct tog_diff_cache_entry *entry = NULL;
+	struct rlimit rl;
+
+	/* Limit cache size according to resource limits. */
+	if (tog_diff_cache_size == 0) {
+		tog_diff_cache_size = TOG_DIFF_CACHE_SIZE_MAX;
+		if (getrlimit(RLIMIT_NOFILE, &rl) == -1)
+			return got_error_from_errno("getrlimit");
+		if (tog_diff_cache_size > rl.rlim_cur / 16)
+			tog_diff_cache_size = rl.rlim_cur / 16;
+	}
+	if (tog_diff_cache_size <= 0) /* bogus rlimit?!? */
+		return NULL;
+
+	if (tog_diff_cache_nelem >= tog_diff_cache_size) {
+		/* Cache is full. Close the last entry. */
+		entry = &tog_diff_cache[tog_diff_cache_nelem - 1];
+		err = close_diff_cache_entry(entry);
+		if (err)
+			return err;
+	} else
+		tog_diff_cache_nelem++;
+
+	/* Make space for a new entry at the front. */
+	memmove(&tog_diff_cache[1], &tog_diff_cache[0],
+	    sizeof(tog_diff_cache) - sizeof(tog_diff_cache[0]));
+
+	entry = &tog_diff_cache[0];
+	return init_diff_cache_entry(entry, s);
+}
+
 pthread_mutex_t tog_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 struct tog_log_thread_args {
@@ -2820,6 +2959,7 @@ done:
 	if (worktree)
 		got_worktree_close(worktree);
 	tog_free_refs();
+	tog_close_diffs();
 	return error;
 }
 
@@ -3256,25 +3396,27 @@ static const struct got_error *
 create_diff(struct tog_diff_view_state *s)
 {
 	const struct got_error *err = NULL;
-	FILE *f = NULL;
 	int obj_type;
+	struct tog_diff_cache_entry *cache_entry;
 
-	free(s->line_offsets);
+	cache_entry = get_cached_diff(s);
+	if (cache_entry) {
+		s->f = cache_entry->f;
+		s->line_offsets = cache_entry->line_offsets;
+		s->nlines = cache_entry->nlines;
+		return NULL;
+	}
+
 	s->line_offsets = malloc(sizeof(off_t));
 	if (s->line_offsets == NULL)
 		return got_error_from_errno("malloc");
 	s->nlines = 0;
 
-	f = got_opentemp();
-	if (f == NULL) {
+	s->f = got_opentemp();
+	if (s->f == NULL) {
 		err = got_error_from_errno("got_opentemp");
 		goto done;
 	}
-	if (s->f && fclose(s->f) == EOF) {
-		err = got_error_from_errno("fclose");
-		goto done;
-	}
-	s->f = f;
 
 	if (s->id1)
 		err = got_object_get_type(&obj_type, s->repo, s->id1);
@@ -3336,9 +3478,20 @@ create_diff(struct tog_diff_view_state *s)
 	}
 	if (err)
 		goto done;
+
+	err = cache_diff(s);
 done:
 	if (s->f && fflush(s->f) != 0 && err == NULL)
 		err = got_error_from_errno("fflush");
+	if (err) {
+		if (s->f) {
+			fclose(s->f);
+			s->f = NULL;
+		}
+		free(s->line_offsets);
+		s->line_offsets = NULL;
+		s->nlines = 0;
+	}
 	return err;
 }
 
@@ -3562,11 +3715,9 @@ close_diff_view(struct tog_view *view)
 	s->id1 = NULL;
 	free(s->id2);
 	s->id2 = NULL;
-	if (s->f && fclose(s->f) == EOF)
-		err = got_error_from_errno("fclose");
 	free_colors(&s->colors);
-	free(s->line_offsets);
-	s->line_offsets = NULL;
+	s->f = NULL;		/* cached */
+	s->line_offsets = NULL;	/* cached */
 	s->nlines = 0;
 	return err;
 }
@@ -3886,6 +4037,7 @@ done:
 	if (worktree)
 		got_worktree_close(worktree);
 	tog_free_refs();
+	tog_close_diffs();
 	return error;
 }
 
@@ -4813,6 +4965,7 @@ done:
 			error = close_err;
 	}
 	tog_free_refs();
+	tog_close_diffs();
 	return error;
 }
 
@@ -5639,6 +5792,7 @@ done:
 			error = close_err;
 	}
 	tog_free_refs();
+	tog_close_diffs();
 	return error;
 }
 
@@ -6305,6 +6459,7 @@ done:
 			error = close_err;
 	}
 	tog_free_refs();
+	tog_close_diffs();
 	return error;
 }
 
@@ -6459,6 +6614,7 @@ done:
 		free(cmd_argv);
 	}
 	tog_free_refs();
+	tog_close_diffs();
 	return error;
 }