"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
Re: got patch vs unchanged files
To:
Stefan Sperling <stsp@stsp.name>
Cc:
gameoftrees@openbsd.org
Date:
Sat, 19 Mar 2022 14:21:33 +0100

Download raw body.

Thread
Stefan Sperling <stsp@stsp.name> wrote:
> [...]
> One thing that is not very obvious, and can certainly be fixed later:
> 
> > +const struct got_error *
> > +got_worktree_patch_prepare(const char *old, const char *new,
> > +    char **oldpath, char **newpath, struct got_worktree *worktree,
> > +    struct got_repository *repo)
> > +{
> > +	const struct got_error *err = NULL;
> > +	struct got_fileindex *fileindex = NULL;
> > +	char *fileindex_path = NULL;
> > +	int file_renamed = 0;
> > +	unsigned char status_old, staged_status_old;
> > +	unsigned char status_new, staged_status_new;
> > +
> > +	*oldpath = NULL;
> > +	*newpath = NULL;
> > +
> > +	err = open_fileindex(&fileindex, &fileindex_path, worktree);
> 
> Opening the file index is relatively expensive on large work trees.
> This is why rebase, histedit, etc. return the fileindex to the caller
> as an opaque pointer for re-use. It can speed things up a lot.
> 
> In your case, this would require splitting this function in two,
> one for initialization, and one which performs the per-path checks.
> You would also need a function that cleans up (ie. closes the fileindex),
> called at the very end of the pack operation.
> Restructuring the code in this way should improve performance with large
> patches.
> 
> And if 'got patch' would ever want to track state which can be expressed
> as references in refs/got/, such a design would provide the places to
> create and clean up such references.

I didn't know about the open_fileindex cost, i still haven't tried to
profile 'got patch'; but since I'm adding this code for the first time,
why don't do the correct thing from the start? :)

updated diff with the function split into three:
got_worktree_patch_prepare which opens the fileindex, check_path which
does the path checks and patch_complete that closes the fileindex.

atm i don't need to track state across the patch operation, so the
_prepare and _complete are just wrappers around the fileindex open/close
function.  in the future i'd like to (or at least try to) create commits
from 'got patch' (akin to what 'git am' does) so it'll be useful.


diff 46ebad135d9dc52eb84d6985393298465fa7b3ff /home/op/w/got
blob - 2a33834a903f96a6f0206be4636193177a5f443d
file + include/got_worktree.h
--- include/got_worktree.h
+++ include/got_worktree.h
@@ -544,3 +544,21 @@ got_worktree_path_info(struct got_worktree *, struct g
 
 /* References pointing at pre-histedit commit backups. */
 #define GOT_WORKTREE_HISTEDIT_BACKUP_REF_PREFIX "refs/got/backup/histedit"
+
+/*
+ * Prepare for applying a patch.
+ */
+const struct got_error *
+got_worktree_patch_prepare(struct got_fileindex **, struct got_worktree *);
+
+/*
+ * Lookup paths for the "old" and "new" file before patching and check their
+ * status.
+ */
+const struct got_error *
+got_worktree_patch_check_path(const char *, const char *, char **, char **,
+    struct got_worktree *, struct got_repository *, struct got_fileindex *);
+
+/* Complete the patch operation. */
+const struct got_error *
+got_worktree_patch_complete(struct got_fileindex *);
blob - 4aabb46d0d6d31262d423862cce84ad3cf0b11f6
file + lib/patch.c
--- lib/patch.c
+++ lib/patch.c
@@ -497,91 +497,6 @@ done:
 }
 
 static const struct got_error *
-build_pathlist(const char *p, char **path, struct got_pathlist_head *head,
-    struct got_worktree *worktree)
-{
-	const struct got_error *err;
-	struct got_pathlist_entry *pe;
-
-	err = got_worktree_resolve_path(path, worktree, p);
-	if (err == NULL)
-		err = got_pathlist_insert(&pe, head, *path, NULL);
-	return err;
-}
-
-static const struct got_error *
-can_rm(void *arg, unsigned char status, unsigned char staged_status,
-    const char *path, struct got_object_id *blob_id,
-    struct got_object_id *staged_blob_id, struct got_object_id *commit_id,
-    int dirfd, const char *de_name)
-{
-	if (status == GOT_STATUS_NONEXISTENT)
-		return got_error_set_errno(ENOENT, path);
-	if (status != GOT_STATUS_NO_CHANGE &&
-	    status != GOT_STATUS_ADD &&
-	    status != GOT_STATUS_MODIFY &&
-	    status != GOT_STATUS_MODE_CHANGE)
-		return got_error_path(path, GOT_ERR_FILE_STATUS);
-	if (staged_status == GOT_STATUS_DELETE)
-		return got_error_path(path, GOT_ERR_FILE_STATUS);
-	return NULL;
-}
-
-static const struct got_error *
-can_add(void *arg, unsigned char status, unsigned char staged_status,
-    const char *path, struct got_object_id *blob_id,
-    struct got_object_id *staged_blob_id, struct got_object_id *commit_id,
-    int dirfd, const char *de_name)
-{
-	if (status != GOT_STATUS_NONEXISTENT)
-		return got_error_path(path, GOT_ERR_FILE_STATUS);
-	return NULL;
-}
-
-static const struct got_error *
-can_edit(void *arg, unsigned char status, unsigned char staged_status,
-    const char *path, struct got_object_id *blob_id,
-    struct got_object_id *staged_blob_id, struct got_object_id *commit_id,
-    int dirfd, const char *de_name)
-{
-	if (status == GOT_STATUS_NONEXISTENT)
-		return got_error_set_errno(ENOENT, path);
-	if (status != GOT_STATUS_NO_CHANGE &&
-	    status != GOT_STATUS_ADD &&
-	    status != GOT_STATUS_MODIFY)
-		return got_error_path(path, GOT_ERR_FILE_STATUS);
-	if (staged_status == GOT_STATUS_DELETE)
-		return got_error_path(path, GOT_ERR_FILE_STATUS);
-	return NULL;
-}
-
-static const struct got_error *
-check_file_status(struct got_patch *p, int file_renamed,
-    struct got_worktree *worktree, struct got_repository *repo,
-    struct got_pathlist_head *old, struct got_pathlist_head *new,
-    got_cancel_cb cancel_cb, void *cancel_arg)
-{
-	static const struct got_error *err;
-
-	if (p->old != NULL && p->new == NULL)
-		return got_worktree_status(worktree, old, repo, 0,
-		    can_rm, NULL, cancel_cb, cancel_arg);
-	else if (file_renamed) {
-		err = got_worktree_status(worktree, old, repo, 0,
-		    can_rm, NULL, cancel_cb, cancel_arg);
-		if (err)
-			return err;
-		return got_worktree_status(worktree, new, repo, 0,
-		    can_add, NULL, cancel_cb, cancel_arg);
-	} else if (p->old == NULL)
-		return got_worktree_status(worktree, new, repo, 0,
-		    can_add, NULL, cancel_cb, cancel_arg);
-	else
-		return got_worktree_status(worktree, new, repo, 0,
-		    can_edit, NULL, cancel_cb, cancel_arg);
-}
-
-static const struct got_error *
 report_progress(struct patch_args *pa, const char *old, const char *new,
     unsigned char status, const struct got_error *orig_error)
 {
@@ -622,23 +537,29 @@ patch_add(void *arg, unsigned char status, const char 
 
 static const struct got_error *
 apply_patch(struct got_worktree *worktree, struct got_repository *repo,
-    struct got_pathlist_head *oldpaths, struct got_pathlist_head *newpaths,
     const char *oldpath, const char *newpath, struct got_patch *p,
     int nop, struct patch_args *pa, got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL;
+	struct got_pathlist_head oldpaths, newpaths;
+	struct got_pathlist_entry *pe;
 	int file_renamed = 0;
 	char *tmppath = NULL, *template = NULL, *parent = NULL;;
 	FILE *tmp = NULL;
 	mode_t mode = GOT_DEFAULT_FILE_MODE;
 
+	TAILQ_INIT(&oldpaths);
+	TAILQ_INIT(&newpaths);
+
+	err = got_pathlist_insert(&pe, &oldpaths, oldpath, NULL);
+	if (err)
+		goto done;
+	err = got_pathlist_insert(&pe, &newpaths, newpath, NULL);
+	if (err)
+		goto done;
+
 	file_renamed = strcmp(oldpath, newpath);
 
-	err = check_file_status(p, file_renamed, worktree, repo, oldpaths,
-	    newpaths, cancel_cb, cancel_arg);
-	if (err)
-		goto done;
-
 	if (asprintf(&template, "%s/got-patch",
 	    got_worktree_get_root_path(worktree)) == -1) {
 		err = got_error_from_errno(template);
@@ -657,7 +578,7 @@ apply_patch(struct got_worktree *worktree, struct got_
 		goto done;
 
 	if (p->old != NULL && p->new == NULL) {
-		err = got_worktree_schedule_delete(worktree, oldpaths,
+		err = got_worktree_schedule_delete(worktree, &oldpaths,
 		    0, NULL, patch_delete, pa, repo, 0, 0);
 		goto done;
 	}
@@ -688,21 +609,25 @@ apply_patch(struct got_worktree *worktree, struct got_
 	}
 
 	if (file_renamed) {
-		err = got_worktree_schedule_delete(worktree, oldpaths,
+		err = got_worktree_schedule_delete(worktree, &oldpaths,
 		    0, NULL, patch_delete, pa, repo, 0, 0);
 		if (err == NULL)
-			err = got_worktree_schedule_add(worktree, newpaths,
+			err = got_worktree_schedule_add(worktree, &newpaths,
 			    patch_add, pa, repo, 1);
-	} else if (p->old == NULL)
-		err = got_worktree_schedule_add(worktree, newpaths,
+		if (err)
+			unlink(newpath);
+	} else if (p->old == NULL) {
+		err = got_worktree_schedule_add(worktree, &newpaths,
 		    patch_add, pa, repo, 1);
-	else
+		if (err)
+			unlink(newpath);
+	} else
 		err = report_progress(pa, oldpath, newpath, GOT_STATUS_MODIFY,
 		    NULL);
 
 done:
-	if (err != NULL && newpath != NULL && (file_renamed || p->old == NULL))
-		unlink(newpath);
+	got_pathlist_free(&oldpaths);
+	got_pathlist_free(&newpaths);
 	free(parent);
 	free(template);
 	if (tmppath != NULL)
@@ -711,44 +636,13 @@ done:
 	return err;
 }
 
-static const struct got_error *
-resolve_paths(struct got_patch *p, struct got_worktree *worktree,
-    struct got_repository *repo, struct got_pathlist_head *oldpaths,
-    struct got_pathlist_head *newpaths, char **old, char **new)
-{
-	const struct got_error *err;
-
-	TAILQ_INIT(oldpaths);
-	TAILQ_INIT(newpaths);
-	*old = NULL;
-	*new = NULL;
-
-	err = build_pathlist(p->old != NULL ? p->old : p->new, old,
-	    oldpaths, worktree);
-	if (err)
-		goto err;
-
-	err = build_pathlist(p->new != NULL ? p->new : p->old, new,
-	    newpaths, worktree);
-	if (err)
-		goto err;
-	return NULL;
-
-err:
-	free(*old);
-	free(*new);
-	got_pathlist_free(oldpaths);
-	got_pathlist_free(newpaths);
-	return err;
-}
-
 const struct got_error *
 got_patch(int fd, struct got_worktree *worktree, struct got_repository *repo,
     int nop, got_patch_progress_cb progress_cb, void *progress_arg,
     got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL;
-	struct got_pathlist_head oldpaths, newpaths;
+	struct got_fileindex *fileindex = NULL;
 	char *oldpath, *newpath;
 	struct imsgbuf *ibuf;
 	int imsg_fds[2] = {-1, -1};
@@ -788,6 +682,10 @@ got_patch(int fd, struct got_worktree *worktree, struc
 	if (err)
 		goto done;
 
+	err = got_worktree_patch_prepare(&fileindex, worktree);
+	if (err)
+		goto done;
+
 	while (!done && err == NULL) {
 		struct got_patch p;
 		struct patch_args pa;
@@ -800,13 +698,11 @@ got_patch(int fd, struct got_worktree *worktree, struc
 		if (err || done)
 			break;
 
-		err = resolve_paths(&p, worktree, repo, &oldpaths,
-		    &newpaths, &oldpath, &newpath);
-		if (err)
-			break;
-
-		err = apply_patch(worktree, repo, &oldpaths, &newpaths,
-		    oldpath, newpath, &p, nop, &pa, cancel_cb, cancel_arg);
+		err = got_worktree_patch_check_path(p.old, p.new, &oldpath,
+		    &newpath, worktree, repo, fileindex);
+		if (err == NULL)
+			err = apply_patch(worktree, repo, oldpath, newpath,
+			    &p, nop, &pa, cancel_cb, cancel_arg);
 		if (err != NULL) {
 			failed = 1;
 			/* recoverable errors */
@@ -821,8 +717,6 @@ got_patch(int fd, struct got_worktree *worktree, struc
 
 		free(oldpath);
 		free(newpath);
-		got_pathlist_free(&oldpaths);
-		got_pathlist_free(&newpaths);
 		patch_free(&p);
 
 		if (err)
@@ -830,6 +724,8 @@ got_patch(int fd, struct got_worktree *worktree, struc
 	}
 
 done:
+	if (fileindex)
+		got_worktree_patch_complete(fileindex);
 	if (fd != -1 && close(fd) == -1 && err == NULL)
 		err = got_error_from_errno("close");
 	if (ibuf != NULL)
blob - 26ce9d0c775728d7d4b6a3450c3704c51f29672a
file + lib/worktree.c
--- lib/worktree.c
+++ lib/worktree.c
@@ -8712,3 +8712,150 @@ done:
 		err = unlockerr;
 	return err;
 }
+
+static const struct got_error *
+patch_prepare_path(const char *p, char **path, unsigned char *status,
+    unsigned char *staged_status, struct got_fileindex *fileindex,
+    struct got_worktree *worktree, struct got_repository *repo)
+{
+	const struct got_error *err;
+	struct got_fileindex_entry *ie;
+	struct stat sb;
+	char *ondisk_path = NULL;
+
+	err = got_worktree_resolve_path(path, worktree, p);
+	if (err)
+		return err;
+
+	if (asprintf(&ondisk_path, "%s%s%s", worktree->root_path,
+	    *path[0] ? "/" : "", *path) == -1)
+		return got_error_from_errno("asprintf");
+
+	ie = got_fileindex_entry_get(fileindex, *path, strlen(*path));
+	if (ie) {
+		*staged_status = get_staged_status(ie);
+		err = get_file_status(status, &sb, ie, *path, -1, NULL, repo);
+		if (err)
+			goto done;
+	} else {
+		*staged_status = GOT_STATUS_NO_CHANGE;
+		*status = GOT_STATUS_UNVERSIONED;
+		if (lstat(ondisk_path, &sb) == -1) {
+			if (errno != ENOENT) {
+				err = got_error_from_errno2("lstat",
+				    ondisk_path);
+				goto done;
+			}
+			*status = GOT_STATUS_NONEXISTENT;
+		}
+	}
+
+done:
+	free(ondisk_path);
+	return err;
+}
+
+static const struct got_error *
+patch_can_rm(const char *path, unsigned char status,
+    unsigned char staged_status)
+{
+	if (status == GOT_STATUS_NONEXISTENT)
+		return got_error_set_errno(ENOENT, path);
+	if (status != GOT_STATUS_NO_CHANGE &&
+	    status != GOT_STATUS_ADD &&
+	    status != GOT_STATUS_MODIFY &&
+	    status != GOT_STATUS_MODE_CHANGE)
+		return got_error_path(path, GOT_ERR_FILE_STATUS);
+	if (staged_status == GOT_STATUS_DELETE)
+		return got_error_path(path, GOT_ERR_FILE_STATUS);
+	return NULL;
+}
+
+static const struct got_error *
+patch_can_add(const char *path, unsigned char status)
+{
+	if (status != GOT_STATUS_NONEXISTENT)
+		return got_error_path(path, GOT_ERR_FILE_STATUS);
+	return NULL;
+}
+
+static const struct got_error *
+patch_can_edit(const char *path, unsigned char status,
+    unsigned char staged_status)
+{
+	if (status == GOT_STATUS_NONEXISTENT)
+		return got_error_set_errno(ENOENT, path);
+	if (status != GOT_STATUS_NO_CHANGE &&
+	    status != GOT_STATUS_ADD &&
+	    status != GOT_STATUS_MODIFY)
+		return got_error_path(path, GOT_ERR_FILE_STATUS);
+	if (staged_status == GOT_STATUS_DELETE)
+		return got_error_path(path, GOT_ERR_FILE_STATUS);
+	return NULL;
+}
+
+const struct got_error *
+got_worktree_patch_prepare(struct got_fileindex **fileindex,
+    struct got_worktree *worktree)
+{
+	const struct got_error *err;
+	char *fileindex_path = NULL;
+
+	err = open_fileindex(fileindex, &fileindex_path, worktree);
+	free(fileindex_path);
+	return err;
+}
+
+const struct got_error *
+got_worktree_patch_check_path(const char *old, const char *new,
+    char **oldpath, char **newpath, struct got_worktree *worktree,
+    struct got_repository *repo, struct got_fileindex *fileindex)
+{
+	const struct got_error *err = NULL;
+	int file_renamed = 0;
+	unsigned char status_old, staged_status_old;
+	unsigned char status_new, staged_status_new;
+
+	*oldpath = NULL;
+	*newpath = NULL;
+
+	err = patch_prepare_path(old != NULL ? old : new, oldpath,
+	    &status_old, &staged_status_old, fileindex, worktree, repo);
+	if (err)
+		goto done;
+
+	err = patch_prepare_path(new != NULL ? new : old, newpath,
+	    &status_new, &staged_status_new, fileindex, worktree, repo);
+	if (err)
+		goto done;
+
+	if (old != NULL && new != NULL && strcmp(old, new) != 0)
+		file_renamed = 1;
+
+	if (old != NULL && new == NULL)
+		err = patch_can_rm(*oldpath, status_old, staged_status_old);
+	else if (file_renamed) {
+		err = patch_can_rm(*oldpath, status_old, staged_status_old);
+		if (err == NULL)
+			err = patch_can_add(*newpath, status_new);
+	} else if (old == NULL)
+		err = patch_can_add(*newpath, status_new);
+	else
+		err = patch_can_edit(*newpath, status_new, staged_status_new);
+
+done:
+	if (err) {
+		free(*oldpath);
+		*oldpath = NULL;
+		free(*newpath);
+		*newpath = NULL;
+	}
+	return err;
+}
+
+const struct got_error *
+got_worktree_patch_complete(struct got_fileindex *fileindex)
+{
+	got_fileindex_free(fileindex);
+	return NULL;
+}
blob - 65fb8dead18ae13233a8eb24e2f1f26a41d8906e
file + regress/cmdline/patch.sh
--- regress/cmdline/patch.sh
+++ regress/cmdline/patch.sh
@@ -824,7 +824,22 @@ EOF
 	ret=$?
 	if [ $ret -ne 0 ]; then
 		diff -u $testroot/stderr.expected $testroot/stderr
+		test_done $testroot $ret
+		return 1
 	fi
+
+	(cd $testroot/wt && got status) > $testroot/stdout
+	cat <<EOF > $testroot/stdout.expected
+~  alpha
+?  kappa
+?  patch
+EOF
+
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+	fi
 	test_done $testroot $ret
 }