"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: merge_file() refactoring
To:
gameoftrees@openbsd.org
Date:
Mon, 31 May 2021 13:22:22 +0200

Download raw body.

Thread
On Sun, May 30, 2021 at 11:24:38AM +0200, Stefan Sperling wrote:
> Refactor merge_file() such that it no longer requires a blob object parameter.
> 
> This is a non-functional change, but one step of potentially many towards
> solving the cherrypick merge bug recently discussed in another thread.
> 
> ok?

This patch supersedes my previous patch and goes some steps further.

With this, merge_file() accepts FILE * arguments for the original and first
derived content used during a merge. The merge target (and second derived
content) must remain a path because merge_file() renames a temporary file
which contains the merge result onto the actual merge target path.

Overall, this results in cleaner separation of concerns between layers.
It forces us to push FILE * all the way down into lib/buf.c which gets
rid of a lot of redundant opening and closing of the same file.
Instead we now rewind the existing open file and reuse it. This should be
more efficient, in particular because this code runs with unveil(2) enabled.
But I didn't bother profiling this change.

ok?

diff refs/heads/main refs/heads/mergefile
blob - b26da2a0b727f30ffdba6e4970dafa32b3856f53
blob + f17dddcabf8aa19fd01c17dec68dc1a8ec2dd748
--- lib/buf.c
+++ lib/buf.c
@@ -83,38 +83,34 @@ buf_alloc(BUF **b, size_t len)
  * Sets errno on error.
  */
 const struct got_error *
-buf_load(BUF **buf, const char *path)
+buf_load(BUF **buf, FILE *f)
 {
 	const struct got_error *err = NULL;
-	int fd;
-	ssize_t ret;
+	size_t ret;
 	size_t len;
 	u_char *bp;
 	struct stat st;
 
 	*buf = NULL;
 
-	fd = open(path, O_RDONLY, 0600);
-	if (fd == -1)
-		return got_error_from_errno2("open", path);
-
-	if (fstat(fd, &st) == -1) {
-		err = got_error_from_errno2("fstat", path);
+	if (fstat(fileno(f), &st) == -1) {
+		err = got_error_from_errno("fstat");
 		goto out;
 	}
 
 	if ((uintmax_t)st.st_size > SIZE_MAX) {
-		err = got_error_set_errno(EFBIG, path);
+		err = got_error_set_errno(EFBIG,
+		    "cannot fit file into memory buffer");
 		goto out;
 	}
 	err = buf_alloc(buf, st.st_size);
 	if (err)
 		goto out;
-	for (bp = (*buf)->cb_buf; ; bp += (size_t)ret) {
+	for (bp = (*buf)->cb_buf; ; bp += ret) {
 		len = SIZE_LEFT(*buf);
-		ret = read(fd, bp, len);
-		if (ret == -1) {
-			err = got_error_from_errno2("read", path);
+		ret = fread(bp, 1, len, f);
+		if (ret == 0 && ferror(f)) {
+			err = got_ferror(f, GOT_ERR_IO);
 			goto out;
 		} else if (ret == 0)
 			break;
@@ -123,8 +119,6 @@ buf_load(BUF **buf, const char *path)
 	}
 
 out:
-	if (close(fd) == -1 && err == NULL)
-		err = got_error_from_errno2("close", path);
 	if (err) {
 		buf_free(*buf);
 		*buf = NULL;
blob - 9422c0f343fd6d75cb7d237cc91e732a5f60405e
blob + 354d7c4ea8684b77bc84fa0fcad6b6c8e2b2be80
--- lib/buf.h
+++ lib/buf.h
@@ -50,7 +50,7 @@ struct buf {
 };
 
 const struct got_error *buf_alloc(BUF **, size_t);
-const struct got_error *buf_load(BUF **, const char *);
+const struct got_error *buf_load(BUF **, FILE *);
 void		 buf_free(BUF *);
 void		*buf_release(BUF *);
 u_char		 buf_getc(BUF *, size_t);
blob - f9cd08718072cece748caacb6f92cb0c91edc747
blob + b8c328a2f9b853caabcc322afe5c68eefd3b4ccb
--- lib/diff3.c
+++ lib/diff3.c
@@ -236,8 +236,12 @@ diffreg(BUF **d, const char *path1, const char *path2)
 		err = got_error_from_errno2("fflush", outpath);
 		goto done;
 	}
+	if (fseek(outfile, 0L, SEEK_SET) == -1) {
+		err = got_ferror(outfile, GOT_ERR_IO);
+		goto done;
+	}
 
-	err = buf_load(d, outpath);
+	err = buf_load(d, outfile);
 done:
 	if (outpath) {
 		if (unlink(outpath) == -1 && err == NULL)
@@ -257,8 +261,8 @@ done:
  * For merge(1).
  */
 const struct got_error *
-got_merge_diff3(int *overlapcnt, int outfd, const char *p1, const char *p2,
-    const char *p3, const char *label1, const char *label2, const char *label3)
+got_merge_diff3(int *overlapcnt, int outfd, FILE *f1, FILE *f2,
+    FILE *f3, const char *label1, const char *label2, const char *label3)
 {
 	const struct got_error *err = NULL;
 	char *dp13, *dp23, *path1, *path2, *path3;
@@ -277,13 +281,13 @@ got_merge_diff3(int *overlapcnt, int outfd, const char
 	dp13 = dp23 = path1 = path2 = path3 = NULL;
 	data = patch = NULL;
 
-	err = buf_load(&b1, p1);
+	err = buf_load(&b1, f1);
 	if (err)
 		goto out;
-	err = buf_load(&b2, p2);
+	err = buf_load(&b2, f2);
 	if (err)
 		goto out;
-	err = buf_load(&b3, p3);
+	err = buf_load(&b3, f3);
 	if (err)
 		goto out;
 
blob - ae0e6d73bc7b11bc4dac33b1f7aca5cda1ce9071
blob + c056426dd1569f33cd5459cba2caaa263be5224d
--- lib/got_lib_diff.h
+++ lib/got_lib_diff.h
@@ -62,8 +62,8 @@ const struct got_error *got_diffreg_result_free_right(
 const struct got_error *got_diffreg_close(FILE *, char *, size_t,
     FILE *, char *, size_t);
 
-const struct got_error *got_merge_diff3(int *, int, const char *, const char *,
-    const char *, const char *, const char *, const char *);
+const struct got_error *got_merge_diff3(int *, int, FILE *, FILE *, FILE *,
+    const char *, const char *, const char *);
 
 const struct got_error *got_diff_files(struct got_diffreg_result **, FILE *,
     const char *, FILE *, const char *, int, int, int, FILE *);
blob - d2012785f43070b2587e5ab750848b6dbce1f336
blob + 016b6dc14971cb46b8d7c725a70ebbaf4d909336
--- lib/worktree.c
+++ lib/worktree.c
@@ -728,25 +728,19 @@ check_file_contents_equal(int *same, FILE *f1, FILE *f
 }
 
 static const struct got_error *
-check_files_equal(int *same, const char *f1_path, const char *f2_path)
+check_files_equal(int *same, FILE *f1, FILE *f2)
 {
-	const struct got_error *err = NULL;
 	struct stat sb;
 	size_t size1, size2;
-	FILE *f1 = NULL, *f2 = NULL;
 
 	*same = 1;
 
-	if (lstat(f1_path, &sb) != 0) {
-		err = got_error_from_errno2("lstat", f1_path);
-		goto done;
-	}
+	if (fstat(fileno(f1), &sb) != 0)
+		return got_error_from_errno("fstat");
 	size1 = sb.st_size;
 
-	if (lstat(f2_path, &sb) != 0) {
-		err = got_error_from_errno2("lstat", f2_path);
-		goto done;
-	}
+	if (fstat(fileno(f2), &sb) != 0)
+		return got_error_from_errno("fstat");
 	size2 = sb.st_size;
 
 	if (size1 != size2) {
@@ -754,48 +748,35 @@ check_files_equal(int *same, const char *f1_path, cons
 		return NULL;
 	}
 
-	f1 = fopen(f1_path, "r");
-	if (f1 == NULL)
-		return got_error_from_errno2("fopen", f1_path);
+	if (fseek(f1, 0L, SEEK_SET) == -1)
+		return got_ferror(f1, GOT_ERR_IO);
+	if (fseek(f2, 0L, SEEK_SET) == -1)
+		return got_ferror(f2, GOT_ERR_IO);
 
-	f2 = fopen(f2_path, "r");
-	if (f2 == NULL) {
-		err = got_error_from_errno2("fopen", f2_path);
-		goto done;
-	}
-
-	err = check_file_contents_equal(same, f1, f2);
-done:
-	if (f1 && fclose(f1) == EOF && err == NULL)
-		err = got_error_from_errno("fclose");
-	if (f2 && fclose(f2) == EOF && err == NULL)
-		err = got_error_from_errno("fclose");
-
-	return err;
+	return check_file_contents_equal(same, f1, f2);
 }
 
 /*
- * Perform a 3-way merge where blob_orig acts as the common ancestor,
- * the file at deriv_path acts as the first derived version, and the
- * file on disk acts as the second derived version.
+ * Perform a 3-way merge where the file f_orig acts as the common
+ * ancestor, the file f_deriv acts as the first derived version,
+ * and the file at ondisk_path acts as both the target and the
+ * second derived version
  */
 static const struct got_error *
 merge_file(int *local_changes_subsumed, struct got_worktree *worktree,
-    struct got_blob_object *blob_orig, const char *ondisk_path,
-    const char *path, uint16_t st_mode, const char *deriv_path,
+    FILE *f_orig, const char *ondisk_path,
+    const char *path, uint16_t st_mode, FILE *f_deriv,
     const char *label_orig, const char *label_deriv,
     struct got_repository *repo,
     got_worktree_checkout_cb progress_cb, void *progress_arg)
 {
 	const struct got_error *err = NULL;
 	int merged_fd = -1;
-	FILE *f_orig = NULL;
-	char *blob_orig_path = NULL;
+	FILE *f_merged = NULL;
 	char *merged_path = NULL, *base_path = NULL;
 	int overlapcnt = 0;
 	char *parent = NULL;
-	char *symlink_path = NULL;
-	FILE *symlinkf = NULL;
+	FILE *f = NULL;
 
 	*local_changes_subsumed = 0;
 
@@ -812,30 +793,8 @@ merge_file(int *local_changes_subsumed, struct got_wor
 	if (err)
 		goto done;
 
-	free(base_path);
-	if (asprintf(&base_path, "%s/got-merge-blob-orig", parent) == -1) {
-		err = got_error_from_errno("asprintf");
-		base_path = NULL;
-		goto done;
-	}
-
-	err = got_opentemp_named(&blob_orig_path, &f_orig, base_path);
-	if (err)
-		goto done;
-	if (blob_orig) {
-		err = got_object_blob_dump_to_file(NULL, NULL, NULL, f_orig,
-		    blob_orig);
-		if (err)
-			goto done;
-	} else {
-		/*
-		 * If the file has no blob, this is an "add vs add" conflict,
-		 * and we simply use an empty ancestor file to make both files
-		 * appear in the merged result in their entirety.
-		 */
-	}
-
 	/* 
+	 * Open the merge target file.
 	 * In order the run a 3-way merge with a symlink we copy the symlink's
 	 * target path into a temporary file and use that file with diff3.
 	 */
@@ -851,28 +810,46 @@ merge_file(int *local_changes_subsumed, struct got_wor
 			base_path = NULL;
 			goto done;
 		}
-		err = got_opentemp_named(&symlink_path, &symlinkf, base_path);
-		if (err)
+		f = got_opentemp();
+		if (f == NULL) {
+			err = got_error_from_errno("got_opentemp");
 			goto done;
+		}
 		target_len = readlink(ondisk_path, target_path,
 		    sizeof(target_path));
 		if (target_len == -1) {
 			err = got_error_from_errno2("readlink", ondisk_path);
 			goto done;
 		}
-		n = fwrite(target_path, 1, target_len, symlinkf);
+		n = fwrite(target_path, 1, target_len, f);
 		if (n != target_len) {
-			err = got_ferror(symlinkf, GOT_ERR_IO);
+			err = got_ferror(f, GOT_ERR_IO);
 			goto done;
 		}
-		if (fflush(symlinkf) == EOF) {
-			err = got_error_from_errno2("fflush", symlink_path);
+		if (fflush(f) == EOF) {
+			err = got_error_from_errno("fflush");
 			goto done;
 		}
+		if (fseek(f, 0L, SEEK_SET) == -1) {
+			err = got_ferror(f, GOT_ERR_IO);
+			goto done;
+		}
+	} else {
+		int fd;
+		fd = open(ondisk_path, O_RDONLY | O_NOFOLLOW);
+		if (fd == -1) {
+			err = got_error_from_errno2("open", ondisk_path);
+			goto done;
+		}
+		f = fdopen(fd, "r");
+		if (f == NULL) {
+			err = got_error_from_errno2("fdopen", ondisk_path);
+			close(fd);
+			goto done;
+		}
 	}
 
-	err = got_merge_diff3(&overlapcnt, merged_fd, deriv_path,
-	    blob_orig_path, symlink_path ? symlink_path : ondisk_path,
+	err = got_merge_diff3(&overlapcnt, merged_fd, f_deriv, f_orig, f,
 	    label_deriv, label_orig, NULL);
 	if (err)
 		goto done;
@@ -887,15 +864,22 @@ merge_file(int *local_changes_subsumed, struct got_wor
 		goto done;
 	}
 
+	f_merged = fdopen(merged_fd, "r");
+	if (f_merged == NULL) {
+		err = got_error_from_errno("fdopen");
+		goto done;
+	}
+	merged_fd = -1;
+
 	/* Check if a clean merge has subsumed all local changes. */
 	if (overlapcnt == 0) {
-		err = check_files_equal(local_changes_subsumed, deriv_path,
-		    merged_path);
+		err = check_files_equal(local_changes_subsumed, f_deriv,
+		    f_merged);
 		if (err)
 			goto done;
 	}
 
-	if (fchmod(merged_fd, st_mode) != 0) {
+	if (fchmod(fileno(f_merged), st_mode) != 0) {
 		err = got_error_from_errno2("fchmod", merged_path);
 		goto done;
 	}
@@ -910,23 +894,14 @@ done:
 		if (merged_path)
 			unlink(merged_path);
 	}
-	if (symlink_path) {
-		if (unlink(symlink_path) == -1 && err == NULL)
-			err = got_error_from_errno2("unlink", symlink_path);
-	}
-	if (symlinkf && fclose(symlinkf) == EOF && err == NULL)
-		err = got_error_from_errno2("fclose", symlink_path);
-	free(symlink_path);
+	if (f && fclose(f) == EOF && err == NULL)
+		err = got_error_from_errno("fclose");
 	if (merged_fd != -1 && close(merged_fd) == -1 && err == NULL)
 		err = got_error_from_errno("close");
-	if (f_orig && fclose(f_orig) == EOF && err == NULL)
+	if (f_merged && fclose(f_merged) == EOF && err == NULL)
 		err = got_error_from_errno("fclose");
 	free(merged_path);
 	free(base_path);
-	if (blob_orig_path) {
-		unlink(blob_orig_path);
-		free(blob_orig_path);
-	}
 	free(parent);
 	return err;
 }
@@ -1134,7 +1109,8 @@ merge_blob(int *local_changes_subsumed, struct got_wor
     got_worktree_checkout_cb progress_cb, void *progress_arg)
 {
 	const struct got_error *err = NULL;
-	FILE *f_deriv = NULL;
+	FILE *f_orig = NULL, *f_deriv = NULL;
+	char *blob_orig_path = NULL;
 	char *blob_deriv_path = NULL, *base_path = NULL, *id_str = NULL;
 	char *label_deriv = NULL, *parent = NULL;
 
@@ -1144,7 +1120,35 @@ merge_blob(int *local_changes_subsumed, struct got_wor
 	if (err)
 		return err;
 
-	free(base_path);
+	if (blob_orig) {
+		if (asprintf(&base_path, "%s/got-merge-blob-orig",
+		    parent) == -1) {
+			err = got_error_from_errno("asprintf");
+			base_path = NULL;
+			goto done;
+		}
+
+		err = got_opentemp_named(&blob_orig_path, &f_orig, base_path);
+		if (err)
+			goto done;
+		err = got_object_blob_dump_to_file(NULL, NULL, NULL, f_orig,
+		    blob_orig);
+		if (err)
+			goto done;
+		free(base_path);
+	} else {
+		/*
+		 * No common ancestor exists. This is an "add vs add" conflict
+		 * and we simply use an empty ancestor file to make both files
+		 * appear in the merged result in their entirety.
+		 */
+		f_orig = got_opentemp();
+		if (f_orig == NULL) {
+			err = got_error_from_errno("got_opentemp");
+			goto done;
+		}
+	}
+
 	if (asprintf(&base_path, "%s/got-merge-blob-deriv", parent) == -1) {
 		err = got_error_from_errno("asprintf");
 		base_path = NULL;
@@ -1168,13 +1172,19 @@ merge_blob(int *local_changes_subsumed, struct got_wor
 		goto done;
 	}
 
-	err = merge_file(local_changes_subsumed, worktree, blob_orig,
-	    ondisk_path, path, st_mode, blob_deriv_path, label_orig,
-	    label_deriv, repo, progress_cb, progress_arg);
+	err = merge_file(local_changes_subsumed, worktree, f_orig,
+	    ondisk_path, path, st_mode, f_deriv, label_orig, label_deriv,
+	    repo, progress_cb, progress_arg);
 done:
+	if (f_orig && fclose(f_orig) == EOF && err == NULL)
+		err = got_error_from_errno("fclose");
 	if (f_deriv && fclose(f_deriv) == EOF && err == NULL)
 		err = got_error_from_errno("fclose");
 	free(base_path);
+	if (blob_orig_path) {
+		unlink(blob_orig_path);
+		free(blob_orig_path);
+	}
 	if (blob_deriv_path) {
 		unlink(blob_deriv_path);
 		free(blob_deriv_path);
@@ -7705,8 +7715,10 @@ unstage_hunks(struct got_object_id *staged_blob_id,
 	const struct got_error *err = NULL;
 	char *path_unstaged_content = NULL;
 	char *path_new_staged_content = NULL;
+	char *parent = NULL, *base_path = NULL;
+	char *blob_base_path = NULL;
 	struct got_object_id *new_staged_blob_id = NULL;
-	FILE *f = NULL;
+	FILE *f = NULL, *f_base = NULL;
 	struct stat sb;
 
 	err = create_unstaged_content(&path_unstaged_content,
@@ -7755,10 +7767,30 @@ unstage_hunks(struct got_object_id *staged_blob_id,
 		    progress_arg);
 	} else {
 		int local_changes_subsumed;
+
+		err = got_path_dirname(&parent, ondisk_path);
+		if (err)
+			return err;
+
+		if (asprintf(&base_path, "%s/got-unstage-blob-orig",
+		    parent) == -1) {
+			err = got_error_from_errno("asprintf");
+			base_path = NULL;
+			goto done;
+		}
+
+		err = got_opentemp_named(&blob_base_path, &f_base, base_path);
+		if (err)
+			goto done;
+		err = got_object_blob_dump_to_file(NULL, NULL, NULL, f_base,
+		    blob_base);
+		if (err)
+			goto done;
+
 		err = merge_file(&local_changes_subsumed, worktree,
-		    blob_base, ondisk_path, ie->path,
+		    f_base, ondisk_path, ie->path,
 		    got_fileindex_perms_to_st(ie),
-		    path_unstaged_content, label_orig, "unstaged",
+		    f, label_orig, "unstaged",
 		    repo, progress_cb, progress_arg);
 	}
 	if (err)
@@ -7779,10 +7811,17 @@ done:
 	if (path_new_staged_content &&
 	    unlink(path_new_staged_content) == -1 && err == NULL)
 		err = got_error_from_errno2("unlink", path_new_staged_content);
+	if (blob_base_path && unlink(blob_base_path) == -1 && err == NULL)
+		err = got_error_from_errno2("unlink", blob_base_path);
+	if (f_base && fclose(f_base) == EOF && err == NULL)
+		err = got_error_from_errno2("fclose", path_unstaged_content);
 	if (f && fclose(f) == EOF && err == NULL)
 		err = got_error_from_errno2("fclose", path_unstaged_content);
 	free(path_unstaged_content);
 	free(path_new_staged_content);
+	free(blob_base_path);
+	free(parent);
+	free(base_path);
 	return err;
 }