"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
batch up tree entries in imsg
To:
gameoftrees@openbsd.org
Date:
Wed, 18 May 2022 14:09:16 +0200

Download raw body.

Thread
Currently we use one imsg per tree entry.

Each imsg involves a separate malloc/free dance. It is faster to
utlize space available in each message by filling it with as many
tree entries as possible.

In my test case, this change reduces the 'loading objects' phase of
gotadmin pack from 3 minutes to 2.5 minutes (on OpenBSD src.git).

This patch depends on the tree parsing patch I sent earlier.

Tested on amd64 and sparc64.

ok (for both diffs)?

diff 7fa6646b4f5ef336127bda962d89ba86799b84dc a4bc9edd34cbdb5a3fe13b7c207c69484b6c6322
blob - c6937820f54e6d66c1cbf81056a6b916516c9c5d
blob + e719a95bde6bbe971668bb22ea3a72e2990ad927
--- lib/got_lib_privsep.h
+++ lib/got_lib_privsep.h
@@ -106,7 +106,7 @@ enum got_imsg_type {
 	GOT_IMSG_COMMIT_LOGMSG,
 	GOT_IMSG_TREE_REQUEST,
 	GOT_IMSG_TREE,
-	GOT_IMSG_TREE_ENTRY,
+	GOT_IMSG_TREE_ENTRIES,
 	GOT_IMSG_BLOB_REQUEST,
 	GOT_IMSG_BLOB_OUTFD,
 	GOT_IMSG_BLOB,
@@ -246,17 +246,23 @@ struct got_imsg_commit_object {
 	 */
 } __attribute__((__packed__));
 
-
-/* Structure for GOT_IMSG_TREE_ENTRY. */
 struct got_imsg_tree_entry {
 	char id[SHA1_DIGEST_LENGTH];
 	mode_t mode;
-	/* Followed by entry's name in remaining data of imsg buffer. */
+	size_t namelen;
+	/* Followed by namelen bytes of entry's name, not NUL-terminated. */
 } __attribute__((__packed__));
 
+/* Structure for GOT_IMSG_TREE_ENTRIES. */
+struct got_imsg_tree_entries {
+	size_t nentries; /* Number of tree entries contained in this message. */
+
+	/* Followed by nentries * struct got_imsg_tree_entry */
+};
+
 /* Structure for GOT_IMSG_TREE_OBJECT_REPLY data. */
 struct got_imsg_tree_object {
-	int nentries; /* This many TREE_ENTRY messages follow. */
+	int nentries; /* This many tree entries follow. */
 };
 
 /* Structure for GOT_IMSG_BLOB. */
blob - 7b7f894dda9fc261842dfd019c4e9c2f444508a0
blob + b47d652e432786fd20c6ff729e2cb3990037f756
--- lib/privsep.c
+++ lib/privsep.c
@@ -1489,64 +1489,96 @@ got_privsep_recv_commit(struct got_commit_object **com
 	return err;
 }
 
+static const struct got_error *
+send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries,
+    int idx0, int idxN, size_t len)
+{
+	static const struct got_error *err;
+	struct ibuf *wbuf;
+	struct got_imsg_tree_entries ientries;
+	int i;
+
+	wbuf = imsg_create(ibuf, GOT_IMSG_TREE_ENTRIES, 0, 0, len);
+	if (wbuf == NULL)
+		return got_error_from_errno("imsg_create TREE_ENTRY");
+
+	ientries.nentries = idxN - idx0 + 1;
+	if (imsg_add(wbuf, &ientries, sizeof(ientries)) == -1) {
+		err = got_error_from_errno("imsg_add TREE_ENTRY");
+		ibuf_free(wbuf);
+		return err;
+	}
+
+	for (i = idx0; i <= idxN; i++) {
+		struct got_parsed_tree_entry *pte = &entries[i];
+
+		/* Keep in sync with struct got_imsg_tree_object definition! */
+		if (imsg_add(wbuf, pte->id, SHA1_DIGEST_LENGTH) == -1) {
+			err = got_error_from_errno("imsg_add TREE_ENTRY");
+			ibuf_free(wbuf);
+			return err;
+		}
+		if (imsg_add(wbuf, &pte->mode, sizeof(pte->mode)) == -1) {
+			err = got_error_from_errno("imsg_add TREE_ENTRY");
+			ibuf_free(wbuf);
+			return err;
+		}
+		if (imsg_add(wbuf, &pte->namelen, sizeof(pte->namelen)) == -1) {
+			err = got_error_from_errno("imsg_add TREE_ENTRY");
+			ibuf_free(wbuf);
+			return err;
+		}
+		
+		/* Remaining bytes are the entry's name. */
+		if (imsg_add(wbuf, pte->name, pte->namelen) == -1) {
+			err = got_error_from_errno("imsg_add TREE_ENTRY");
+			ibuf_free(wbuf);
+			return err;
+		}
+	}
+
+	wbuf->fd = -1;
+	imsg_close(ibuf, wbuf);
+	return NULL;
+}
+
 const struct got_error *
 got_privsep_send_tree(struct imsgbuf *ibuf,
     struct got_parsed_tree_entry *entries, int nentries)
 {
 	const struct got_error *err = NULL;
 	struct got_imsg_tree_object itree;
-	size_t totlen;
-	int i, nimsg; /* number of imsg queued in ibuf */
+	size_t entries_len;
+	int i, j;
 
 	itree.nentries = nentries;
 	if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree))
 	    == -1)
 		return got_error_from_errno("imsg_compose TREE");
 
-	totlen = sizeof(itree);
-	nimsg = 1;
-	for (i = 0; i < nentries; i++) {
-		struct got_parsed_tree_entry *pte = &entries[i];
-		struct ibuf *wbuf;
-		size_t len = sizeof(struct got_imsg_tree_entry) + pte->namelen;
+	entries_len = sizeof(struct got_imsg_tree_entries);
+	i = 0;
+	for (j = 0; j < nentries; j++) {
+		struct got_parsed_tree_entry *pte = &entries[j];
+		size_t len = sizeof(*pte) + pte->namelen;
 
-		if (len > MAX_IMSGSIZE)
-			return got_error(GOT_ERR_NO_SPACE);
-
-		nimsg++;
-		if (totlen + len >= MAX_IMSGSIZE - (IMSG_HEADER_SIZE * nimsg)) {
-			err = flush_imsg(ibuf);
+		if (j > 0 &&
+		    entries_len + len > MAX_IMSGSIZE - IMSG_HEADER_SIZE) {
+			err = send_tree_entries(ibuf, entries,
+			    i, j - 1, entries_len);
 			if (err)
 				return err;
-			nimsg = 0;
+			i = j;
+			entries_len = sizeof(struct got_imsg_tree_entries);
 		}
 
-		wbuf = imsg_create(ibuf, GOT_IMSG_TREE_ENTRY, 0, 0, len);
-		if (wbuf == NULL)
-			return got_error_from_errno("imsg_create TREE_ENTRY");
+		entries_len += len;
+	}
 
-		/* Keep in sync with struct got_imsg_tree_object definition! */
-		if (imsg_add(wbuf, pte->id, SHA1_DIGEST_LENGTH) == -1) {
-			err = got_error_from_errno("imsg_add TREE_ENTRY");
-			ibuf_free(wbuf);
+	if (j > 0) {
+		err = send_tree_entries(ibuf, entries, i, j - 1, entries_len);
+		if (err)
 			return err;
-		}
-		if (imsg_add(wbuf, &pte->mode, sizeof(pte->mode)) == -1) {
-			err = got_error_from_errno("imsg_add TREE_ENTRY");
-			ibuf_free(wbuf);
-			return err;
-		}
-
-		if (imsg_add(wbuf, pte->name, pte->namelen) == -1) {
-			err = got_error_from_errno("imsg_add TREE_ENTRY");
-			ibuf_free(wbuf);
-			return err;
-		}
-
-		wbuf->fd = -1;
-		imsg_close(ibuf, wbuf);
-
-		totlen += len;
 	}
 
 	return flush_imsg(ibuf);
@@ -1560,6 +1592,7 @@ got_privsep_recv_tree(struct got_tree_object **tree, s
 	    MIN(sizeof(struct got_imsg_error),
 	    sizeof(struct got_imsg_tree_object));
 	struct got_imsg_tree_object *itree;
+	size_t i;
 	int nentries = 0;
 
 	*tree = NULL;
@@ -1572,8 +1605,9 @@ got_privsep_recv_tree(struct got_tree_object **tree, s
 		struct imsg imsg;
 		size_t n;
 		size_t datalen;
-		struct got_imsg_tree_entry *ite;
+		struct got_imsg_tree_entries *ientries;
 		struct got_tree_entry *te = NULL;
+		size_t te_offset;
 
 		n = imsg_get(ibuf, &imsg);
 		if (n == 0) {
@@ -1634,41 +1668,71 @@ got_privsep_recv_tree(struct got_tree_object **tree, s
 			(*tree)->nentries = itree->nentries;
 			(*tree)->refcnt = 0;
 			break;
-		case GOT_IMSG_TREE_ENTRY:
+		case GOT_IMSG_TREE_ENTRIES:
 			/* This message should be preceeded by GOT_IMSG_TREE. */
 			if (*tree == NULL) {
 				err = got_error(GOT_ERR_PRIVSEP_MSG);
 				break;
 			}
-			if (datalen < sizeof(*ite) || datalen > MAX_IMSGSIZE) {
+			if (datalen <= sizeof(*ientries) ||
+			    datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE) {
 				err = got_error(GOT_ERR_PRIVSEP_LEN);
 				break;
 			}
 
-			/* Remaining data contains the entry's name. */
-			datalen -= sizeof(*ite);
-			if (datalen == 0 || datalen > MAX_IMSGSIZE) {
-				err = got_error(GOT_ERR_PRIVSEP_LEN);
+			ientries = imsg.data;
+			if (ientries->nentries > INT_MAX) {
+				err = got_error_msg(GOT_ERR_NO_SPACE,
+				    "too many tree entries");
 				break;
 			}
-			ite = imsg.data;
+			te_offset = sizeof(*ientries);
+			for (i = 0; i < ientries->nentries; i++) {
+				struct got_imsg_tree_entry ite;
+				const char *te_name;
+				uint8_t *buf = imsg.data + te_offset;
 
-			if (datalen + 1 > sizeof(te->name)) {
-				err = got_error(GOT_ERR_NO_SPACE);
-				break;
-			}
-			if (nentries >= (*tree)->nentries) {
-				err = got_error(GOT_ERR_PRIVSEP_LEN);
-				break;
-			}
-			te = &(*tree)->entries[nentries];
-			memcpy(te->name, imsg.data + sizeof(*ite), datalen);
-			te->name[datalen] = '\0';
+				if (te_offset >= datalen) {
+					err = got_error(GOT_ERR_PRIVSEP_LEN);
+					break;
+				}
 
-			memcpy(te->id.sha1, ite->id, SHA1_DIGEST_LENGTH);
-			te->mode = ite->mode;
-			te->idx = nentries;
-			nentries++;
+				/* Might not be aligned, size is ~32 bytes. */
+				memcpy(&ite, buf, sizeof(ite));
+
+				if (ite.namelen >= sizeof(te->name)) {
+					err = got_error(GOT_ERR_PRIVSEP_LEN);
+					break;
+				}
+				if (te_offset + ite.namelen < te_offset) {
+					err = got_error(GOT_ERR_PRIVSEP_LEN);
+					break;
+				}
+				if (sizeof(ite) + ite.namelen < sizeof(ite)) {
+					err = got_error(GOT_ERR_PRIVSEP_LEN);
+					break;
+				}
+				if (te_offset + sizeof(ite) + ite.namelen >
+				    datalen) {
+					err = got_error(GOT_ERR_PRIVSEP_LEN);
+					break;
+				}
+
+				if (nentries >= (*tree)->nentries) {
+					err = got_error(GOT_ERR_PRIVSEP_LEN);
+					break;
+				}
+				te = &(*tree)->entries[nentries];
+				te_name = buf + sizeof(ite);
+				memcpy(te->name, te_name, ite.namelen);
+				te->name[ite.namelen] = '\0';
+				memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH);
+				te->mode = ite.mode;
+				te->idx = nentries;
+				nentries++;
+
+				te_offset += sizeof(ite) + ite.namelen;
+			}
 			break;
 		default:
 			err = got_error(GOT_ERR_PRIVSEP_MSG);