"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@openbsd.org>
Subject:
Re: batch up tree entries in imsg
To:
Stefan Sperling <stsp@stsp.name>
Cc:
gameoftrees@openbsd.org
Date:
Wed, 18 May 2022 17:14:30 +0200

Download raw body.

Thread
Stefan Sperling <stsp@stsp.name> wrote:
> Currently we use one imsg per tree entry.
> 
> Each imsg involves a separate malloc/free dance. It is faster to
> utlize space available in each message by filling it with as many
> tree entries as possible.
> 
> In my test case, this change reduces the 'loading objects' phase of
> gotadmin pack from 3 minutes to 2.5 minutes (on OpenBSD src.git).
> 
> This patch depends on the tree parsing patch I sent earlier.
> 
> Tested on amd64 and sparc64.
> 
> ok (for both diffs)?

it reads fine but it doesn't apply.  (reminder to me to save hunks in
.rej files...)

there are two things below that i don't understand thought

> diff 7fa6646b4f5ef336127bda962d89ba86799b84dc a4bc9edd34cbdb5a3fe13b7c207c69484b6c6322
> blob - c6937820f54e6d66c1cbf81056a6b916516c9c5d
> blob + e719a95bde6bbe971668bb22ea3a72e2990ad927
> --- lib/got_lib_privsep.h
> +++ lib/got_lib_privsep.h
> @@ -106,7 +106,7 @@ enum got_imsg_type {
>  	GOT_IMSG_COMMIT_LOGMSG,
>  	GOT_IMSG_TREE_REQUEST,
>  	GOT_IMSG_TREE,
> -	GOT_IMSG_TREE_ENTRY,
> +	GOT_IMSG_TREE_ENTRIES,
>  	GOT_IMSG_BLOB_REQUEST,
>  	GOT_IMSG_BLOB_OUTFD,
>  	GOT_IMSG_BLOB,
> @@ -246,17 +246,23 @@ struct got_imsg_commit_object {
>  	 */
>  } __attribute__((__packed__));
>  
> -
> -/* Structure for GOT_IMSG_TREE_ENTRY. */
>  struct got_imsg_tree_entry {
>  	char id[SHA1_DIGEST_LENGTH];
>  	mode_t mode;
> -	/* Followed by entry's name in remaining data of imsg buffer. */
> +	size_t namelen;
> +	/* Followed by namelen bytes of entry's name, not NUL-terminated. */
>  } __attribute__((__packed__));
>  
> +/* Structure for GOT_IMSG_TREE_ENTRIES. */
> +struct got_imsg_tree_entries {
> +	size_t nentries; /* Number of tree entries contained in this message. */
> +
> +	/* Followed by nentries * struct got_imsg_tree_entry */
> +};
> +
>  /* Structure for GOT_IMSG_TREE_OBJECT_REPLY data. */
>  struct got_imsg_tree_object {
> -	int nentries; /* This many TREE_ENTRY messages follow. */
> +	int nentries; /* This many tree entries follow. */
>  };
>  
>  /* Structure for GOT_IMSG_BLOB. */
> blob - 7b7f894dda9fc261842dfd019c4e9c2f444508a0
> blob + b47d652e432786fd20c6ff729e2cb3990037f756
> --- lib/privsep.c
> +++ lib/privsep.c
> @@ -1489,64 +1489,96 @@ got_privsep_recv_commit(struct got_commit_object **com
>  	return err;
>  }
>  
> +static const struct got_error *
> +send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries,
> +    int idx0, int idxN, size_t len)
> +{
> +	static const struct got_error *err;
> +	struct ibuf *wbuf;
> +	struct got_imsg_tree_entries ientries;
> +	int i;
> +
> +	wbuf = imsg_create(ibuf, GOT_IMSG_TREE_ENTRIES, 0, 0, len);
> +	if (wbuf == NULL)
> +		return got_error_from_errno("imsg_create TREE_ENTRY");
> +
> +	ientries.nentries = idxN - idx0 + 1;
> +	if (imsg_add(wbuf, &ientries, sizeof(ientries)) == -1) {
> +		err = got_error_from_errno("imsg_add TREE_ENTRY");
> +		ibuf_free(wbuf);
> +		return err;
> +	}
> +
> +	for (i = idx0; i <= idxN; i++) {
> +		struct got_parsed_tree_entry *pte = &entries[i];
> +
> +		/* Keep in sync with struct got_imsg_tree_object definition! */
> +		if (imsg_add(wbuf, pte->id, SHA1_DIGEST_LENGTH) == -1) {
> +			err = got_error_from_errno("imsg_add TREE_ENTRY");
> +			ibuf_free(wbuf);
> +			return err;
> +		}
> +		if (imsg_add(wbuf, &pte->mode, sizeof(pte->mode)) == -1) {
> +			err = got_error_from_errno("imsg_add TREE_ENTRY");
> +			ibuf_free(wbuf);
> +			return err;
> +		}
> +		if (imsg_add(wbuf, &pte->namelen, sizeof(pte->namelen)) == -1) {
> +			err = got_error_from_errno("imsg_add TREE_ENTRY");
> +			ibuf_free(wbuf);
> +			return err;
> +		}
> +		
> +		/* Remaining bytes are the entry's name. */
> +		if (imsg_add(wbuf, pte->name, pte->namelen) == -1) {
> +			err = got_error_from_errno("imsg_add TREE_ENTRY");
> +			ibuf_free(wbuf);
> +			return err;
> +		}
> +	}
> +
> +	wbuf->fd = -1;
> +	imsg_close(ibuf, wbuf);
> +	return NULL;
> +}
> +
>  const struct got_error *
>  got_privsep_send_tree(struct imsgbuf *ibuf,
>      struct got_parsed_tree_entry *entries, int nentries)
>  {
>  	const struct got_error *err = NULL;
>  	struct got_imsg_tree_object itree;
> -	size_t totlen;
> -	int i, nimsg; /* number of imsg queued in ibuf */
> +	size_t entries_len;
> +	int i, j;
>  
>  	itree.nentries = nentries;
>  	if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree))
>  	    == -1)
>  		return got_error_from_errno("imsg_compose TREE");
>  
> -	totlen = sizeof(itree);
> -	nimsg = 1;
> -	for (i = 0; i < nentries; i++) {
> -		struct got_parsed_tree_entry *pte = &entries[i];
> -		struct ibuf *wbuf;
> -		size_t len = sizeof(struct got_imsg_tree_entry) + pte->namelen;
> +	entries_len = sizeof(struct got_imsg_tree_entries);
> +	i = 0;
> +	for (j = 0; j < nentries; j++) {
> +		struct got_parsed_tree_entry *pte = &entries[j];
> +		size_t len = sizeof(*pte) + pte->namelen;
>  
> -		if (len > MAX_IMSGSIZE)
> -			return got_error(GOT_ERR_NO_SPACE);
> -
> -		nimsg++;
> -		if (totlen + len >= MAX_IMSGSIZE - (IMSG_HEADER_SIZE * nimsg)) {
> -			err = flush_imsg(ibuf);
> +		if (j > 0 &&
> +		    entries_len + len > MAX_IMSGSIZE - IMSG_HEADER_SIZE) {
> +			err = send_tree_entries(ibuf, entries,
> +			    i, j - 1, entries_len);
>  			if (err)
>  				return err;
> -			nimsg = 0;
> +			i = j;
> +			entries_len = sizeof(struct got_imsg_tree_entries);
>  		}
>  
> -		wbuf = imsg_create(ibuf, GOT_IMSG_TREE_ENTRY, 0, 0, len);
> -		if (wbuf == NULL)
> -			return got_error_from_errno("imsg_create TREE_ENTRY");
> +		entries_len += len;
> +	}
>  
> -		/* Keep in sync with struct got_imsg_tree_object definition! */
> -		if (imsg_add(wbuf, pte->id, SHA1_DIGEST_LENGTH) == -1) {
> -			err = got_error_from_errno("imsg_add TREE_ENTRY");
> -			ibuf_free(wbuf);
> +	if (j > 0) {

maybe i'm not reading this correctly, but here isn't better to check for
`j != i-1'?  if somehow we exit the previous loop after setting `i = j'
then we enter here just to send 0 elements.  (assuming that's possible)

> +		err = send_tree_entries(ibuf, entries, i, j - 1, entries_len);
> +		if (err)
>  			return err;
> -		}
> -		if (imsg_add(wbuf, &pte->mode, sizeof(pte->mode)) == -1) {
> -			err = got_error_from_errno("imsg_add TREE_ENTRY");
> -			ibuf_free(wbuf);
> -			return err;
> -		}
> -
> -		if (imsg_add(wbuf, pte->name, pte->namelen) == -1) {
> -			err = got_error_from_errno("imsg_add TREE_ENTRY");
> -			ibuf_free(wbuf);
> -			return err;
> -		}
> -
> -		wbuf->fd = -1;
> -		imsg_close(ibuf, wbuf);
> -
> -		totlen += len;
>  	}
>  
>  	return flush_imsg(ibuf);
> @@ -1560,6 +1592,7 @@ got_privsep_recv_tree(struct got_tree_object **tree, s
>  	    MIN(sizeof(struct got_imsg_error),
>  	    sizeof(struct got_imsg_tree_object));
>  	struct got_imsg_tree_object *itree;
> +	size_t i;
>  	int nentries = 0;
>  
>  	*tree = NULL;
> @@ -1572,8 +1605,9 @@ got_privsep_recv_tree(struct got_tree_object **tree, s
>  		struct imsg imsg;
>  		size_t n;
>  		size_t datalen;
> -		struct got_imsg_tree_entry *ite;
> +		struct got_imsg_tree_entries *ientries;
>  		struct got_tree_entry *te = NULL;
> +		size_t te_offset;
>  
>  		n = imsg_get(ibuf, &imsg);
>  		if (n == 0) {
> @@ -1634,41 +1668,71 @@ got_privsep_recv_tree(struct got_tree_object **tree, s
>  			(*tree)->nentries = itree->nentries;
>  			(*tree)->refcnt = 0;
>  			break;
> -		case GOT_IMSG_TREE_ENTRY:
> +		case GOT_IMSG_TREE_ENTRIES:
>  			/* This message should be preceeded by GOT_IMSG_TREE. */
>  			if (*tree == NULL) {
>  				err = got_error(GOT_ERR_PRIVSEP_MSG);
>  				break;
>  			}
> -			if (datalen < sizeof(*ite) || datalen > MAX_IMSGSIZE) {
> +			if (datalen <= sizeof(*ientries) ||
> +			    datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE) {
>  				err = got_error(GOT_ERR_PRIVSEP_LEN);
>  				break;
>  			}
>  
> -			/* Remaining data contains the entry's name. */
> -			datalen -= sizeof(*ite);
> -			if (datalen == 0 || datalen > MAX_IMSGSIZE) {
> -				err = got_error(GOT_ERR_PRIVSEP_LEN);
> +			ientries = imsg.data;
> +			if (ientries->nentries > INT_MAX) {
> +				err = got_error_msg(GOT_ERR_NO_SPACE,
> +				    "too many tree entries");
>  				break;
>  			}
> -			ite = imsg.data;
> +			te_offset = sizeof(*ientries);
> +			for (i = 0; i < ientries->nentries; i++) {
> +				struct got_imsg_tree_entry ite;
> +				const char *te_name;
> +				uint8_t *buf = imsg.data + te_offset;
>  
> -			if (datalen + 1 > sizeof(te->name)) {
> -				err = got_error(GOT_ERR_NO_SPACE);
> -				break;
> -			}
> -			if (nentries >= (*tree)->nentries) {
> -				err = got_error(GOT_ERR_PRIVSEP_LEN);
> -				break;
> -			}
> -			te = &(*tree)->entries[nentries];
> -			memcpy(te->name, imsg.data + sizeof(*ite), datalen);
> -			te->name[datalen] = '\0';
> +				if (te_offset >= datalen) {
> +					err = got_error(GOT_ERR_PRIVSEP_LEN);
> +					break;
> +				}
>  
> -			memcpy(te->id.sha1, ite->id, SHA1_DIGEST_LENGTH);
> -			te->mode = ite->mode;
> -			te->idx = nentries;
> -			nentries++;
> +				/* Might not be aligned, size is ~32 bytes. */
> +				memcpy(&ite, buf, sizeof(ite));
> +
> +				if (ite.namelen >= sizeof(te->name)) {
> +					err = got_error(GOT_ERR_PRIVSEP_LEN);
> +					break;
> +				}
> +				if (te_offset + ite.namelen < te_offset) {
> +					err = got_error(GOT_ERR_PRIVSEP_LEN);
> +					break;
> +				}

I'm not sure I'm getting the meaning of this check:

> +				if (sizeof(ite) + ite.namelen < sizeof(ite)) {

ite.nameles is size_t so how can this evaluate to true?

> +					err = got_error(GOT_ERR_PRIVSEP_LEN);
> +					break;
> +				}
> +				if (te_offset + sizeof(ite) + ite.namelen >
> +				    datalen) {
> +					err = got_error(GOT_ERR_PRIVSEP_LEN);
> +					break;
> +				}
> +
> +				if (nentries >= (*tree)->nentries) {
> +					err = got_error(GOT_ERR_PRIVSEP_LEN);
> +					break;
> +				}
> +				te = &(*tree)->entries[nentries];
> +				te_name = buf + sizeof(ite);
> +				memcpy(te->name, te_name, ite.namelen);
> +				te->name[ite.namelen] = '\0';
> +				memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH);
> +				te->mode = ite.mode;
> +				te->idx = nentries;
> +				nentries++;
> +
> +				te_offset += sizeof(ite) + ite.namelen;
> +			}
>  			break;
>  		default:
>  			err = got_error(GOT_ERR_PRIVSEP_MSG);