"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Tracey Emery <tracey@traceyemery.net>
Subject:
Re: rate-limiting of gotadmin progress output
To:
gameoftrees@openbsd.org
Date:
Tue, 4 Jan 2022 14:47:44 -0700

Download raw body.

Thread
On Tue, Jan 04, 2022 at 09:38:42PM +0100, Stefan Sperling wrote:
> Some time ago naddy pointed out that 'gotadmin cleanup' produces a
> huge amount of progress output which easily overwhelms slow terminals.
> The same is true for 'gotadmin pack'.
> 
> The patch below uses a similar approach to the patch naddy suggested at
> the time. (naddy's patch was not applied because of naddy's own cosmetic
> concerns about his patch.)
> 
> With a thin API layer on top of clock_gettime() and timespecs, progress
> updates are rate-limited to every 500 msec, which seems fine to me.
> I would have no problem with tweaking this timeout if needed.
> 
> In my test case the volume of progress output from 'gotadmin pack -a'
> is reduced from 182Kb to 10Kb. The output volume of 'gotadmin cleanup'
> goes down from 736Kb to 2Kb.
> The run time of 'gotadmin cleanup' improves from 18 seconds to 12 seconds.
> 
> I am leaving other progress reports alone for now. We can easily
> introduce more use of this new rate-limiting API over time.
> 
> ok?

Looks good here. ok!

> 
> diff 0f639468088c75c028b204d850ac5b1e5052dd1f 19f1646bd8e01a81d9c170af20efecd6c5a57ee7
> blob - 7e44f914b454fcd320fd03610f840109df3f7539
> blob + 8b431936ed43a5398c391d18637ae6dbc0f5348f
> --- got/Makefile
> +++ got/Makefile
> @@ -13,7 +13,7 @@ SRCS=		got.c blame.c commit_graph.c delta.c diff.c \
>  		diff_myers.c diff_output.c diff_output_plain.c \
>  		diff_output_unidiff.c diff_output_edscript.c \
>  		diff_patience.c send.c deltify.c pack_create.c dial.c \
> -		bloom.c murmurhash2.c
> +		bloom.c murmurhash2.c ratelimit.c
>  
>  MAN =		${PROG}.1 got-worktree.5 git-repository.5 got.conf.5
>  
> blob - 47d9d105cc92076d9e33ac2abdf1adf89ab25dc9
> blob + 781133bbc9837ad999231c521ae9da3239c0232b
> --- gotadmin/Makefile
> +++ gotadmin/Makefile
> @@ -8,7 +8,7 @@ SRCS=		gotadmin.c \
>  		inflate.c lockfile.c object.c object_cache.c object_create.c \
>  		object_idset.c object_parse.c opentemp.c pack.c pack_create.c \
>  		path.c privsep.c reference.c repository.c repository_admin.c \
> -		worktree_open.c sha1.c bloom.c murmurhash2.c
> +		worktree_open.c sha1.c bloom.c murmurhash2.c ratelimit.c
>  MAN =		${PROG}.1
>  
>  CPPFLAGS = -I${.CURDIR}/../include -I${.CURDIR}/../lib
> blob - /dev/null
> blob + 9b95dcd8f24e8a9a1ad8900a16ece71c03e03a94 (mode 644)
> --- /dev/null
> +++ lib/got_lib_ratelimit.h
> @@ -0,0 +1,23 @@
> +/*
> + * Copyright (c) 2022 Stefan Sperling <stsp@openbsd.org>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +struct got_ratelimit {
> +	struct timespec last;
> +	struct timespec interval;
> +};
> +
> +void got_ratelimit_init(struct got_ratelimit *, time_t, unsigned int);
> +const struct got_error *got_ratelimit_check(int *, struct got_ratelimit *);
> blob - 5e229ab44b13c2e63ed9807f70837898eeda1e43
> blob + 2e421bd245bb3702600302716d83053e5785c51a
> --- lib/pack_create.c
> +++ lib/pack_create.c
> @@ -20,6 +20,7 @@
>  #include <sys/tree.h>
>  #include <sys/uio.h>
>  #include <sys/stat.h>
> +#include <sys/time.h>
>  
>  #include <stdint.h>
>  #include <imsg.h>
> @@ -27,6 +28,7 @@
>  #include <stdlib.h>
>  #include <string.h>
>  #include <sha1.h>
> +#include <time.h>
>  #include <limits.h>
>  #include <zlib.h>
>  
> @@ -47,6 +49,7 @@
>  #include "got_lib_pack.h"
>  #include "got_lib_privsep.h"
>  #include "got_lib_repository.h"
> +#include "got_lib_ratelimit.h"
>  
>  #ifndef MIN
>  #define	MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
> @@ -249,12 +252,30 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o
>  	return NULL;
>  }
>  
> +static const struct got_error *
> +report_progress(got_pack_progress_cb progress_cb, void *progress_arg,
> +    struct got_ratelimit *rl, off_t packfile_size, int ncommits,
> +    int nobj_total, int obj_deltify, int nobj_written)
> +{
> +	const struct got_error *err;
> +	int elapsed;
>  
> +	if (progress_cb == NULL)
> +		return NULL;
> +
> +	err = got_ratelimit_check(&elapsed, rl);
> +	if (err || !elapsed)
> +		return err;
> +
> +	return progress_cb(progress_arg, packfile_size, ncommits,
> +	    nobj_total, obj_deltify, nobj_written);
> +}
> +
>  static const struct got_error *
>  pick_deltas(struct got_pack_meta **meta, int nmeta, int nours,
>      FILE *delta_cache, struct got_repository *repo,
>      got_pack_progress_cb progress_cb, void *progress_arg,
> -    got_cancel_cb cancel_cb, void *cancel_arg)
> +    struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
>  {
>  	const struct got_error *err = NULL;
>  	struct got_pack_meta *m = NULL, *base = NULL;
> @@ -271,11 +292,10 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in
>  			if (err)
>  				break;
>  		}
> -		if (progress_cb) {
> -			err = progress_cb(progress_arg, 0L, nours, nmeta, i, 0);
> -			if (err)
> -				goto done;
> -		}
> +		err = report_progress(progress_cb, progress_arg, rl,
> +		    0L, nours, nmeta, i, 0);
> +		if (err)
> +			goto done;
>  		m = meta[i];
>  
>  		if (m->obj_type == GOT_OBJ_TYPE_COMMIT ||
> @@ -923,7 +943,7 @@ read_meta(struct got_pack_meta ***meta, int *nmeta,
>      struct got_object_id **theirs, int ntheirs,
>      struct got_object_id **ours, int nours, struct got_repository *repo,
>      int loose_obj_only, got_pack_progress_cb progress_cb, void *progress_arg,
> -    got_cancel_cb cancel_cb, void *cancel_arg)
> +    struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
>  {
>  	const struct got_error *err = NULL;
>  	struct got_object_id **ids = NULL;
> @@ -964,12 +984,10 @@ read_meta(struct got_pack_meta ***meta, int *nmeta,
>  		    loose_obj_only, cancel_cb, cancel_arg);
>  		if (err)
>  			goto done;
> -		if (progress_cb) {
> -			err = progress_cb(progress_arg, 0L, nours,
> -			    v.nmeta, 0, 0);
> -			if (err)
> -				goto done;
> -		}
> +		err = report_progress(progress_cb, progress_arg, rl,
> +		    0L, nours, v.nmeta, 0, 0);
> +		if (err)
> +			goto done;
>  	}
>  
>  	for (i = 0; i < ntheirs; i++) {
> @@ -990,12 +1008,10 @@ read_meta(struct got_pack_meta ***meta, int *nmeta,
>  		    loose_obj_only, cancel_cb, cancel_arg);
>  		if (err)
>  			goto done;
> -		if (progress_cb) {
> -			err = progress_cb(progress_arg, 0L, nours,
> -			    v.nmeta, 0, 0);
> -			if (err)
> -				goto done;
> -		}
> +		err = report_progress(progress_cb, progress_arg, rl,
> +		    0L, nours, v.nmeta, 0, 0);
> +		if (err)
> +			goto done;
>  	}
>  
>  	for (i = 0; i < nobj; i++) {
> @@ -1003,12 +1019,12 @@ read_meta(struct got_pack_meta ***meta, int *nmeta,
>  		    loose_obj_only, cancel_cb, cancel_arg);
>  		if (err)
>  			goto done;
> -		if (progress_cb) {
> -			err = progress_cb(progress_arg, 0L, nours,
> -			    v.nmeta, 0, 0);
> -			if (err)
> -				goto done;
> -		}
> +		if (err)
> +			goto done;
> +		err = report_progress(progress_cb, progress_arg, rl,
> +		    0L, nours, v.nmeta, 0, 0);
> +		if (err)
> +			goto done;
>  	}
>  
>  	for (i = 0; i < nours; i++) {
> @@ -1029,14 +1045,17 @@ read_meta(struct got_pack_meta ***meta, int *nmeta,
>  		    loose_obj_only, cancel_cb, cancel_arg);
>  		if (err)
>  			goto done;
> -		if (progress_cb) {
> -			err = progress_cb(progress_arg, 0L, nours,
> -			    v.nmeta, 0, 0);
> -			if (err)
> -				goto done;
> -		}
> +		err = report_progress(progress_cb, progress_arg, rl,
> +		    0L, nours, v.nmeta, 0, 0);
> +		if (err)
> +			goto done;
>  	}
>  
> +	if (progress_cb) {
> +		err = progress_cb(progress_arg, 0L, nours, v.nmeta, 0, 0);
> +		if (err)
> +			goto done;
> +	}
>  done:
>  	for (i = 0; i < nobj; i++) {
>  		free(ids[i]);
> @@ -1136,6 +1155,7 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
>      struct got_pack_meta **meta, int nmeta, int nours,
>      int use_offset_deltas, struct got_repository *repo,
>      got_pack_progress_cb progress_cb, void *progress_arg,
> +    struct got_ratelimit *rl,
>      got_cancel_cb cancel_cb, void *cancel_arg)
>  {
>  	const struct got_error *err = NULL;
> @@ -1167,12 +1187,10 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
>  		goto done;
>  	qsort(meta, nmeta, sizeof(struct got_pack_meta *), write_order_cmp);
>  	for (i = 0; i < nmeta; i++) {
> -		if (progress_cb) {
> -			err = progress_cb(progress_arg, packfile_size, nours,
> -			    nmeta, nmeta, i);
> -			if (err)
> -				goto done;
> -		}
> +		err = report_progress(progress_cb, progress_arg, rl,
> +		    packfile_size, nours, nmeta, nmeta, i);
> +		if (err)
> +			goto done;
>  		m = meta[i];
>  		m->off = ftello(packfile);
>  		err = got_object_raw_open(&raw, &outfd, repo, &m->id);
> @@ -1281,10 +1299,12 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
>  		err = got_ferror(packfile, GOT_ERR_IO);
>  	packfile_size += SHA1_DIGEST_LENGTH;
>  	packfile_size += sizeof(struct got_packfile_hdr);
> -	err = progress_cb(progress_arg, packfile_size, nours,
> -	    nmeta, nmeta, nmeta);
> -	if (err)
> -		goto done;
> +	if (progress_cb) {
> +		err = progress_cb(progress_arg, packfile_size, nours,
> +		    nmeta, nmeta, nmeta);
> +		if (err)
> +			goto done;
> +	}
>  done:
>  	if (delta_file && fclose(delta_file) == EOF && err == NULL)
>  		err = got_error_from_errno("fclose");
> @@ -1307,9 +1327,13 @@ got_pack_create(uint8_t *packsha1, FILE *packfile,
>  	struct got_pack_meta **meta;
>  	int nmeta;
>  	FILE *delta_cache = NULL;
> +	struct got_ratelimit rl;
>  
> +	got_ratelimit_init(&rl, 0, 500);
> +
>  	err = read_meta(&meta, &nmeta, theirs, ntheirs, ours, nours, repo,
> -	    loose_obj_only, progress_cb, progress_arg, cancel_cb, cancel_arg);
> +	    loose_obj_only, progress_cb, progress_arg, &rl,
> +	    cancel_cb, cancel_arg);
>  	if (err)
>  		return err;
>  
> @@ -1326,7 +1350,7 @@ got_pack_create(uint8_t *packsha1, FILE *packfile,
>  
>  	if (nmeta > 0) {
>  		err = pick_deltas(meta, nmeta, nours, delta_cache, repo,
> -		    progress_cb, progress_arg, cancel_cb, cancel_arg);
> +		    progress_cb, progress_arg, &rl, cancel_cb, cancel_arg);
>  		if (err)
>  			goto done;
>  		if (fseeko(delta_cache, 0L, SEEK_SET) == -1) {
> @@ -1336,7 +1360,7 @@ got_pack_create(uint8_t *packsha1, FILE *packfile,
>  	}
>  
>  	err = genpack(packsha1, packfile, delta_cache, meta, nmeta, nours, 1,
> -	    repo, progress_cb, progress_arg, cancel_cb, cancel_arg);
> +	    repo, progress_cb, progress_arg, &rl, cancel_cb, cancel_arg);
>  	if (err)
>  		goto done;
>  done:
> blob - /dev/null
> blob + 219e7905e4765ae964a3ab7a1e8145d1ce344c2f (mode 644)
> --- /dev/null
> +++ lib/ratelimit.c
> @@ -0,0 +1,55 @@
> +/*
> + * Copyright (c) 2022 Stefan Sperling <stsp@openbsd.org>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/time.h>
> +
> +#include <stdio.h>
> +#include <limits.h>
> +#include <string.h>
> +#include <time.h>
> +
> +#include "got_lib_ratelimit.h"
> +
> +#include "got_error.h"
> +
> +void
> +got_ratelimit_init(struct got_ratelimit *rl, time_t interval_sec,
> +    unsigned int interval_msec)
> +{
> +	memset(rl, 0, sizeof(*rl));
> +	rl->interval.tv_sec = interval_sec;
> +	rl->interval.tv_nsec = interval_msec * 1000000UL;
> +}
> +
> +const struct got_error *
> +got_ratelimit_check(int *elapsed, struct got_ratelimit *rl)
> +{
> +	struct timespec now, delta;
> +
> +	if (clock_gettime(CLOCK_MONOTONIC, &now) == -1)
> +		return got_error_from_errno("clock_gettime");
> +
> +	if (timespecisset(&rl->last)) {
> +		timespecsub(&now, &rl->last, &delta);
> +		*elapsed = timespeccmp(&delta, &rl->interval, >=) ? 1 : 0;
> +	} else
> +		*elapsed = 1;
> +
> +	if (*elapsed)
> +		rl->last = now;
> +
> +	return NULL;
> +}
> blob - 4e1253bcafe34044a40c52250b6a96a552b4b609
> blob + 40af887d57530bc64cde9dbfdd26426592ae93c3
> --- lib/repository_admin.c
> +++ lib/repository_admin.c
> @@ -54,6 +54,7 @@
>  #include "got_lib_pack_create.h"
>  #include "got_lib_sha1.h"
>  #include "got_lib_lockfile.h"
> +#include "got_lib_ratelimit.h"
>  
>  #ifndef nitems
>  #define nitems(_a)	(sizeof((_a)) / sizeof((_a)[0]))
> @@ -598,9 +599,27 @@ done:
>  }
>  
>  static const struct got_error *
> +report_cleanup_progress(got_cleanup_progress_cb progress_cb,
> +    void *progress_arg, struct got_ratelimit *rl,
> +    int nloose, int ncommits, int npurged)
> +{
> +	const struct got_error *err;
> +	int elapsed;
> +
> +	if (progress_cb == NULL)
> +		return NULL;
> +
> +	err = got_ratelimit_check(&elapsed, rl);
> +	if (err || !elapsed)
> +		return err;
> +
> +	return progress_cb(progress_arg, nloose, ncommits, npurged);
> +}
> +
> +static const struct got_error *
>  get_loose_object_ids(struct got_object_idset **loose_ids, off_t *ondisk_size,
>      got_cleanup_progress_cb progress_cb, void *progress_arg,
> -    struct got_repository *repo)
> +    struct got_ratelimit *rl, struct got_repository *repo)
>  {
>  	const struct got_error *err = NULL;
>  	char *path_objects = NULL, *path = NULL;
> @@ -688,13 +707,11 @@ get_loose_object_ids(struct got_object_idset **loose_i
>  			err = got_object_idset_add(*loose_ids, &id, NULL);
>  			if (err)
>  				goto done;
> -			if (progress_cb) {
> -				err = progress_cb(progress_arg,
> -				    got_object_idset_num_elements(*loose_ids),
> -				    -1, -1);
> -				if (err)
> -					goto done;
> -			}
> +			err = report_cleanup_progress(progress_cb,
> +			    progress_arg, rl,
> +			    got_object_idset_num_elements(*loose_ids), -1, -1);
> +			if (err)
> +				goto done;
>  		}
>  
>  		if (closedir(dir) != 0) {
> @@ -878,7 +895,8 @@ static const struct got_error *
>  load_commit_or_tag(struct got_object_idset *loose_ids, int *ncommits,
>      int *npacked, struct got_object_idset *traversed_ids,
>      struct got_object_id *id, struct got_repository *repo,
> -    got_cleanup_progress_cb progress_cb, void *progress_arg, int nloose,
> +    got_cleanup_progress_cb progress_cb, void *progress_arg,
> +    struct got_ratelimit *rl, int nloose,
>      got_cancel_cb cancel_cb, void *cancel_arg)
>  {
>  	const struct got_error *err;
> @@ -986,11 +1004,10 @@ load_commit_or_tag(struct got_object_idset *loose_ids,
>  		if (commit || tag)
>  			(*ncommits)++; /* scanned tags are counted as commits */
>  
> -		if (progress_cb) {
> -			err = progress_cb(progress_arg, nloose, *ncommits, -1);
> -			if (err)
> -				break;
> -		}
> +		err = report_cleanup_progress(progress_cb, progress_arg, rl,
> +		    nloose, *ncommits, -1);
> +		if (err)
> +			break;
>   
>  		if (commit) {
>  			/* Find parent commits to scan. */
> @@ -1024,6 +1041,7 @@ struct purge_loose_object_arg {
>  	struct got_repository *repo;
>  	got_cleanup_progress_cb progress_cb;
>  	void *progress_arg;
> +	struct got_ratelimit *rl;
>  	int nloose;
>  	int ncommits;
>  	int npurged;
> @@ -1075,10 +1093,10 @@ purge_loose_object(struct got_object_id *id, void *dat
>  
>  		a->npurged++;
>  		a->size_purged += sb.st_size;
> -		if (a->progress_cb) {
> -			err = a->progress_cb(a->progress_arg, a->nloose,
> -			    a->ncommits, a->npurged);
> -		}
> +		err = report_cleanup_progress(a->progress_cb, a->progress_arg,
> +		    a->rl, a->nloose, a->ncommits, a->npurged);
> +		if (err)
> +			goto done;
>  	}
>  done:
>  	if (fd != -1 && close(fd) == -1 && err == NULL)
> @@ -1104,15 +1122,17 @@ got_repo_purge_unreferenced_loose_objects(struct got_r
>  	struct got_reflist_entry *re;
>  	struct purge_loose_object_arg arg;
>  	time_t max_mtime = 0;
> +	struct got_ratelimit rl;
>  
>  	TAILQ_INIT(&refs);
> +	got_ratelimit_init(&rl, 0, 500);
>  
>  	*size_before = 0;
>  	*size_after = 0;
>  	*npacked = 0;
>  
>  	err = get_loose_object_ids(&loose_ids, size_before,
> -	    progress_cb, progress_arg, repo);
> +	    progress_cb, progress_arg, &rl, repo);
>  	if (err)
>  		return err;
>  	nloose = got_object_idset_num_elements(loose_ids);
> @@ -1153,23 +1173,17 @@ got_repo_purge_unreferenced_loose_objects(struct got_r
>  	for (i = 0; i < nreferenced; i++) {
>  		struct got_object_id *id = referenced_ids[i];
>  		err = load_commit_or_tag(loose_ids, &ncommits, npacked,
> -		    traversed_ids, id, repo, progress_cb, progress_arg, nloose, 
> -		    cancel_cb, cancel_arg);
> +		    traversed_ids, id, repo, progress_cb, progress_arg, &rl,
> +		    nloose, cancel_cb, cancel_arg);
>  		if (err)
>  			goto done;
>  	}
>  
> -	/* Produce a final progress report in case no objects can be purged. */
> -	if (got_object_idset_num_elements(loose_ids) == 0 && progress_cb) {
> -		err = progress_cb(progress_arg, nloose, ncommits, 0);
> -		if (err)
> -			goto done;
> -	}
> -
>  	/* Any remaining loose objects are unreferenced and can be purged. */
>  	arg.repo = repo;
>  	arg.progress_arg = progress_arg;
>  	arg.progress_cb = progress_cb;
> +	arg.rl = &rl;
>  	arg.nloose = nloose;
>  	arg.npurged = 0;
>  	arg.size_purged = 0;
> @@ -1181,6 +1195,14 @@ got_repo_purge_unreferenced_loose_objects(struct got_r
>  	if (err)
>  		goto done;
>  	*size_after = *size_before - arg.size_purged;
> +
> +	/* Produce a final progress report. */
> +	if (progress_cb) {
> +		err = progress_cb(progress_arg, nloose, ncommits,
> +		    got_object_idset_num_elements(loose_ids));
> +		if (err)
> +			goto done;
> +	}
>  done:
>  	got_object_idset_free(loose_ids);
>  	got_object_idset_free(traversed_ids);
> 

-- 

Tracey Emery