From: Mark Jamsek Subject: Re: introducing gotadmin dump To: Omar Polo Cc: Stefan Sperling, gameoftrees@openbsd.org Date: Fri, 07 Jul 2023 21:10:12 +1000 Omar Polo wrote: > ----------------------------------------------- > commit 96fc93d02a405c26b746df83e7f892bc8546109d (fe) > from: Omar Polo > date: Thu Jul 6 15:41:24 2023 UTC > > add an initial implementation of gotadmin dump > > gotadmin dump is used to export (part of) the history of the > repository; at the moment it only generates git bundles (which are > pack files with a header) but support to generate a fast-import > stream is planned. This is great, op! OK with a couple of minor nits, in addition to the previous doc suggestions. > diff 27555e8e6053ab0cc846d201757b588d5a79293f 96fc93d02a405c26b746df83e7f892bc8546109d > commit - 27555e8e6053ab0cc846d201757b588d5a79293f > commit + 96fc93d02a405c26b746df83e7f892bc8546109d > blob - 6e194139586ae1731a9aef3771d60596d3d0c756 > blob + 80e118616d09947850bdc2b12829cac8eb2c8b3f > --- gotadmin/Makefile > +++ gotadmin/Makefile ...
> blob - 5551b60b73215eb11a82c034e1d2601ed9a15c31 > blob + 100d5e7f9f6bc1aed228af50e30f1e58677c5ef4 > --- gotadmin/gotadmin.c > +++ gotadmin/gotadmin.c > @@ -39,6 +39,7 @@ > #include "got_cancel.h" > #include "got_repository.h" > #include "got_repository_admin.h" > +#include "got_repository_dump.h" > #include "got_gotconfig.h" > #include "got_path.h" > #include "got_privsep.h" > @@ -86,6 +87,7 @@ __dead static void usage_cleanup(void); > __dead static void usage_indexpack(void); > __dead static void usage_listpack(void); > __dead static void usage_cleanup(void); > +__dead static void usage_dump(void); > > static const struct got_error* cmd_init(int, char *[]); > static const struct got_error* cmd_info(int, char *[]); > @@ -93,6 +95,7 @@ static const struct got_error* cmd_cleanup(int, char > static const struct got_error* cmd_indexpack(int, char *[]); > static const struct got_error* cmd_listpack(int, char *[]); > static const struct got_error* cmd_cleanup(int, char *[]); > +static const struct got_error* cmd_dump(int, char *[]); > > static const struct gotadmin_cmd gotadmin_commands[] = { > { "init", cmd_init, usage_init, "" }, > @@ -101,6 +104,7 @@ static const struct gotadmin_cmd gotadmin_commands[] = > { "indexpack", cmd_indexpack, usage_indexpack,"ix" }, > { "listpack", cmd_listpack, usage_listpack, "ls" }, > { "cleanup", cmd_cleanup, usage_cleanup, "cl" }, > + { "dump", cmd_dump, usage_dump, "" }, > }; > > static void > @@ -459,6 +463,7 @@ struct got_pack_progress_arg { > } > > struct got_pack_progress_arg { > + FILE *out; > char last_scaled_size[FMT_SCALED_STRSIZE]; > int last_ncolored; > int last_nfound; > @@ -475,20 +480,20 @@ print_load_info(int print_colored, int print_found, in > }; > > static void > -print_load_info(int print_colored, int print_found, int print_trees, > +print_load_info(FILE *out, int print_colored, int print_found, int print_trees, > int ncolored, int nfound, int ntrees) > { > if (print_colored) { > - printf("%d commit%s colored", 
ncolored, > + fprintf(out, "%d commit%s colored", ncolored, > ncolored == 1 ? "" : "s"); > } > if (print_found) { > - printf("%s%d object%s found", > + fprintf(out, "%s%d object%s found", > ncolored > 0 ? "; " : "", > nfound, nfound == 1 ? "" : "s"); > } > if (print_trees) { > - printf("; %d tree%s scanned", ntrees, > + fprintf(out, "; %d tree%s scanned", ntrees, > ntrees == 1 ? "" : "s"); > } > } > @@ -528,16 +533,16 @@ pack_progress(void *arg, int ncolored, int nfound, int > > if ((print_colored || print_found || print_trees) && > !a->loading_done) { > - printf("\r"); > - print_load_info(print_colored, print_found, print_trees, > - ncolored, nfound, ntrees); > + fprintf(a->out, "\r"); > + print_load_info(a->out, print_colored, print_found, > + print_trees, ncolored, nfound, ntrees); > a->printed_something = 1; > - fflush(stdout); > + fflush(a->out); > return NULL; > } else if (!a->loading_done) { > - printf("\r"); > - print_load_info(1, 1, 1, ncolored, nfound, ntrees); > - printf("\n"); > + fprintf(a->out, "\r"); > + print_load_info(a->out, 1, 1, 1, ncolored, nfound, ntrees); > + fprintf(a->out, "\n"); > a->loading_done = 1; > } > > @@ -585,22 +590,22 @@ pack_progress(void *arg, int ncolored, int nfound, int > } > > if (print_searching || print_total || print_deltify || print_written) > - printf("\r"); > + fprintf(a->out, "\r"); > if (print_searching) > - printf("packing %d reference%s", ncommits, > + fprintf(a->out, "packing %d reference%s", ncommits, > ncommits == 1 ? "" : "s"); > if (print_total) > - printf("; %d object%s", nobj_total, > + fprintf(a->out, "; %d object%s", nobj_total, > nobj_total == 1 ? 
"" : "s"); > if (print_deltify) > - printf("; deltify: %d%%", p_deltify); > + fprintf(a->out, "; deltify: %d%%", p_deltify); > if (print_written) > - printf("; writing pack: %*s %d%%", FMT_SCALED_STRSIZE - 2, > - scaled_size, p_written); > + fprintf(a->out, "; writing pack: %*s %d%%", > + FMT_SCALED_STRSIZE - 2, scaled_size, p_written); > if (print_searching || print_total || print_deltify || > print_written) { > a->printed_something = 1; > - fflush(stdout); > + fflush(a->out); > } > return NULL; > } > @@ -799,6 +804,7 @@ cmd_pack(int argc, char *argv[]) > } > > memset(&ppa, 0, sizeof(ppa)); > + ppa.out = stdout; > ppa.last_scaled_size[0] = '\0'; > ppa.last_p_indexed = -1; > ppa.last_p_resolved = -1; > @@ -900,6 +906,7 @@ cmd_indexpack(int argc, char *argv[]) > goto done; > > memset(&ppa, 0, sizeof(ppa)); > + ppa.out = stdout; > ppa.last_scaled_size[0] = '\0'; > ppa.last_p_indexed = -1; > ppa.last_p_resolved = -1; > @@ -1399,3 +1406,140 @@ done: > free(repo_path); > return error; > } > + > +__dead static void > +usage_dump(void) > +{ > + fprintf(stderr, "usage: %s dump [-q] [-r repository-path] " > + "[-x reference] [reference]...\n", > + getprogname()); nit: doesn't need to wrap: "[-x reference] [reference]...\n", getprogname()); > + exit(1); > +} > + > +static const struct got_error * > +cmd_dump(int argc, char *argv[]) > +{ > + const struct got_error *error = NULL; > + struct got_pack_progress_arg ppa; > + struct got_repository *repo = NULL; > + struct got_pathlist_head exclude_args; > + struct got_pathlist_entry *pe; > + struct got_reflist_head exclude_refs; > + struct got_reflist_head include_refs; > + struct got_reflist_entry *re, *new; > + const char *refname; > + char *repo_path = NULL; > + int *pack_fds = NULL; > + int verbosity = 0; > + int i, ch; > + > + TAILQ_INIT(&exclude_args); > + TAILQ_INIT(&exclude_refs); > + TAILQ_INIT(&include_refs); > + > +#ifndef PROFILE > + if (pledge("stdio rpath wpath cpath flock proc exec sendfd unveil", > + NULL) == -1) > 
+ err(1, "pledge"); > +#endif > + > + while ((ch = getopt(argc, argv, "qr:x:")) != -1) { > + switch (ch) { > + case 'q': > + verbosity = -1; > + break; > + case 'r': > + repo_path = realpath(optarg, NULL); > + if (repo_path == NULL) > + return got_error_from_errno2("realpath", > + optarg); > + got_path_strip_trailing_slashes(repo_path); > + break; > + case 'x': > + error = got_pathlist_append(&exclude_args, > + optarg, NULL); > + if (error) > + return error; > + break; > + default: > + usage_dump(); > + /* NOTREACHED */ > + } > + } > + argc -= optind; > + argv += optind; > + > + if (repo_path == NULL) { > + error = get_repo_path(&repo_path); > + if (error) > + goto done; > + } > + error = got_repo_pack_fds_open(&pack_fds); > + if (error != NULL) > + goto done; > + error = got_repo_open(&repo, repo_path, NULL, pack_fds); > + if (error) > + goto done; > + > + error = apply_unveil(got_repo_get_path_git_dir(repo), 0); Can we keep the repo read only for now? error = apply_unveil(got_repo_get_path_git_dir(repo), 1); > + if (error) > + goto done; > + > + TAILQ_FOREACH(pe, &exclude_args, entry) { > + refname = pe->path; > + error = add_ref(&new, &exclude_refs, refname, repo); > + if (error) > + goto done; > + } > + > + if (argc == 0) { > + error = got_ref_list(&include_refs, repo, "", > + got_ref_cmp_by_name, NULL); > + if (error) > + goto done; > + } else { > + for (i = 0; i < argc; i++) { > + got_path_strip_trailing_slashes(argv[i]); > + refname = argv[i]; > + error = add_ref(&new, &include_refs, refname, repo); > + if (error) > + goto done; > + } > + } > + > + /* Ignore references in the refs/got/ namespace. 
*/ > + TAILQ_FOREACH_SAFE(re, &include_refs, entry, new) { > + refname = got_ref_get_name(re->ref); > + if (strncmp("refs/got/", refname, 9) != 0) > + continue; > + TAILQ_REMOVE(&include_refs, re, entry); > + got_ref_close(re->ref); > + free(re); > + } > + > + memset(&ppa, 0, sizeof(ppa)); > + ppa.out = stderr; > + ppa.verbosity = verbosity; > + > + error = got_repo_dump(stdout, &include_refs, &exclude_refs, > + repo, pack_progress, &ppa, check_cancelled, NULL); > + if (ppa.printed_something) > + fprintf(stderr, "\n"); > + done: > + if (repo) > + got_repo_close(repo); > + > + if (pack_fds) { > + const struct got_error *pack_err; > + > + pack_err = got_repo_pack_fds_close(pack_fds); > + if (error == NULL) > + error = pack_err; > + } > + > + got_pathlist_free(&exclude_args, GOT_PATHLIST_FREE_NONE); > + got_ref_list_free(&exclude_refs); > + got_ref_list_free(&include_refs); we need to free repo_path too: free(repo_path); > + > + return error; > +} > blob - /dev/null > blob + 73dc382e826ef264c3c23706e60a92c75f81d4d6 (mode 644) > --- /dev/null > +++ include/got_repository_dump.h > @@ -0,0 +1,22 @@ > +/* > + * Copyright (c) 2023 Omar Polo > + * > + * Permission to use, copy, modify, and distribute this software for any > + * purpose with or without fee is hereby granted, provided that the above > + * copyright notice and this permission notice appear in all copies. > + * > + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES > + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF > + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR > + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES > + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN > + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF > + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. > + */ > + > +/* Output a bundle to the given file. 
*/ > +const struct got_error * > +got_repo_dump(FILE *out, struct got_reflist_head *include_refs, > + struct got_reflist_head *exclude_refs, struct got_repository *repo, > + got_pack_progress_cb progress_cb, void *progress_arg, > + got_cancel_cb cancel_cb, void *cancel_arg); > blob - /dev/null > blob + 845ac0dd1d3f99357c7a70e09b1f47d509ac1770 (mode 644) > --- /dev/null > +++ lib/dump.c > @@ -0,0 +1,187 @@ > +/* > + * Copyright (c) 2023 Omar Polo > + * > + * Permission to use, copy, modify, and distribute this software for any > + * purpose with or without fee is hereby granted, provided that the above > + * copyright notice and this permission notice appear in all copies. > + * > + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES > + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF > + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR > + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES > + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN > + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF > + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
> + */ > + > +#include > +#include > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +#include "got_error.h" > +#include "got_cancel.h" > +#include "got_reference.h" > +#include "got_repository_admin.h" /* XXX for pack_progress */ > +#include "got_object.h" > +#include "got_opentemp.h" > +#include "got_repository_dump.h" > + > +#include "got_lib_delta.h" > +#include "got_lib_object.h" > +#include "got_lib_object_idset.h" > +#include "got_lib_ratelimit.h" > +#include "got_lib_pack_create.h" > + > +#define GIT_BUNDLE_SIGNATURE_V2 "# v2 git bundle" > + > +struct idvec { > + struct got_object_id **ids; > + size_t len; > + size_t size; > +}; > + > +static const struct got_error * > +idvec_push(struct idvec *v, struct got_object_id *id) > +{ > + size_t newsize; > + void *t; > + > + if (v->len == v->size) { > + newsize = v->size + 8; > + t = reallocarray(v->ids, newsize, sizeof(*v->ids)); > + if (t == NULL) > + return got_error_from_errno("reallocarray"); > + v->ids = t; > + v->size = newsize; > + } > + > + v->ids[v->len++] = id; > + return NULL; > +} > + > +static void > +idvec_free(struct idvec *v) > +{ > + size_t i; > + > + for (i = 0; i < v->len; ++i) > + free(v->ids[i]); > + free(v->ids); > +} > + > +const struct got_error * > +got_repo_dump(FILE *out, struct got_reflist_head *include_refs, > + struct got_reflist_head *exclude_refs, struct got_repository *repo, > + got_pack_progress_cb progress_cb, void *progress_arg, > + got_cancel_cb cancel_cb, void *cancel_arg) > +{ > + const struct got_error *err = NULL; > + struct got_ratelimit rl; > + uint8_t packsha[SHA1_DIGEST_LENGTH]; > + FILE *delta_cache = NULL; > + struct got_reflist_entry *e; > + struct got_object_id *id = NULL; > + struct got_commit_object *commit = NULL; > + struct idvec ours, theirs; > + char *nl, *s, *hex, *logmsg = NULL; > + const char *refname; > + int r; > + > + got_ratelimit_init(&rl, 0, 500); > + > + memset(&ours, 0, 
sizeof(ours)); > + memset(&theirs, 0, sizeof(theirs)); > + > + r = fprintf(out, "%s\n", GIT_BUNDLE_SIGNATURE_V2); > + if (r != strlen(GIT_BUNDLE_SIGNATURE_V2) + 1) > + return got_ferror(out, GOT_ERR_IO); > + > + TAILQ_FOREACH(e, exclude_refs, entry) { > + err = got_ref_resolve(&id, repo, e->ref); > + if (err) > + goto done; > + > + idvec_push(&theirs, id); > + if (err) > + goto done; > + > + err = got_object_open_as_commit(&commit, repo, id); > + if (err) > + goto done; > + > + err = got_object_commit_get_logmsg(&logmsg, commit); > + if (err) > + goto done; > + > + s = logmsg; > + while (isspace((unsigned char)*s)) > + s++; > + nl = strchr(s, '\n'); > + if (nl) > + *nl = '\0'; > + > + err = got_object_id_str(&hex, id); > + if (err) > + goto done; > + fprintf(out, "-%s %s\n", hex, s); > + free(hex); > + > + got_object_commit_close(commit); > + commit = NULL; > + > + free(logmsg); > + logmsg = NULL; > + } > + > + TAILQ_FOREACH(e, include_refs, entry) { > + err = got_ref_resolve(&id, repo, e->ref); > + if (err) > + goto done; > + > + err = idvec_push(&ours, id); > + if (err) > + goto done; > + > + refname = got_ref_get_name(e->ref); > + > + err = got_object_id_str(&hex, id); > + if (err) > + goto done; > + fprintf(out, "%s %s\n", hex, refname); > + free(hex); > + } > + > + if (fputc('\n', out) == EOF || fflush(out) == EOF) { > + err = got_ferror(out, GOT_ERR_IO); > + goto done; > + } > + > + delta_cache = got_opentemp(); > + if (delta_cache == NULL) { > + err = got_error_from_errno("got_opentemp"); > + goto done; > + } > + > + err = got_pack_create(&packsha[0], fileno(out), delta_cache, > + theirs.ids, theirs.len, ours.ids, ours.len, > + repo, 0, 0, 0, progress_cb, progress_arg, &rl, > + cancel_cb, cancel_arg); > + > + done: > + idvec_free(&ours); > + idvec_free(&theirs); > + if (commit) > + got_object_commit_close(commit); > + if (delta_cache && fclose(delta_cache) == EOF && err == NULL) > + err = got_error_from_errno("fclose"); > + return err; > +} > blob - 
9308738c9d24a7b328192c49138b59da3a6756c2 > blob + 0b66aa7ed80c3e660be33ffdb8bb00ac61dcacd8 > --- regress/cmdline/Makefile > +++ regress/cmdline/Makefile > @@ -1,7 +1,7 @@ > REGRESS_TARGETS=checkout update status log add rm diff blame branch tag \ > ref commit revert cherrypick backout rebase init import histedit \ > integrate merge stage unstage cat clone fetch send tree patch pack \ > - cleanup > + cleanup dump > NOOBJ=Yes > > GOT_TEST_ROOT=/tmp > @@ -99,5 +99,7 @@ cleanup: > cleanup: > ./cleanup.sh -q -r "$(GOT_TEST_ROOT)" > > +dump: > + ./dump.sh -q -r "$(GOT_TEST_ROOT)" > > .include > blob - /dev/null > blob + f7c13d273418faebbf6a5954344c0c7ced9da08c (mode 755) > --- /dev/null > +++ regress/cmdline/dump.sh > @@ -0,0 +1,97 @@ > +#!/bin/sh > +# > +# Copyright (c) 2023 Omar Polo > +# > +# Permission to use, copy, modify, and distribute this software for any > +# purpose with or without fee is hereby granted, provided that the above > +# copyright notice and this permission notice appear in all copies. > +# > +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES > +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF > +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR > +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES > +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN > +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF > +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. > + > +. ./common.sh > + > +test_dump_bundle() { > + local testroot=`test_init test_dump_bundle` > + > + # add a fake reference so that `got log' appears the same in > + # the cloned repository > + (cd "$testroot/repo" && got branch -n origin/master) > + > + (cd "$testroot/repo" && got log -p >$testroot/repo.log) > + > + (cd "$testroot/repo" && gotadmin dump -q master >$testroot/r.bundle) > + if [ $? 
-ne 0 ]; then > + echo "gotadmin dump failed unexpectedly" >&2 > + test_done "$testroot" 1 > + return 1 > + fi > + > + if ! (cd "$testroot" && git clone -b master -q r.bundle); then > + echo "failed to git clone from the generated bundle" >&2 > + test_done "$testroot" 1 > + return 1 > + fi > + > + if ! (cd "$testroot/r" && got log -p >$testroot/r.log); then > + echo "got log failed unexpectedly" >&2 > + test_done "$testroot" 1 > + return 1 > + fi > + > + if ! cmp -s "$testroot/repo.log" "$testroot/r.log"; then > + echo "history differs after clone" >&2 > + diff -u "$testroot/repo.log" "$testroot/r.log" > + test_done "$testroot" 1 > + return 1 > + fi > + > + (cd "$testroot/repo" && git checkout -q -b newbranch) > + > + # commit some changes in the repo > + for i in `seq 5`; do > + echo "alpha edit #$i" > $testroot/repo/alpha > + git_commit "$testroot/repo" -m "edit alpha" > + done > + > + (cd "$testroot/repo" && \ > + gotadmin dump -q -x master newbranch >$testroot/r.bundle) > + if [ $? -ne 0 ]; then > + echo "gotadmin dump failed unexpectedly" >&2 > + test_done "$testroot" 1 > + return 1 > + fi > + > + (cd "$testroot/r" && git checkout -q -b newbranch && \ > + git pull -q "$testroot/r.bundle" newbranch) > + if [ $? -ne 0 ]; then > + echo "git pull failed unexpectedly" >&2 > + test_done "$testroot" 1 > + return 1 > + fi > + > + (cd "$testroot/repo" && got log -p >$testroot/repo.log) > + > + if ! (cd "$testroot/r" && got log -p >$testroot/r.log); then > + echo "got log failed unexpectedly" >&2 > + test_done "$testroot" 1 > + return 1 > + fi > + > + if ! cmp -s "$testroot/repo.log" "$testroot/r.log"; then > + echo "history differs after pull" >&2 > + diff -u "$testroot/repo.log" "$testroot/r.log" > + test_done "$testroot" 1 > + return 1 > + fi > + > + test_done "$testroot" 0 > +} > + > +test_parseargs "$@" > +run_test test_dump_bundle -- Mark Jamsek GPG: F2FF 13DE 6A06 C471 CA80 E6E2 2930 DC66 86EE CF68