From: Omar Polo Subject: introducing gotadmin dump To: gameoftrees@openbsd.org Date: Thu, 06 Jul 2023 16:29:43 +0200 here's a first attempt at teaching gotadmin to generate bundles. Git bundles are packfiles with a text header that git can clone or pull from. They're useful as a means to back up or transfer offline (part of) the history. The plan would be to extend it to generate a plain-text version of the history (a fast-export stream) too, and add a symmetric command `load' to, well, load the bundle or fast-import stream in the repository. (that's also why i'm adding a new file dump.c, it will grow functions to generate a fast-export stream.) as noted in the other thread, I'm not sure how to handle the progress, but didn't want to stress this yet, so i'm just reusing the pack progress callback. I'll have to revisit it soon(tm) to handle the fast-export stream format, and didn't want to add a wrapper callback just for the sake of having a different name. ----------------------------------------------- commit e528344ca7439b5a2401f3c132c91eb6046e6784 (fe) from: Omar Polo date: Thu Jul 6 14:27:22 2023 UTC add an initial implementation of gotadmin dump gotadmin dump is used to export (part of) the history of the repository; at the moment it only generates git bundles (which are pack files with a header) but support for generating a fast-import stream is planned. 
diff 27555e8e6053ab0cc846d201757b588d5a79293f e528344ca7439b5a2401f3c132c91eb6046e6784 commit - 27555e8e6053ab0cc846d201757b588d5a79293f commit + e528344ca7439b5a2401f3c132c91eb6046e6784 blob - 6e194139586ae1731a9aef3771d60596d3d0c756 blob + 80e118616d09947850bdc2b12829cac8eb2c8b3f --- gotadmin/Makefile +++ gotadmin/Makefile @@ -11,7 +11,8 @@ SRCS= gotadmin.c \ worktree_open.c hash.c bloom.c murmurhash2.c ratelimit.c \ sigs.c buf.c date.c object_open_privsep.c \ read_gitconfig_privsep.c read_gotconfig_privsep.c \ - pack_create_privsep.c pollfd.c reference_parse.c object_qid.c + pack_create_privsep.c pollfd.c reference_parse.c object_qid.c \ + dump.c MAN = ${PROG}.1 CPPFLAGS = -I${.CURDIR}/../include -I${.CURDIR}/../lib blob - 8be42abe6d6ca03b938b1aebc0e2200cf5ffbf9f blob + 82bdcfb3e951cfff17a4fcf32279e0ce6ca40880 --- gotadmin/gotadmin.1 +++ gotadmin/gotadmin.1 @@ -337,7 +337,55 @@ work tree, use the repository path associated with thi .Xr got 1 work tree, use the repository path associated with this work tree. .El +.It Xo +.Cm dump +.Op Fl q +.Op Fl r Ar repository-path +.Op Fl x Ar reference +.Op Ar reference ... +.Xc +Dump the contents of the repository to standard output. +.Pp +If one or more +.Ar reference +arguments are specified, only add objects which are reachable via the specified +references. +Each +.Ar reference +argument may either specify a specific reference or a reference namespace, +in which case all references within this namespace will be used. +.Pp +The options for +.Nm +.Cm dump +are as follows: +.Bl -tag -width Ds +.It Fl q +Suppress progress reporting output. +.It Fl r Ar repository-path +Use the repository at the specified path. +If not specified, assume the repository is located at or above the current +working directory. +If this directory is a +.Xr got 1 +work tree, use the repository path associated with this work tree. +.It Fl x Ar reference +Exclude objects reachable via the specified +.Ar reference +from the dump file. 
+The +.Ar reference +argument may either specify a specific reference or a reference namespace, +in which case all references within this namespace will be excluded. +The +.Fl x +option may be specified multiple times to build a list of references to exclude. +.Pp +Exclusion takes precedence over inclusion. +If a reference appears in both the included and excluded lists, it will +be excluded. .El +.El .Sh EXIT STATUS .Ex -std gotadmin .Sh SEE ALSO blob - 5551b60b73215eb11a82c034e1d2601ed9a15c31 blob + b6e6a92afc9d33254a55b1b686fb56d3d9604b59 --- gotadmin/gotadmin.c +++ gotadmin/gotadmin.c @@ -39,6 +39,7 @@ #include "got_cancel.h" #include "got_repository.h" #include "got_repository_admin.h" +#include "got_dump.h" #include "got_gotconfig.h" #include "got_path.h" #include "got_privsep.h" @@ -86,6 +87,7 @@ __dead static void usage_cleanup(void); __dead static void usage_indexpack(void); __dead static void usage_listpack(void); __dead static void usage_cleanup(void); +__dead static void usage_dump(void); static const struct got_error* cmd_init(int, char *[]); static const struct got_error* cmd_info(int, char *[]); @@ -93,6 +95,7 @@ static const struct got_error* cmd_cleanup(int, char static const struct got_error* cmd_indexpack(int, char *[]); static const struct got_error* cmd_listpack(int, char *[]); static const struct got_error* cmd_cleanup(int, char *[]); +static const struct got_error* cmd_dump(int, char *[]); static const struct gotadmin_cmd gotadmin_commands[] = { { "init", cmd_init, usage_init, "" }, @@ -101,6 +104,7 @@ static const struct gotadmin_cmd gotadmin_commands[] = { "indexpack", cmd_indexpack, usage_indexpack,"ix" }, { "listpack", cmd_listpack, usage_listpack, "ls" }, { "cleanup", cmd_cleanup, usage_cleanup, "cl" }, + { "dump", cmd_dump, usage_dump, "" }, }; static void @@ -459,6 +463,7 @@ struct got_pack_progress_arg { } struct got_pack_progress_arg { + FILE *out; char last_scaled_size[FMT_SCALED_STRSIZE]; int last_ncolored; int last_nfound; @@ 
-475,20 +480,20 @@ print_load_info(int print_colored, int print_found, in }; static void -print_load_info(int print_colored, int print_found, int print_trees, +print_load_info(FILE *out, int print_colored, int print_found, int print_trees, int ncolored, int nfound, int ntrees) { if (print_colored) { - printf("%d commit%s colored", ncolored, + fprintf(out, "%d commit%s colored", ncolored, ncolored == 1 ? "" : "s"); } if (print_found) { - printf("%s%d object%s found", + fprintf(out, "%s%d object%s found", ncolored > 0 ? "; " : "", nfound, nfound == 1 ? "" : "s"); } if (print_trees) { - printf("; %d tree%s scanned", ntrees, + fprintf(out, "; %d tree%s scanned", ntrees, ntrees == 1 ? "" : "s"); } } @@ -528,16 +533,16 @@ pack_progress(void *arg, int ncolored, int nfound, int if ((print_colored || print_found || print_trees) && !a->loading_done) { - printf("\r"); - print_load_info(print_colored, print_found, print_trees, - ncolored, nfound, ntrees); + fprintf(a->out, "\r"); + print_load_info(a->out, print_colored, print_found, + print_trees, ncolored, nfound, ntrees); a->printed_something = 1; - fflush(stdout); + fflush(a->out); return NULL; } else if (!a->loading_done) { - printf("\r"); - print_load_info(1, 1, 1, ncolored, nfound, ntrees); - printf("\n"); + fprintf(a->out, "\r"); + print_load_info(a->out, 1, 1, 1, ncolored, nfound, ntrees); + fprintf(a->out, "\n"); a->loading_done = 1; } @@ -585,22 +590,22 @@ pack_progress(void *arg, int ncolored, int nfound, int } if (print_searching || print_total || print_deltify || print_written) - printf("\r"); + fprintf(a->out, "\r"); if (print_searching) - printf("packing %d reference%s", ncommits, + fprintf(a->out, "packing %d reference%s", ncommits, ncommits == 1 ? "" : "s"); if (print_total) - printf("; %d object%s", nobj_total, + fprintf(a->out, "; %d object%s", nobj_total, nobj_total == 1 ? 
"" : "s"); if (print_deltify) - printf("; deltify: %d%%", p_deltify); + fprintf(a->out, "; deltify: %d%%", p_deltify); if (print_written) - printf("; writing pack: %*s %d%%", FMT_SCALED_STRSIZE - 2, - scaled_size, p_written); + fprintf(a->out, "; writing pack: %*s %d%%", + FMT_SCALED_STRSIZE - 2, scaled_size, p_written); if (print_searching || print_total || print_deltify || print_written) { a->printed_something = 1; - fflush(stdout); + fflush(a->out); } return NULL; } @@ -799,6 +804,7 @@ cmd_pack(int argc, char *argv[]) } memset(&ppa, 0, sizeof(ppa)); + ppa.out = stdout; ppa.last_scaled_size[0] = '\0'; ppa.last_p_indexed = -1; ppa.last_p_resolved = -1; @@ -900,6 +906,7 @@ cmd_indexpack(int argc, char *argv[]) goto done; memset(&ppa, 0, sizeof(ppa)); + ppa.out = stdout; ppa.last_scaled_size[0] = '\0'; ppa.last_p_indexed = -1; ppa.last_p_resolved = -1; @@ -1399,3 +1406,140 @@ done: free(repo_path); return error; } + +__dead static void +usage_dump(void) +{ + fprintf(stderr, "usage: %s dump [-q] [-r repository-path] " + "[-x reference] [reference]...\n", + getprogname()); + exit(1); +} + +static const struct got_error * +cmd_dump(int argc, char *argv[]) +{ + const struct got_error *error = NULL; + struct got_pack_progress_arg ppa; + struct got_repository *repo = NULL; + struct got_pathlist_head exclude_args; + struct got_pathlist_entry *pe; + struct got_reflist_head exclude_refs; + struct got_reflist_head include_refs; + struct got_reflist_entry *re, *new; + const char *refname; + char *repo_path = NULL; + int *pack_fds = NULL; + int verbosity = 0; + int i, ch; + + TAILQ_INIT(&exclude_args); + TAILQ_INIT(&exclude_refs); + TAILQ_INIT(&include_refs); + +#ifndef PROFILE + if (pledge("stdio rpath wpath cpath flock proc exec sendfd unveil", + NULL) == -1) + err(1, "pledge"); +#endif + + while ((ch = getopt(argc, argv, "qr:x:")) != -1) { + switch (ch) { + case 'q': + verbosity = -1; + break; + case 'r': + repo_path = realpath(optarg, NULL); + if (repo_path == NULL) + 
return got_error_from_errno2("realpath", + optarg); + got_path_strip_trailing_slashes(repo_path); + break; + case 'x': + error = got_pathlist_append(&exclude_args, + optarg, NULL); + if (error) + return error; + break; + default: + usage_dump(); + /* NOTREACHED */ + } + } + argc -= optind; + argv += optind; + + if (repo_path == NULL) { + error = get_repo_path(&repo_path); + if (error) + goto done; + } + error = got_repo_pack_fds_open(&pack_fds); + if (error != NULL) + goto done; + error = got_repo_open(&repo, repo_path, NULL, pack_fds); + if (error) + goto done; + + error = apply_unveil(got_repo_get_path_git_dir(repo), 0); + if (error) + goto done; + + TAILQ_FOREACH(pe, &exclude_args, entry) { + refname = pe->path; + error = add_ref(&new, &exclude_refs, refname, repo); + if (error) + goto done; + } + + if (argc == 0) { + error = got_ref_list(&include_refs, repo, "", + got_ref_cmp_by_name, NULL); + if (error) + goto done; + } else { + for (i = 0; i < argc; i++) { + got_path_strip_trailing_slashes(argv[i]); + refname = argv[i]; + error = add_ref(&new, &include_refs, refname, repo); + if (error) + goto done; + } + } + + /* Ignore references in the refs/got/ namespace. 
*/ + TAILQ_FOREACH_SAFE(re, &include_refs, entry, new) { + refname = got_ref_get_name(re->ref); + if (strncmp("refs/got/", refname, 9) != 0) + continue; + TAILQ_REMOVE(&include_refs, re, entry); + got_ref_close(re->ref); + free(re); + } + + memset(&ppa, 0, sizeof(ppa)); + ppa.out = stderr; + ppa.verbosity = verbosity; + + error = got_dump(stdout, &include_refs, &exclude_refs, + repo, pack_progress, &ppa, check_cancelled, NULL); + if (ppa.printed_something) + fprintf(stderr, "\n"); + done: + if (repo) + got_repo_close(repo); + + if (pack_fds) { + const struct got_error *pack_err; + + pack_err = got_repo_pack_fds_close(pack_fds); + if (error == NULL) + error = pack_err; + } + + got_pathlist_free(&exclude_args, GOT_PATHLIST_FREE_NONE); + got_ref_list_free(&exclude_refs); + got_ref_list_free(&include_refs); + + return error; +} blob - /dev/null blob + 8ba551d76e42fefa0426ef5a754370ef22d787fd (mode 644) --- /dev/null +++ include/got_dump.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* Output a bundle to the given file. 
*/ +const struct got_error * +got_dump(FILE *out, struct got_reflist_head *include_refs, + struct got_reflist_head *exclude_refs, struct got_repository *repo, + got_pack_progress_cb progress_cb, void *progress_arg, + got_cancel_cb cancel_cb, void *cancel_arg); blob - /dev/null blob + 48e13ed85bfdc396804b57e89345093843499954 (mode 644) --- /dev/null +++ lib/dump.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2023 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "got_error.h" +#include "got_cancel.h" +#include "got_reference.h" +#include "got_repository_admin.h" /* XXX for pack_progress */ +#include "got_object.h" +#include "got_opentemp.h" +#include "got_dump.h" + +#include "got_lib_delta.h" +#include "got_lib_object.h" +#include "got_lib_object_idset.h" +#include "got_lib_ratelimit.h" +#include "got_lib_pack_create.h" + +#define GIT_BUNDLE_SIGNATURE_V2 "# v2 git bundle" + +/* Growable array of object ids; idvec_free() frees the ids stored in it. */ +struct idvec { + struct got_object_id **ids; + size_t len; + size_t cap; +}; + +/* Append id to v, growing the array by 8 slots when it is full. */ +static const struct got_error * +idvec_push(struct idvec *v, struct got_object_id *id) +{ + size_t newcap; + void *t; + + if (v->len == v->cap) { + newcap = v->cap + 8; + t = reallocarray(v->ids, newcap, sizeof(*v->ids)); + if (t == NULL) + return got_error_from_errno("reallocarray"); + v->ids = t; + v->cap = newcap; + } + + v->ids[v->len++] = id; + return NULL; +} + +/* Free every id stored in v, then the array itself. */ +static void +idvec_free(struct idvec *v) +{ + size_t i; + + for (i = 0; i < v->len; ++i) + free(v->ids[i]); + free(v->ids); +} + +/* + * Write a v2 git bundle to out: the signature line, one "-ID first-log-line" + * line per excluded reference, one "ID refname" line per included reference, + * an empty line, and then the pack produced by got_pack_create(). 
+ */ +const struct got_error * +got_dump(FILE *out, struct got_reflist_head *include_refs, + struct got_reflist_head *exclude_refs, struct got_repository *repo, + got_pack_progress_cb progress_cb, void *progress_arg, + got_cancel_cb cancel_cb, void *cancel_arg) +{ + const struct got_error *err = NULL; + struct got_ratelimit rl; + uint8_t packsha[SHA1_DIGEST_LENGTH]; + FILE *delta_cache = NULL; + struct got_reflist_entry *e; + struct got_object_id *id = NULL; + struct got_commit_object *commit = NULL; + struct idvec ours, theirs; + char *nl, *s, *hex, *logmsg = NULL; + const char *refname; + int r; + + got_ratelimit_init(&rl, 0, 500); + + memset(&ours, 0, sizeof(ours)); + memset(&theirs, 0, sizeof(theirs)); + + r = fprintf(out, "%s\n", GIT_BUNDLE_SIGNATURE_V2); + if (r != strlen(GIT_BUNDLE_SIGNATURE_V2) + 1) + return got_ferror(out, GOT_ERR_IO); + + 
TAILQ_FOREACH(e, exclude_refs, entry) { + err = got_ref_resolve(&id, repo, e->ref); + if (err) + goto done; + + err = idvec_push(&theirs, id); + if (err) { + free(id); /* push failed, so the vector does not hold id */ + goto done; + } + + err = got_object_open_as_commit(&commit, repo, id); + if (err) + goto done; + + err = got_object_commit_get_logmsg(&logmsg, commit); + if (err) + goto done; + + s = logmsg; + while (isspace((unsigned char)*s)) + s++; + nl = strchr(s, '\n'); + if (nl) + *nl = '\0'; + + err = got_object_id_str(&hex, id); + if (err) + goto done; + fprintf(out, "-%s %s\n", hex, s); + free(hex); + + got_object_commit_close(commit); + commit = NULL; + + free(logmsg); + logmsg = NULL; + } + + TAILQ_FOREACH(e, include_refs, entry) { + err = got_ref_resolve(&id, repo, e->ref); + if (err) + goto done; + + err = idvec_push(&ours, id); + if (err) { + free(id); /* push failed, so the vector does not hold id */ + goto done; + } + + refname = got_ref_get_name(e->ref); + + err = got_object_id_str(&hex, id); + if (err) + goto done; + fprintf(out, "%s %s\n", hex, refname); + free(hex); + } + + if (fputc('\n', out) == EOF || fflush(out) == EOF) { + err = got_ferror(out, GOT_ERR_IO); + goto done; + } + + delta_cache = got_opentemp(); + if (delta_cache == NULL) { + err = got_error_from_errno("got_opentemp"); + goto done; + } + + err = got_pack_create(&packsha[0], fileno(out), delta_cache, + theirs.ids, theirs.len, ours.ids, ours.len, + repo, 0, 0, 0, progress_cb, progress_arg, &rl, + cancel_cb, cancel_arg); + + done: + idvec_free(&ours); + idvec_free(&theirs); + free(logmsg); /* non-NULL only when we bailed out mid-iteration */ + if (commit) + got_object_commit_close(commit); + if (delta_cache && fclose(delta_cache) == EOF && err == NULL) + err = got_error_from_errno("fclose"); + return err; +} blob - 9308738c9d24a7b328192c49138b59da3a6756c2 blob + 0b66aa7ed80c3e660be33ffdb8bb00ac61dcacd8 --- regress/cmdline/Makefile +++ regress/cmdline/Makefile @@ -1,7 +1,7 @@ 
REGRESS_TARGETS=checkout update status log add rm diff blame branch tag \ ref commit revert cherrypick backout rebase init import histedit \ integrate merge stage unstage cat clone fetch send tree patch pack \ - 
cleanup + cleanup dump NOOBJ=Yes GOT_TEST_ROOT=/tmp @@ -99,5 +99,7 @@ cleanup: cleanup: ./cleanup.sh -q -r "$(GOT_TEST_ROOT)" +dump: + ./dump.sh -q -r "$(GOT_TEST_ROOT)" .include blob - /dev/null blob + 789d7f5e4ce7e913d61aab2a3ee2b7436a41a45c (mode 755) --- /dev/null +++ regress/cmdline/dump.sh @@ -0,0 +1,101 @@ +#!/bin/sh +# +# Copyright (c) 2023 Omar Polo +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +. ./common.sh + +test_dump_bundle() { + local testroot=`test_init test_dump_bundle` + + # add a fake reference so that `got log' appears the same in + # the cloned repository + (cd "$testroot/repo" && got branch -n origin/master) + + (cd "$testroot/repo" && got log >$testroot/repo.log) + + (cd "$testroot/repo" && gotadmin dump -q master >$testroot/r.bundle) + if [ $? -ne 0 ]; then + echo "gotadmin dump failed unexpectedly" >&2 + test_done "$testroot" 1 + return 1 + fi + + if ! (cd "$testroot" && git clone -b master -q r.bundle); then + echo "failed to git clone from the generated bundle" >&2 + test_done "$testroot" 1 + return 1 + fi + + if ! (cd "$testroot/r" && got log >$testroot/r.log); then + echo "got log failed unexpectedly" >&2 + test_done "$testroot" 1 + return 1 + fi + + if ! 
cmp -s "$testroot/repo.log" "$testroot/r.log"; then + echo "history differs after clone" >&2 + diff -u "$testroot/repo.log" "$testroot/r.log" + test_done "$testroot" 1 + return 1 + fi + + (cd "$testroot/repo" && git checkout -q -b newbranch) + + # commit some changes in the repo + for i in `seq 5`; do + echo "alpha edit #$i" > $testroot/repo/alpha + git_commit "$testroot/repo" -m "edit alpha" + done + + (cd "$testroot/repo" && \ + gotadmin dump -q -x master newbranch >$testroot/r.bundle) + if [ $? -ne 0 ]; then + echo "gotadmin dump failed unexpectedly" >&2 + test_done "$testroot" 1 + return 1 + fi + + (cd "$testroot/r" && git checkout -q -b newbranch && \ + git pull -q origin newbranch) + if [ $? -ne 0 ]; then + echo "git pull failed unexpectedly" >&2 + test_done "$testroot" 1 + return 1 + fi + + # add a fake reference so that `got log' appears the same in + # the cloned repository + (cd "$testroot/repo" && got branch -c newbranch -n origin/newbranch) + + (cd "$testroot/repo" && got log >$testroot/repo.log) + + if ! (cd "$testroot/r" && got log >$testroot/r.log); then + echo "got log failed unexpectedly" >&2 + test_done "$testroot" 1 + return 1 + fi + + if ! cmp -s "$testroot/repo.log" "$testroot/r.log"; then + echo "history differs after pull" >&2 + diff -u "$testroot/repo.log" "$testroot/r.log" + test_done "$testroot" 1 + return 1 + fi + + test_done "$testroot" 0 +} + +test_parseargs "$@" +run_test test_dump_bundle