From: Stefan Sperling Subject: Re: got clone: start of effort. To: Ori Bernstein Cc: gameoftrees@openbsd.org Date: Fri, 29 Nov 2019 13:45:18 -0700 On Fri, Nov 29, 2019 at 12:17:36PM -0800, Ori Bernstein wrote: > This diff implements the fetch protocol support for git+ssh, and almost > implements it for unencrypted git:// (haven't written the code to > open the socket yet) > > It's a quick and dirty port/airdrop/privsepification of the code I wrote > for git9, so it doesn't match the style or the data structures used in > the rest of got. We'll need to do a few passes to clean it up and make it > fit before i'd be happy getting it in, but I'd like to show where things > are. > > There's no unveil or pledge. > > The code in git-fetch-pack should be usable for incremental fetches, too, > once we do the plumbing to figure out which objects are in the tree. > > I'll be doing another diff to implement git push, doing the same kind of > airdrop. Thanks so much for working on this! Comments inline: > diff --git a/got/Makefile b/got/Makefile > index 709948a..27b8780 100644 > --- a/got/Makefile > +++ b/got/Makefile > @@ -8,7 +8,7 @@ SRCS= got.c blame.c commit_graph.c delta.c diff.c \ > object_idset.c object_parse.c opentemp.c path.c pack.c \ > privsep.c reference.c repository.c sha1.c worktree.c \ > inflate.c buf.c rcsutil.c diff3.c lockfile.c \ > - deflate.c object_create.c delta_cache.c > + deflate.c object_create.c delta_cache.c fetch.c > MAN = ${PROG}.1 got-worktree.5 git-repository.5 > > CPPFLAGS = -I${.CURDIR}/../include -I${.CURDIR}/../lib > diff --git a/got/got.c b/got/got.c > index 9f40340..22bd4d7 100644 > --- a/got/got.c > +++ b/got/got.c > @@ -45,6 +45,7 @@ > #include "got_worktree.h" > #include "got_diff.h" > #include "got_commit_graph.h" > +#include "got_lib_fetch.h" > #include "got_blame.h" > #include "got_privsep.h" > #include "got_opentemp.h" > @@ -80,6 +81,7 @@ __dead static void usage(int); > __dead static void usage_init(void); > __dead static void 
usage_import(void); > __dead static void usage_checkout(void); > +__dead static void usage_clone(void); > __dead static void usage_update(void); > __dead static void usage_log(void); > __dead static void usage_diff(void); > @@ -104,6 +106,7 @@ __dead static void usage_cat(void); > > static const struct got_error* cmd_init(int, char *[]); > static const struct got_error* cmd_import(int, char *[]); > +static const struct got_error* cmd_clone(int, char *[]); > static const struct got_error* cmd_checkout(int, char *[]); > static const struct got_error* cmd_update(int, char *[]); > static const struct got_error* cmd_log(int, char *[]); > @@ -131,6 +134,7 @@ static struct got_cmd got_commands[] = { > { "init", cmd_init, usage_init, "in" }, > { "import", cmd_import, usage_import, "im" }, > { "checkout", cmd_checkout, usage_checkout, "co" }, > + { "clone", cmd_clone, usage_clone, "cl" }, > { "update", cmd_update, usage_update, "up" }, > { "log", cmd_log, usage_log, "" }, > { "diff", cmd_diff, usage_diff, "di" }, > @@ -794,6 +798,13 @@ done: > return error; > } > > +__dead static void > +usage_clone(void) > +{ > + fprintf(stderr, "usage: %s clone repo-path\n", getprogname()); clone repo-url? > + exit(1); > +} > + > __dead static void > usage_checkout(void) > { > @@ -969,6 +980,34 @@ resolve_commit_arg(struct got_object_id **commit_id, > return err; > } > > +static const struct got_error * > +cmd_clone(int argc, char *argv[]) > +{ > + char *uri, *branch_filter, *dirname; > + int ch; > + > + while ((ch = getopt(argc, argv, "b:c:p:")) != -1) { -c and -p aren't used. 
> + switch (ch) { > + case 'b': > + branch_filter = optarg; > + break; > + default: > + usage_clone(); > + break; > + } > + } > + argc -= optind; > + argv += optind; > + uri = argv[0]; > + if(argc == 1) > + dirname = NULL; > + else if(argc == 2) > + dirname = argv[1]; > + else > + usage_clone(); > + return got_clone(argv[0], branch_filter, dirname); > +} > + > static const struct got_error * > cmd_checkout(int argc, char *argv[]) > { > diff --git a/include/got_error.h b/include/got_error.h > index d0df125..50c6e2d 100644 > --- a/include/got_error.h > +++ b/include/got_error.h > @@ -128,6 +128,9 @@ > #define GOT_ERR_REGEX 112 > #define GOT_ERR_REF_NAME_MINUS 113 > #define GOT_ERR_GITCONFIG_SYNTAX 114 > +#define GOT_ERR_FETCH_FAILED 115 > +#define GOT_ERR_PARSE_URI 116 > +#define GOT_ERR_BAD_PROTO 117 > > static const struct got_error { > int code; > @@ -262,6 +265,9 @@ static const struct got_error { > { GOT_ERR_REGEX, "regular expression error" }, > { GOT_ERR_REF_NAME_MINUS, "reference name may not start with '-'" }, > { GOT_ERR_GITCONFIG_SYNTAX, "gitconfig syntax error" }, > + { GOT_ERR_FETCH_FAILED, "fetch failed" }, > + { GOT_ERR_PARSE_URI, "failed to parse uri" }, > + { GOT_ERR_BAD_PROTO, "unknown protocol" }, > }; > > /* > diff --git a/lib/fetch.c b/lib/fetch.c > new file mode 100644 > index 0000000..dfba5c3 > --- /dev/null > +++ b/lib/fetch.c > @@ -0,0 +1,342 @@ > +/* > + * Copyright (c) 2018, 2019 Stefan Sperling Please add your own copyright claim here. I didn't write most of this. > + * > + * Permission to use, copy, modify, and distribute this software for any > + * purpose with or without fee is hereby granted, provided that the above > + * copyright notice and this permission notice appear in all copies. > + * > + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES > + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF > + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR > + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES > + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN > + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF > + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. > + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +#include "got_error.h" > +#include "got_reference.h" > +#include "got_repository.h" > +#include "got_path.h" > +#include "got_cancel.h" > +#include "got_worktree.h" > +#include "got_object.h" > + > +#include "got_lib_delta.h" > +#include "got_lib_inflate.h" > +#include "got_lib_object.h" > +#include "got_lib_object_parse.h" > +#include "got_lib_object_create.h" > +#include "got_lib_pack.h" > +#include "got_lib_sha1.h" > +#include "got_lib_privsep.h" > +#include "got_lib_object_cache.h" > +#include "got_lib_repository.h" > + > +#define GOT_PROTOMAX 64 > +#define GOT_HOSTMAX 256 > +#define GOT_PATHMAX 512 > +#define GOT_REPOMAX 256 > +#define GOT_PORTMAX 16 > +#define GOT_URIMAX 1024 > + > +static int > +mkpath(char *path) > +{ > + char *p, namebuf[PATH_MAX]; > + struct stat sb; > + int done; > + > + while (*path == '/') > + path++; > + if(strlcpy(namebuf, path, sizeof(namebuf)) >= sizeof(namebuf)) { > + errno = ENAMETOOLONG; > + return -1; > + } > + > + p = namebuf; > + for (;;) { > + p += strspn(p, "/"); > + p += strcspn(p, "/"); > + done = (*p == '\0'); > + *p = '\0'; > + > + if (mkdir(namebuf, 0755) != 0) { > + int mkdir_errno = errno; > + if (stat(path, &sb) == -1) { > + /* Not there; use mkdir()s errno */ > + errno = mkdir_errno; > + return -1; > + } > + if (!S_ISDIR(sb.st_mode)) { > + /* Is there, but isn't a directory */ > + errno = ENOTDIR; > + return 
-1; > + } > + } > + > + if (done) > + break; > + *p = '/'; > + } > + > + return 0; > +} > + > +static int > +hassuffix(char *base, char *suf) > +{ > + int nb, ns; > + > + nb = strlen(base); > + ns = strlen(suf); > + if(ns <= nb && strcmp(base + (nb - ns), suf) == 0) > + return 1; > + return 0; > +} > + > +static int > +grab(char *dst, int n, char *p, char *e) > +{ > + int l; > + > + l = e - p; > + if(l >= n) { > + errno = ENAMETOOLONG; > + return -1; > + } > + return strlcpy(dst, p, l + 1); > +} > + > +static int > +got_dial_ssh(char *host, char *port, char *path, char *direction) > +{ > + int pid, pfd[2]; > + char cmd[64]; > + > + if(pipe(pfd) == -1) > + return -1; > + pid = fork(); > + if(pid == -1) > + return -1; > + if(pid == 0){ > + close(pfd[1]); > + dup2(pfd[0], 0); > + dup2(pfd[0], 1); > + snprintf(cmd, sizeof(cmd), "git-%s-pack", direction); > + execlp("ssh", "ssh", host, cmd, path, NULL); > + abort(); > + }else{ > + close(pfd[0]); > + return pfd[1]; > + } > +} > + > +#if 0 > +int > +got_dial_git(char *host, char *port, char *path, char *direction) > +{ > + struct sockaddr_storage sk; > + char *ds, cmd[128]; > + int fd, l; > + > + ds = netmkaddr(host, "tcp", port); > + fd = dial(ds, NULL, NULL, NULL); > + if(fd == -1) > + return -1; > + if(chattygit) > + fprint(2, "dial %s %s git-%s-pack %s\n", host, port, direction, path); > + l = snprint(cmd, sizeof(cmd), "git-%s-pack %s\n", direction, path); > + if(writepkt(fd, cmd, l + 1) == -1){ > + print("failed to write message\n"); > + close(fd); > + return -1; > + } > + return fd; > +} > +#endif > + > +int > +got_parse_uri(char *uri, char *proto, char *host, char *port, char *path, char *repo) > +{ > + char *s, *p, *q; > + int n, hasport; > + > + p = strstr(uri, "://"); > + if(!p){ > + //werrstr("missing protocol"); > + return -1; > + } > + if (grab(proto, GOT_PROTOMAX, uri, p) == -1) > + return -1; > + hasport = (strcmp(proto, "git") == 0 || strstr(proto, "http") == proto); > + s = p + 3; > + p = NULL; > + 
if(!hasport){ > + p = strstr(s, ":"); > + if(p != NULL) > + p++; > + } > + if(p == NULL) > + p = strstr(s, "/"); > + if(p == NULL || strlen(p) == 1){ > + //werrstr("missing path"); > + return -1; > + } > + > + q = memchr(s, ':', p - s); > + if(q){ > + grab(host, GOT_HOSTMAX, s, q); > + grab(port, GOT_PORTMAX, q + 1, p); > + }else{ > + grab(host, GOT_HOSTMAX, s, p); > + snprintf(port, GOT_PORTMAX, "9418"); > + } > + > + snprintf(path, GOT_PATHMAX, "%s", p); > + p = strrchr(p, '/') + 1; > + if(!p || strlen(p) == 0){ > + //werrstr("missing repository in uri"); > + return -1; > + } > + n = strlen(p); > + if(hassuffix(p, ".git")) > + n -= 4; > + grab(repo, GOT_REPOMAX, p, p + n); > + return 0; > +} > + > +const struct got_error* > +got_clone(char *uri, char *branch_filter, char *dirname) > +{ > + char proto[GOT_PROTOMAX], host[GOT_HOSTMAX], port[GOT_PORTMAX]; > + char repo[GOT_REPOMAX], path[GOT_PATHMAX]; > + int imsg_fetchfds[2], imsg_idxfds[2], fetchfd; > + int packfd, npackfd, idxfd, nidxfd, status; > + struct got_object_id packhash; > + const struct got_error *err; > + struct imsgbuf ibuf; > + pid_t pid; > + > + fetchfd = -1; > + if (got_parse_uri(uri, proto, host, port, path, repo) == -1) > + return got_error(GOT_ERR_PARSE_URI); > + if (dirname == NULL) > + dirname = repo; > + err = got_repo_init(dirname); > + if (err != NULL) > + return err; > + if (chdir(dirname)) > + return got_error_from_errno("enter new repo"); > + if (mkpath(".git/objects/pack") == -1) > + return got_error_from_errno("mkpath"); > + packfd = open(".git/objects/pack/fetching.pack", O_CREAT|O_RDWR, 0644); > + if (packfd == -1) > + return got_error_from_errno("open pack"); > + npackfd = dup(packfd); > + if (npackfd == -1) > + return got_error_from_errno("dup"); > + idxfd = open(".git/objects/pack/fetching.idx", O_CREAT|O_RDWR, 0644); > + if (idxfd == -1) > + return got_error_from_errno("open pack"); > + nidxfd = dup(idxfd); > + if (nidxfd == -1) > + return got_error_from_errno("dup"); > + > + 
if(strcmp(proto, "ssh") == 0 || strcmp(proto, "git+ssh") == 0) > + fetchfd = got_dial_ssh(host, port, path, "upload"); > + //else if(strcmp(proto, "git") == 0) > + // fetchfd = got_dial_git(host, port, path, "upload"); > + else if(strcmp(proto, "http") == 0 || strcmp(proto, "git+http") == 0) > + err = got_error(GOT_ERR_BAD_PROTO); > + else > + err = got_error(GOT_ERR_BAD_PROTO); > + > + if (fetchfd == -1) > + err = got_error_from_errno("dial uri"); > + if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_fetchfds) == -1) > + return got_error_from_errno("socketpair"); > + > + pid = fork(); > + if (pid == -1) > + return got_error_from_errno("fork"); > + else if (pid == 0) > + got_privsep_exec_child(imsg_fetchfds, GOT_PATH_PROG_FETCH_PACK, "."); > + > + if (close(imsg_fetchfds[1]) != 0) > + return got_error_from_errno("close"); > + imsg_init(&ibuf, imsg_fetchfds[0]); > + err = got_privsep_send_fetch_req(&ibuf, fetchfd); > + if (err != NULL) > + return err; > + err = got_privsep_wait_ack(&ibuf); > + if (err != NULL) > + return err; > + err = got_privsep_send_tmpfd(&ibuf, npackfd); > + if (err != NULL) > + return err; > + npackfd = dup(packfd); > + if (npackfd == -1) > + return got_error_from_errno("dup"); > + err = got_privsep_wait_fetch_done(&ibuf, &packhash); > + if (err != NULL) > + return err; > + if (waitpid(pid, &status, 0) == -1) > + return got_error_from_errno("child exit"); > + > + if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_idxfds) == -1) > + return got_error_from_errno("socketpair"); > + pid = fork(); > + if (pid == -1) > + return got_error_from_errno("fork"); > + else if (pid == 0) > + got_privsep_exec_child(imsg_idxfds, GOT_PATH_PROG_INDEX_PACK, "."); > + if (close(imsg_idxfds[1]) != 0) > + return got_error_from_errno("close"); > + imsg_init(&ibuf, imsg_idxfds[0]); > + > + err = got_privsep_send_index_pack_req(&ibuf, npackfd, packhash); > + if (err != NULL) > + return err; > + err = got_privsep_wait_ack(&ibuf); > + if (err != NULL) > + return err; 
> + err = got_privsep_send_tmpfd(&ibuf, nidxfd); > + if (err != NULL) > + return err; > + err = got_privsep_wait_index_pack_done(&ibuf); > + if (err != NULL) > + return err; > + imsg_clear(&ibuf); > + if (close(imsg_idxfds[0]) == -1) > + return got_error_from_errno("close child"); > + if (waitpid(pid, &status, 0) == -1) > + return got_error_from_errno("child exit"); > + > + > + return NULL; > + > +} > diff --git a/lib/got_lib_object_idset.h b/lib/got_lib_object_idset.h > index 6ae68d9..6dbd75c 100644 > --- a/lib/got_lib_object_idset.h > +++ b/lib/got_lib_object_idset.h > @@ -26,6 +26,8 @@ const struct got_error *got_object_idset_remove(void **, > struct got_object_idset *, struct got_object_id *); > int got_object_idset_contains(struct got_object_idset *, > struct got_object_id *); > +void *got_object_idset_lookup_data(struct got_object_idset *, > + struct got_object_id *); > const struct got_error *got_object_idset_for_each(struct got_object_idset *, > const struct got_error *(*cb)(struct got_object_id *, void *, void *), > void *); > diff --git a/lib/got_lib_privsep.h b/lib/got_lib_privsep.h > index b27325d..6789c93 100644 > --- a/lib/got_lib_privsep.h > +++ b/lib/got_lib_privsep.h > @@ -42,6 +42,8 @@ > #define GOT_PROG_READ_TAG got-read-tag > #define GOT_PROG_READ_PACK got-read-pack > #define GOT_PROG_READ_GITCONFIG got-read-gitconfig > +#define GOT_PROG_FETCH_PACK got-fetch-pack > +#define GOT_PROG_SEND_PACK got-send-pack > > #define GOT_STRINGIFY(x) #x > #define GOT_STRINGVAL(x) GOT_STRINGIFY(x) > @@ -61,6 +63,10 @@ > GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_READ_PACK) > #define GOT_PATH_PROG_READ_GITCONFIG \ > GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_READ_GITCONFIG) > +#define GOT_PATH_PROG_FETCH_PACK \ > + GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_FETCH_PACK) > +#define GOT_PATH_PROG_SEND_PACK \ > + GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_SEND_PACK) > > struct got_privsep_child { > int imsg_fd; 
> @@ -98,6 +104,12 @@ enum got_imsg_type { > GOT_IMSG_TAG, > GOT_IMSG_TAG_TAGMSG, > > + /* Messages related to networking. */ > + GOT_IMSG_FETCH_REQUEST, > + GOT_IMSG_FETCH_DONE, > + GOT_IMSG_IDXPACK_REQUEST, > + GOT_IMSG_IDXPACK_DONE, > + > /* Messages related to pack files. */ > GOT_IMSG_PACKIDX, > GOT_IMSG_PACK, > @@ -106,6 +118,7 @@ enum got_imsg_type { > /* Message sending file descriptor to a temporary file. */ > GOT_IMSG_TMPFD, > > + > /* Messages related to gitconfig files. */ > GOT_IMSG_GITCONFIG_PARSE_REQUEST, > GOT_IMSG_GITCONFIG_REPOSITORY_FORMAT_VERSION_REQUEST, > @@ -272,6 +285,12 @@ const struct got_error *got_privsep_send_blob_outfd(struct imsgbuf *, int); > const struct got_error *got_privsep_send_tmpfd(struct imsgbuf *, int); > const struct got_error *got_privsep_send_obj(struct imsgbuf *, > struct got_object *); > +const struct got_error *got_privsep_send_index_pack_req(struct imsgbuf *, int); > +const struct got_error *got_privsep_send_index_pack_done(struct imsgbuf *, > + struct got_object_id*); > +const struct got_error *got_privsep_send_fetch_req(struct imsgbuf *, int); > +const struct got_error *got_privsep_send_fetch_done(struct imsgbuf *); > +const struct got_error *got_privsep_wait_fetch_done(struct imsgbuf *); > const struct got_error *got_privsep_get_imsg_obj(struct got_object **, > struct imsg *, struct imsgbuf *); > const struct got_error *got_privsep_recv_obj(struct got_object **, > diff --git a/lib/inflate.c b/lib/inflate.c > index 3986b17..ebcdf12 100644 > --- a/lib/inflate.c > +++ b/lib/inflate.c > @@ -86,12 +86,16 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp) > size_t last_total_out = zb->z.total_out; > z_stream *z = &zb->z; > int ret = Z_ERRNO; > + off_t off, consumed; > > z->next_out = zb->outbuf; > z->avail_out = zb->outlen; > > *outlenp = 0; > + off = ftello(f); > + consumed = 0; > do { > + size_t last_total_in = zb->z.total_in; > if (z->avail_in == 0) { > size_t n = fread(zb->inbuf, 1, zb->inlen, 
f); > if (n == 0) { > @@ -105,6 +109,7 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp) > z->avail_in = n; > } > ret = inflate(z, Z_SYNC_FLUSH); > + consumed += z->total_in - last_total_in; > } while (ret == Z_OK && z->avail_out > 0); > > if (ret == Z_OK) { > @@ -116,6 +121,7 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp) > } > > *outlenp = z->total_out - last_total_out; > + fseek(f, off + consumed, SEEK_SET); > return NULL; > } > > diff --git a/lib/object.c b/lib/object.c > index 4aaeb8a..2044c22 100644 > --- a/lib/object.c > +++ b/lib/object.c > @@ -51,6 +51,7 @@ > #include "got_lib_object_cache.h" > #include "got_lib_object_parse.h" > #include "got_lib_pack.h" > +#include "got_lib_fetch.h" > #include "got_lib_repository.h" > > #ifndef MIN > diff --git a/lib/object_idset.c b/lib/object_idset.c > index 527383c..510b59e 100644 > --- a/lib/object_idset.c > +++ b/lib/object_idset.c > @@ -168,6 +168,14 @@ got_object_idset_contains(struct got_object_idset *set, > return entry ? 1 : 0; > } > > +void * > +got_object_idset_lookup_data(struct got_object_idset *set, > + struct got_object_id *id) This function seems to do the exact same thing as got_object_idset_get() which already exists. > +{ > + struct got_object_idset_element *entry = find_element(set, id); > + return entry ? 
entry->data : NULL; > +} > + > const struct got_error * > got_object_idset_for_each(struct got_object_idset *set, > const struct got_error *(*cb)(struct got_object_id *, void *, void *), > diff --git a/lib/privsep.c b/lib/privsep.c > index f31cafb..599d5eb 100644 > --- a/lib/privsep.c > +++ b/lib/privsep.c > @@ -401,6 +401,44 @@ got_privsep_send_obj(struct imsgbuf *ibuf, struct got_object *obj) > return flush_imsg(ibuf); > } > > +const struct got_error * > +got_privsep_send_fetch_req(struct imsgbuf *ibuf, int fd) > +{ > + const struct got_error *err = NULL; > + > + if (imsg_compose(ibuf, GOT_IMSG_FETCH_REQUEST, 0, 0, fd, > + NULL, 0) == -1) { > + err = got_error_from_errno("imsg_compose FETCH_REQUEST"); > + close(fd); > + return err; > + } > + return flush_imsg(ibuf); > +} > + > +const struct got_error * > +got_privsep_send_fetch_done(struct imsgbuf *ibuf) > +{ > + if (imsg_compose(ibuf, GOT_IMSG_FETCH_DONE, 0, 0, -1, NULL, 0) == -1) > + return got_error_from_errno("imsg_compose FETCH"); > + return flush_imsg(ibuf); > +} > + > +const struct got_error * > +got_privsep_wait_fetch_done(struct imsgbuf *ibuf) > +{ > + const struct got_error *err = NULL; > + struct imsg imsg; > + > + err = got_privsep_recv_imsg(&imsg, ibuf, 0); > + if (err) > + return err; > + if (imsg.hdr.type == GOT_IMSG_FETCH_DONE) > + return NULL; > + else > + return got_error(GOT_ERR_PRIVSEP_MSG); > + imsg_free(&imsg); > +} > + > const struct got_error * > got_privsep_get_imsg_obj(struct got_object **obj, struct imsg *imsg, > struct imsgbuf *ibuf) > diff --git a/libexec/Makefile b/libexec/Makefile > index a4c900b..ddc207c 100644 > --- a/libexec/Makefile > +++ b/libexec/Makefile > @@ -1,4 +1,4 @@ > SUBDIR = got-read-blob got-read-commit got-read-object got-read-tree \ > - got-read-tag got-read-pack got-read-gitconfig > + got-read-tag got-fetch-pack got-read-pack got-read-gitconfig > > .include > diff --git a/libexec/got-index-pack/got-index-pack.c b/libexec/got-index-pack/got-index-pack.c > new file 
mode 100644 > index 0000000..3bf2502 > --- /dev/null > +++ b/libexec/got-index-pack/got-index-pack.c > @@ -0,0 +1,1254 @@ Please add your copyright and licence statements here. > +#include > +#include > +#include > +#include > +#include > +#include > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +#include "got_error.h" > +#include "got_object.h" > + > +#include "got_lib_sha1.h" > +#include "got_lib_delta.h" > +#include "got_lib_inflate.h" > +#include "got_lib_object.h" > +#include "got_lib_object_parse.h" > +#include "got_lib_object_idset.h" > +#include "got_lib_privsep.h" > + > +typedef struct Cinfo Cinfo; > +typedef struct Tinfo Tinfo; > +typedef struct Object Object; > +typedef struct Pack Pack; > +typedef struct Buf Buf; > +typedef struct Dirent Dirent; > +typedef struct Idxent Idxent; > +typedef struct Ols Ols; > + > +enum { > + /* 5k objects should be enough */ > + Cachemax = 5*1024, > + Pathmax = 512, > + Hashsz = 20, > + Pktmax = 65536, > + > + Nproto = 16, > + Nport = 16, > + Nhost = 256, > + Npath = 128, > + Nrepo = 64, > + Nbranch = 32, > +}; > + > +typedef enum Type { > + GNone = 0, > + GCommit = 1, > + GTree = 2, > + GBlob = 3, > + GTag = 4, > + GOdelta = 6, > + GRdelta = 7, > +} Type; > + > +enum { > + Cloaded = 1 << 0, > + Cidx = 1 << 1, > + Ccache = 1 << 2, > + Cexist = 1 << 3, > + Cparsed = 1 << 5, > +}; > + > +struct Dirent { > + char *name; > + int modref; > + int mode; > + struct got_object_id h; > +}; > + > +struct Object { > + /* Git data */ > + struct got_object_id hash; > + Type type; > + > + /* Cache */ > + int id; > + int flag; > + int refs; > + Object *next; > + Object *prev; > + > + /* For indexing */ > + off_t off; > + off_t len; > + uint32_t crc; > + > + /* Everything below here gets cleared */ > + char *all; > + char *data; > + /* size excludes header */ > + off_t size; > + > + union { > + Cinfo 
*commit; > + Tinfo *tree; > + }; > +}; > + > +struct Tinfo { > + /* Tree */ > + Dirent *ent; > + int nent; > +}; > + > +struct Cinfo { > + /* Commit */ > + struct got_object_id *parent; > + int nparent; > + struct got_object_id tree; > + char *author; > + char *committer; > + char *msg; > + int nmsg; > + off_t ctime; > + off_t mtime; > +}; > + > +typedef struct Buf Buf; > + > +struct Buf { > + int len; > + int sz; > + char *data; > +}; > + > +static int readpacked(FILE *, Object *, int); > +static Object *readidxobject(FILE *, struct got_object_id, int); > + > +struct got_object_idset *objcache; > +int next_object_id; > +Object *lruhead; > +Object *lrutail; > +int ncache; > + > +#define GETBE16(b)\ > + ((((b)[0] & 0xFFul) << 8) | \ > + (((b)[1] & 0xFFul) << 0)) > + > +#define GETBE32(b)\ > + ((((b)[0] & 0xFFul) << 24) | \ > + (((b)[1] & 0xFFul) << 16) | \ > + (((b)[2] & 0xFFul) << 8) | \ > + (((b)[3] & 0xFFul) << 0)) > +#define GETBE64(b)\ > + ((((b)[0] & 0xFFull) << 56) | \ > + (((b)[1] & 0xFFull) << 48) | \ > + (((b)[2] & 0xFFull) << 40) | \ > + (((b)[3] & 0xFFull) << 32) | \ > + (((b)[4] & 0xFFull) << 24) | \ > + (((b)[5] & 0xFFull) << 16) | \ > + (((b)[6] & 0xFFull) << 8) | \ > + (((b)[7] & 0xFFull) << 0)) > + > +#define PUTBE16(b, n)\ > + do{ \ > + (b)[0] = (n) >> 8; \ > + (b)[1] = (n) >> 0; \ > + } while(0) > + > +#define PUTBE32(b, n)\ > + do{ \ > + (b)[0] = (n) >> 24; \ > + (b)[1] = (n) >> 16; \ > + (b)[2] = (n) >> 8; \ > + (b)[3] = (n) >> 0; \ > + } while(0) > + > +#define PUTBE64(b, n)\ > + do{ \ > + (b)[0] = (n) >> 56; \ > + (b)[1] = (n) >> 48; \ > + (b)[2] = (n) >> 40; \ > + (b)[3] = (n) >> 32; \ > + (b)[4] = (n) >> 24; \ > + (b)[5] = (n) >> 16; \ > + (b)[6] = (n) >> 8; \ > + (b)[7] = (n) >> 0; \ > + } while(0) > + > +static int > +charval(int c, int *err) > +{ > + if(c >= '0' && c <= '9') > + return c - '0'; > + if(c >= 'a' && c <= 'f') > + return c - 'a' + 10; > + if(c >= 'A' && c <= 'F') > + return c - 'A' + 10; > + *err = 1; > + return -1; > +} > + 
> +static int > +hparse(struct got_object_id *h, char *b) > +{ > + int i, err; > + > + err = 0; > + for(i = 0; i < sizeof(h->sha1); i++){ > + err = 0; > + h->sha1[i] = 0; > + h->sha1[i] |= ((charval(b[2*i], &err) & 0xf) << 4); > + h->sha1[i] |= ((charval(b[2*i+1], &err)& 0xf) << 0); > + if(err) > + return -1; > + } > + return 0; > +} > + > +static void * > +emalloc(size_t n) > +{ > + void *v; > + > + v = calloc(n, 1); > + if(v == NULL) > + err(1, "malloc:"); > + return v; > +} > + > +static void * > +erealloc(void *p, ulong n) > +{ > + void *v; > + > + v = realloc(p, n); > + if(v == NULL) > + err(1, "realloc:"); > + memset(v, 0, n); > + return v; > +} > + > +static int > +hasheq(struct got_object_id *a, struct got_object_id *b) > +{ > + return memcmp(a->sha1, b->sha1, sizeof(a->sha1)) == 0; > +} > + > +static char * > +typestr(int t) > +{ > + char *types[] = { > + "???", > + "commit", > + "tree", > + "blob", > + "tag", > + "odelta", > + "rdelta", > + }; > + if (t < 0 || t >= sizeof(types)/sizeof(types[0])) > + abort(); > + return types[t]; > +} > + > +static char * > +hashfmt(char *out, size_t nout, struct got_object_id *h) > +{ > + int i, n, c0, c1; > + char *p; > + > + if (nout < 2*sizeof(h->sha1) + 1) > + return NULL; > + p = out; > + for(i = 0; i < sizeof(h->sha1); i++){ > + n = (h->sha1[i] >> 4) & 0xf; > + c0 = (n >= 10) ? n-10 + 'a' : n + '0'; > + n = h->sha1[i] & 0xf; > + c1 = (n >= 10) ? 
n-10 + 'a' : n + '0'; > + *p++ = c0; > + *p++ = c1; > + } > + *p++ = 0; > + return out; > +} > + > +static void > +clear(Object *o) > +{ > + if(!o) > + return; > + > + assert(o->refs == 0); > + assert((o->flag & Ccache) == 0); > + assert(o->flag & Cloaded); > + switch(o->type){ > + case GCommit: > + if(!o->commit) > + break; > + free(o->commit->parent); > + free(o->commit->author); > + free(o->commit->committer); > + free(o->commit); > + o->commit = NULL; > + break; > + case GTree: > + if(!o->tree) > + break; > + free(o->tree->ent); > + free(o->tree); > + o->tree = NULL; > + break; > + default: > + break; > + } > + > + free(o->all); > + o->all = NULL; > + o->data = NULL; > + o->flag &= ~Cloaded; > +} > + > +static void > +unref(Object *o) > +{ > + if(!o) > + return; > + o->refs--; > + if(!o->refs) > + clear(o); > +} > + > +static Object* > +ref(Object *o) > +{ > + o->refs++; > + return o; > +} > + > +static void > +cache(Object *o) > +{ > + char buf[41]; > + Object *p; > + > + hashfmt(buf, sizeof(buf), &o->hash); > + if(o == lruhead) > + return; > + if(o == lrutail) > + lrutail = lrutail->prev; > + if(!(o->flag & Cexist)){ > + got_object_idset_add(objcache, &o->hash, o); > + o->id = next_object_id++; > + o->flag |= Cexist; > + } > + if(o->prev) > + o->prev->next = o->next; > + if(o->next) > + o->next->prev = o->prev; > + if(lrutail == o){ > + lrutail = o->prev; > + lrutail->next = NULL; > + }else if(!lrutail) > + lrutail = o; > + if(lruhead) > + lruhead->prev = o; > + o->next = lruhead; > + o->prev = NULL; > + lruhead = o; > + > + if(!(o->flag & Ccache)){ > + o->flag |= Ccache; > + ref(o); > + ncache++; > + } > + while(ncache > Cachemax){ > + p = lrutail; > + lrutail = p->prev; > + lrutail->next = NULL; > + p->flag &= ~Ccache; > + p->prev = NULL; > + p->next = NULL; > + unref(p); > + ncache--; > + } > +} > + > +static int > +preadbe32(FILE *b, int *v, off_t off) > +{ > + char buf[4]; > + > + if(fseek(b, off, 0) == -1) > + return -1; > + if(fread(buf, 1, 
sizeof(buf), b) == -1) > + return -1; > + *v = GETBE32(buf); > + > + return 0; > +} > +static int > +preadbe64(FILE *b, off_t *v, off_t off) > +{ > + char buf[8]; > + > + if(fseek(b, off, 0) == -1) > + return -1; > + if(fread(buf, 1, sizeof(buf), b) == -1) > + return -1; > + *v = GETBE64(buf); > + return 0; > +} > + > +static int > +readvint(char *p, char **pp) > +{ > + int i, n, c; > + > + i = 0; > + n = 0; > + do { > + c = *p++; > + n |= (c & 0x7f) << i; > + i += 7; > + } while (c & 0x80); > + *pp = p; > + > + return n; > +} > + > +static int > +applydelta(Object *dst, Object *base, char *d, int nd) > +{ > + char *r, *b, *ed, *er; > + int n, nr, c; > + off_t o, l; > + > + ed = d + nd; > + b = base->data; > + n = readvint(d, &d); > + if(n != base->size){ > + fprintf(stderr, "mismatched source size"); > + return -1; > + } > + > + nr = readvint(d, &d); > + r = emalloc(nr + 64); > + n = snprintf(r, 64, "%s %d", typestr(base->type), nr) + 1; > + dst->all = r; > + dst->type = base->type; > + dst->data = r + n; > + dst->size = nr; > + er = dst->data + nr; > + r = dst->data; > + > + while(1){ > + if(d == ed) > + break; > + c = *d++; > + if(!c){ > + fprintf(stderr, "bad delta encoding"); > + return -1; > + } > + /* copy from base */ > + if(c & 0x80){ > + o = 0; > + l = 0; > + /* Offset in base */ > + if(c & 0x01 && d != ed) o |= (*d++ << 0) & 0x000000ff; > + if(c & 0x02 && d != ed) o |= (*d++ << 8) & 0x0000ff00; > + if(c & 0x04 && d != ed) o |= (*d++ << 16) & 0x00ff0000; > + if(c & 0x08 && d != ed) o |= (*d++ << 24) & 0xff000000; > + > + /* Length to copy */ > + if(c & 0x10 && d != ed) l |= (*d++ << 0) & 0x0000ff; > + if(c & 0x20 && d != ed) l |= (*d++ << 8) & 0x00ff00; > + if(c & 0x40 && d != ed) l |= (*d++ << 16) & 0xff0000; > + if(l == 0) l = 0x10000; > + > + assert(o + l <= base->size); > + memmove(r, b + o, l); > + r += l; > + /* inline data */ > + }else{ > + memmove(r, d, c); > + d += c; > + r += c; > + } > + > + } > + if(r != er){ > + fprintf(stderr, "truncated 
delta (%zd)", er - r); > + return -1; > + } > + > + return nr; > +} > + > +static int > +readrdelta(FILE *f, Object *o, int nd, int flag) > +{ > + const struct got_error *e; > + struct got_object_id h; > + Object *b; > + uint8_t *d; > + size_t n; > + > + d = NULL; > + if(fread(h.sha1, 1, sizeof(h.sha1), f) != sizeof(h.sha1)) > + goto error; > + if(hasheq(&o->hash, &h)) > + goto error; > + if ((e = got_inflate_to_mem(&d, &n, f)) != NULL) > + goto error; > + o->len = ftello(f) - o->off; > + if(d == NULL || n != nd) > + goto error; > + if((b = readidxobject(f, h, flag)) == NULL) > + goto error; > + if(applydelta(o, b, d, n) == -1) > + goto error; > + free(d); > + return 0; > +error: > + free(d); > + return -1; > +} > + > +static int > +readodelta(FILE *f, Object *o, off_t nd, off_t p, int flag) > +{ > + Object b; > + uint8_t *d; > + off_t r; > + size_t n; > + int c; > + > + r = 0; > + d = NULL; > + while(1){ > + if((c = fgetc(f)) == -1) > + goto error; > + r |= c & 0x7f; > + if (!(c & 0x80)) > + break; > + r++; > + r <<= 7; > + }while(c & 0x80); > + > + if(r > p){ > + fprintf(stderr, "junk offset -%lld (from %lld)", r, p); > + goto error; > + } > + > + if (got_inflate_to_mem(&d, &n, f) == NULL) > + goto error; > + o->len = ftello(f) - o->off; > + if(d == NULL || n != nd) > + goto error; > + if(fseek(f, p - r, 0) == -1) > + goto error; > + if(readpacked(f, &b, flag) == -1) > + goto error; > + if(applydelta(o, &b, d, nd) == -1) > + goto error; > + free(d); > + return 0; > +error: > + free(d); > + return -1; > +} > + > +static int > +readpacked(FILE *f, Object *o, int flag) > +{ > + const struct got_error *e; > + int c, s, n; > + off_t l, p; > + size_t ndata; > + uint8_t *data; > + Type t; > + Buf b; > + > + p = ftello(f); > + c = fgetc(f); > + if(c == -1) > + return -1; > + l = c & 0xf; > + s = 4; > + t = (c >> 4) & 0x7; > + if(!t){ > + fprintf(stderr, "unknown type for byte %x", c); > + return -1; > + } > + while(c & 0x80){ > + if((c = fgetc(f)) == -1) > + return -1; > 
+ l |= (c & 0x7f) << s; > + s += 7; > + } > + > + switch(t){ > + default: > + fprintf(stderr, "invalid object at %lld", ftello(f)); > + return -1; > + case GCommit: > + case GTree: > + case GTag: > + case GBlob: > + b.sz = 64 + l; > + > + b.data = emalloc(b.sz); > + n = snprintf(b.data, 64, "%s %lld", typestr(t), l) + 1; > + b.len = n; > + e = got_inflate_to_mem(&data, &ndata, f); > + if (e != NULL || n + ndata >= b.sz) { > + free(b.data); > + return -1; > + } > + memcpy(b.data + n, data, ndata); > + o->len = ftello(f) - o->off; > + o->type = t; > + o->all = b.data; > + o->data = b.data + n; > + o->size = ndata; > + free(data); > + break; > + case GOdelta: > + if(readodelta(f, o, l, p, flag) == -1) > + return -1; > + break; > + case GRdelta: > + if(readrdelta(f, o, l, flag) == -1) > + return -1; > + break; > + } > + o->flag |= Cloaded|flag; > + return 0; > +} > + > +static int > +readloose(FILE *f, Object *o, int flag) > +{ > + struct { char *tag; int type; } *p, types[] = { > + {"blob", GBlob}, > + {"tree", GTree}, > + {"commit", GCommit}, > + {"tag", GTag}, > + {NULL}, > + }; > + char *s, *e; > + uint8_t *d; > + off_t sz; > + size_t n; > + int l; > + > + if (got_inflate_to_mem(&d, &n, f) != NULL) > + return -1; > + > + s = (char *)d; > + o->type = GNone; > + for(p = types; p->tag; p++){ > + l = strlen(p->tag); > + if(strncmp(s, p->tag, l) == 0){ > + s += l; > + o->type = p->type; > + while(!isspace(*s)) > + s++; > + break; > + } > + } > + if(o->type == GNone){ > + free(o->data); > + return -1; > + } > + sz = strtol(s, &e, 0); > + if(e == s || *e++ != 0){ > + fprintf(stderr, "malformed object header"); > + goto error; > + } > + if(sz != n - (e - (char *)d)){ > + fprintf(stderr, "mismatched sizes"); > + goto error; > + } > + o->size = sz; > + o->data = e; > + o->all = d; > + o->flag |= Cloaded|flag; > + return 0; > + > +error: > + free(d); > + return -1; > +} > + > +static off_t > +searchindex(FILE *f, struct got_object_id h) > +{ > + int lo, hi, idx, i, nent; > + 
off_t o, oo; > + struct got_object_id hh; > + > + o = 8; > + /* > + * Read the fanout table. The fanout table > + * contains 256 entries, corresponsding to > + * the first byte of the hash. Each entry > + * is a 4 byte big endian integer, containing > + * the total number of entries with a leading > + * byte <= the table index, allowing us to > + * rapidly do a binary search on them. > + */ > + if (h.sha1[0] == 0){ > + lo = 0; > + if(preadbe32(f, &hi, o) == -1) > + goto err; > + } else { > + o += h.sha1[0]*4 - 4; > + if(preadbe32(f, &lo, o + 0) == -1) > + goto err; > + if(preadbe32(f, &hi, o + 4) == -1) > + goto err; > + } > + if(hi == lo) > + goto notfound; > + if(preadbe32(f, &nent, 8 + 255*4) == -1) > + goto err; > + > + /* > + * Now that we know the range of hashes that the > + * entry may exist in, read them in so we can do > + * a bsearch. > + */ > + idx = -1; > + fseek(f, Hashsz*lo + 8 + 256*4, 0); > + for(i = 0; i < hi - lo; i++){ > + if(fread(hh.sha1, 1, sizeof(hh.sha1), f) == -1) > + goto err; > + if(hasheq(&hh, &h)) > + idx = lo + i; > + } > + if(idx == -1) > + goto notfound; > + > + > + /* > + * We found the entry. If it's 32 bits, then we > + * can just return the oset, otherwise the 32 > + * bit entry contains the oset to the 64 bit > + * entry. > + */ > + oo = 8; /* Header */ > + oo += 256*4; /* Fanout table */ > + oo += Hashsz*nent; /* Hashes */ > + oo += 4*nent; /* Checksums */ > + oo += 4*idx; /* Offset offset */ > + if(preadbe32(f, &i, oo) == -1) > + goto err; > + o = i & 0xffffffff; > + if(o & (1ull << 31)){ > + o &= 0x7fffffff; > + if(preadbe64(f, &o, o) == -1) > + goto err; > + } > + return o; > + > +err: > + fprintf(stderr, "unable to read packfile\n"); > + return -1; > +notfound: > + { > + char hstr[41]; > + hashfmt(hstr, sizeof(hstr), &h); > + fprintf(stdout, "could not find object %s\n", hstr); > + } > + return -1; > +} > + > +/* > + * Scans for non-empty word, copying it into buf. 
> + * Strips off word, leading, and trailing space > + * from input. > + * > + * Returns -1 on empty string or error, leaving > + * input unmodified. > + */ > +static int > +scanword(char **str, int *nstr, char *buf, int nbuf) > +{ > + char *p; > + int n, r; > + > + r = -1; > + p = *str; > + n = *nstr; > + while(n && isblank(*p)){ > + n--; > + p++; > + } > + > + for(; n && *p && !isspace(*p); p++, n--){ > + r = 0; > + *buf++ = *p; > + nbuf--; > + if(nbuf == 0) > + return -1; > + } > + while(n && isblank(*p)){ > + n--; > + p++; > + } > + *buf = 0; > + *str = p; > + *nstr = n; > + return r; > +} > + > +static void > +nextline(char **str, int *nstr) > +{ > + char *s; > + > + if((s = strchr(*str, '\n')) != NULL){ > + *nstr -= s - *str + 1; > + *str = s + 1; > + } > +} > + > +static int > +parseauthor(char **str, int *nstr, char **name, off_t *time) > +{ > + return 0; > +} > + > +static void > +parsecommit(Object *o) > +{ > + char *p, *t, buf[128]; > + int np; > + > + p = o->data; > + np = o->size; > + o->commit = emalloc(sizeof(Cinfo)); > + while(1){ > + if(scanword(&p, &np, buf, sizeof(buf)) == -1) > + break; > + if(strcmp(buf, "tree") == 0){ > + if(scanword(&p, &np, buf, sizeof(buf)) == -1) > + errx(1, "invalid commit: tree missing"); > + if(hparse(&o->commit->tree, buf) == -1) > + errx(1, "invalid commit: garbled tree"); > + }else if(strcmp(buf, "parent") == 0){ > + if(scanword(&p, &np, buf, sizeof(buf)) == -1) > + errx(1, "invalid commit: missing parent"); > + o->commit->parent = realloc(o->commit->parent, ++o->commit->nparent * sizeof(struct got_object_id)); > + if(!o->commit->parent) > + err(1, "unable to malloc: "); > + if(hparse(&o->commit->parent[o->commit->nparent - 1], buf) == -1) > + errx(1, "invalid commit: garbled parent"); > + }else if(strcmp(buf, "author") == 0){ > + parseauthor(&p, &np, &o->commit->author, &o->commit->mtime); > + }else if(strcmp(buf, "committer") == 0){ > + parseauthor(&p, &np, &o->commit->committer, &o->commit->ctime); > + }else 
if(strcmp(buf, "gpgsig") == 0){ > + /* just drop it */ > + if((t = strstr(p, "-----END PGP SIGNATURE-----")) == NULL) > + errx(1, "malformed gpg signature"); > + np -= t - p; > + p = t; > + } > + nextline(&p, &np); > + } > + while (np && isspace(*p)) { > + p++; > + np--; > + } > + o->commit->msg = p; > + o->commit->nmsg = np; > +} > + > +static void > +parsetree(Object *o) > +{ > + char *p, buf[256]; > + int np, nn, m; > + Dirent *t; > + > + p = o->data; > + np = o->size; > + o->tree = emalloc(sizeof(Tinfo)); > + while(np > 0){ > + if(scanword(&p, &np, buf, sizeof(buf)) == -1) > + break; > + o->tree->ent = erealloc(o->tree->ent, ++o->tree->nent * sizeof(Dirent)); > + t = &o->tree->ent[o->tree->nent - 1]; > + memset(t, 0, sizeof(Dirent)); > + m = strtol(buf, NULL, 8); > + /* FIXME: symlinks and other BS */ > + if(m == 0160000){ > + t->mode |= S_IFDIR; > + t->modref = 1; > + } > + t->mode = m & 0777; > + if(m & 0040000) > + t->mode |= S_IFDIR; > + t->name = p; > + nn = strlen(p) + 1; > + p += nn; > + np -= nn; > + if(np < sizeof(t->h.sha1)) > + errx(1, "malformed tree, remaining %d (%s)", np, p); > + memcpy(t->h.sha1, p, sizeof(t->h.sha1)); > + p += sizeof(t->h.sha1); > + np -= sizeof(t->h.sha1); > + } > +} > + > +void > +parseobject(Object *o) > +{ > + if(o->flag & Cparsed) > + return; > + switch(o->type){ > + case GTree: parsetree(o); break; > + case GCommit: parsecommit(o); break; > + //case GTag: parsetag(o); break; > + default: break; > + } > + o->flag |= Cparsed; > +} > + > +static Object* > +readidxobject(FILE *idx, struct got_object_id h, int flag) > +{ > + char path[Pathmax]; > + char hbuf[41]; > + FILE *f; > + Object *obj; > + int l, n; > + off_t o; > + struct dirent *ent; > + DIR *d; > + > + > + if ((obj = got_object_idset_lookup_data(objcache, &h))) { > + if(obj->flag & Cloaded) > + return obj; > + if(obj->flag & Cidx){ > + assert(idx != NULL); > + o = ftello(idx); > + if(fseek(idx, obj->off, 0) == -1) > + errx(1, "could not seek to object offset"); > + 
if(readpacked(idx, obj, flag) == -1) > + errx(1, "could not reload object"); > + if(fseek(idx, o, 0) == -1) > + errx(1, "could not restore offset"); > + cache(obj); > + return obj; > + } > + } > + > + d = NULL; > + /* We're not putting it in the cache yet... */ > + obj = emalloc(sizeof(Object)); > + obj->id = next_object_id + 1; > + obj->hash = h; > + > + hashfmt(hbuf, sizeof(hbuf), &h); > + snprintf(path, sizeof(path), ".git/objects/%c%c/%s", hbuf[0], hbuf[1], hbuf + 2); > + if((f = fopen(path, "r")) != NULL){ > + if(readloose(f, obj, flag) == -1) > + goto error; > + fclose(f); > + parseobject(obj); > + hashfmt(hbuf, sizeof(hbuf), &obj->hash); > + fprintf(stderr, "object %s cached", hbuf); > + cache(obj); > + return obj; > + } > + > + o = -1; > + if ((d = opendir(".git/objects/pack")) == NULL) > + err(1, "open pack dir"); > + while ((ent = readdir(d)) != NULL) { > + l = strlen(ent->d_name); > + if(l > 4 && strcmp(ent->d_name + l - 4, ".idx") != 0) > + continue; > + snprintf(path, sizeof(path), ".git/objects/pack/%s", ent->d_name); > + if((f = fopen(path, "r")) == NULL) > + continue; > + o = searchindex(f, h); > + fclose(f); > + if(o == -1) > + continue; > + break; > + } > + closedir(d); > + > + if (o == -1) > + goto error; > + > + if((n = snprintf(path, sizeof(path), "%s", path)) >= sizeof(path) - 4) > + goto error; > + memcpy(path + n - 4, ".pack", 6); > + if((f = fopen(path, "r")) == NULL) > + goto error; > + if(fseek(f, o, 0) == -1) > + goto error; > + if(readpacked(f, obj, flag) == -1) > + goto error; > + fclose(f); > + parseobject(obj); > + cache(obj); > + return obj; > +error: > + free(obj); > + return NULL; > +} > + > +Object* > +readobject(struct got_object_id h) > +{ > + Object *o; > + > + o = readidxobject(NULL, h, 0); > + if(o) > + ref(o); > + return o; > +} > + > +int > +objcmp(const void *pa, const void *pb) > +{ > + Object *a, *b; > + > + a = *(Object**)pa; > + b = *(Object**)pb; > + return memcmp(a->hash.sha1, b->hash.sha1, sizeof(a->hash.sha1)); > 
+} > + > +static int > +hwrite(FILE *b, void *buf, int len, SHA1_CTX *ctx) > +{ > + SHA1Update(ctx, buf, len); > + return fwrite(buf, 1, len, b); > +} > + > +static uint32_t > +objectcrc(FILE *f, Object *o) > +{ > + char buf[8096]; > + int n, r; > + > + o->crc = 0; > + fseek(f, o->off, 0); > + for(n = o->len; n > 0; n -= r){ > + r = fread(buf, 1, n > sizeof(buf) ? sizeof(buf) : n, f); > + if(r == -1) > + return -1; > + if(r == 0) > + return 0; > + o->crc = crc32(o->crc, buf, r); > + } > + return 0; > +} > + > +int > +indexpack(int packfd, int idxfd, struct got_object_id packhash) > +{ > + char hdr[4*3], buf[8]; > + int nobj, nvalid, nbig, n, i, step; > + Object *o, **objects; > + char *valid; > + SHA1_CTX ctx, objctx; > + FILE *f; > + struct got_object_id h; > + int c; > + > + if ((f = fdopen(packfd, "r")) == NULL) > + return -1; > + if (fseek(f, 0, SEEK_SET) == -1) > + return -1; > + if (fread(hdr, 1, sizeof(hdr), f) != sizeof(hdr)) { > + fprintf(stderr, "short read on header"); > + return -1; > + } > + if (memcmp(hdr, "PACK\0\0\0\2", 8) != 0) { > + fprintf(stderr, "invalid header"); > + return -1; > + } > + > + nvalid = 0; > + nobj = GETBE32(hdr + 8); > + objects = calloc(nobj, sizeof(Object*)); > + valid = calloc(nobj, sizeof(char)); > + step = nobj/100; > + if(!step) > + step++; > + while (nvalid != nobj) { > + fprintf(stderr, "indexing (%d/%d):", nvalid, nobj); > + n = 0; > + for (i = 0; i < nobj; i++) { > + if (valid[i]) { > + n++; > + continue; > + } > + if (i % step == 0) > + fprintf(stderr, "."); > + if (!objects[i]) { > + o = emalloc(sizeof(Object)); > + o->off = ftello(f); > + objects[i] = o; > + } > + o = objects[i]; > + fseek(f, o->off, 0); > + if (readpacked(f, o, Cidx) == 0){ > + SHA1Init(&objctx); > + SHA1Update(&objctx, (uint8_t*)o->all, o->size + strlen(o->all) + 1); > + SHA1Final(o->hash.sha1, &objctx); > + cache(o); > + valid[i] = 1; > + n++; > + } > + if(objectcrc(f, o) == -1) > + return -1; > + } > + fprintf(stderr, "\n"); > + if (n == nvalid) 
{ > + errx(1, "fix point reached too early: %d/%d", nvalid, nobj); > + goto error; > + } > + nvalid = n; > + } > + fclose(f); > + > + SHA1Init(&ctx); > + qsort(objects, nobj, sizeof(Object*), objcmp); > + if((f = fdopen(idxfd, "w")) == NULL) > + return -1; > + if(hwrite(f, "\xfftOc\x00\x00\x00\x02", 8, &ctx) != 8) > + goto error; > + /* fanout table */ > + c = 0; > + for(i = 0; i < 256; i++){ > + while(c < nobj && (objects[c]->hash.sha1[0] & 0xff) <= i) > + c++; > + PUTBE32(buf, c); > + hwrite(f, buf, 4, &ctx); > + } > + for(i = 0; i < nobj; i++){ > + o = objects[i]; > + hwrite(f, o->hash.sha1, sizeof(o->hash.sha1), &ctx); > + } > + > + /* pointless, nothing uses this */ > + for(i = 0; i < nobj; i++){ > + PUTBE32(buf, objects[i]->crc); > + hwrite(f, buf, 4, &ctx); > + } > + > + nbig = 0; > + for(i = 0; i < nobj; i++){ > + if(objects[i]->off <= (1ull<<31)) > + PUTBE32(buf, objects[i]->off); > + else > + PUTBE32(buf, (1ull << 31) | nbig++); > + hwrite(f, buf, 4, &ctx); > + } > + for(i = 0; i < nobj; i++){ > + if(objects[i]->off > (1ull<<31)){ > + PUTBE64(buf, objects[i]->off); > + hwrite(f, buf, 8, &ctx); > + } > + } > + hwrite(f, packhash.sha1, sizeof(packhash.sha1), &ctx); > + SHA1Final(h.sha1, &ctx); > + fwrite(h.sha1, 1, sizeof(h.sha1), f); > + > + free(objects); > + free(valid); > + fclose(f); > + return 0; > + > +error: > + free(objects); > + free(valid); > + fclose(f); > + return -1; > +} > + > +int > +main(int argc, char **argv) > +{ > + const struct got_error *err = NULL; > + struct got_object_id packhash; > + struct imsgbuf ibuf; > + struct imsg imsg; > + int packfd, idxfd; > + > + objcache = got_object_idset_alloc(); > + imsg_init(&ibuf, GOT_IMSG_FD_CHILD); > + if((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != 0) { > + if (err->code == GOT_ERR_PRIVSEP_PIPE) > + err = NULL; > + goto done; > + } > + if (imsg.hdr.type == GOT_IMSG_STOP) > + goto done; > + if (imsg.hdr.type != GOT_IMSG_IDXPACK_REQUEST) { > + err = got_error(GOT_ERR_PRIVSEP_MSG); > + goto 
done; > + } > + if (imsg.hdr.len - IMSG_HEADER_SIZE != SHA1_DIGEST_LENGTH) { > + err = got_error(GOT_ERR_PRIVSEP_LEN); > + goto done; > + } > + packfd = imsg.fd; > + memcpy(packhash.sha1, imsg.data, SHA1_DIGEST_LENGTH); > + got_privsep_send_ack(&ibuf); > + > + if((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != 0) { > + if (err->code == GOT_ERR_PRIVSEP_PIPE) > + err = NULL; > + goto done; > + } > + if (imsg.hdr.type == GOT_IMSG_STOP) > + goto done; > + if (imsg.hdr.type != GOT_IMSG_TMPFD) { > + err = got_error(GOT_ERR_PRIVSEP_MSG); > + goto done; > + } > + if (imsg.hdr.len - IMSG_HEADER_SIZE != 0) { > + err = got_error(GOT_ERR_PRIVSEP_LEN); > + goto done; > + } > + idxfd = imsg.fd; > + > + indexpack(packfd, idxfd, packhash); > +done: > + if(err != NULL) > + got_privsep_send_error(&ibuf, err); > + else > + err = got_privsep_send_index_pack_done(&ibuf); > + if(err != NULL) { > + fprintf(stderr, "%s: %s\n", getprogname(), err->msg); > + got_privsep_send_error(&ibuf, err); > + } > + > + exit(0); > +} > > -- > Ori Bernstein > >