Download raw body.
got clone: start of effort.
On Fri, Nov 29, 2019 at 12:17:36PM -0800, Ori Bernstein wrote:
> This diff implements the fetch protocol support for git+ssh, and almost
> implements it for unencrypted git:// (haven't written the code to
> open the socket yet)
>
> It's a quick and dirty port/airdrop/privsepification of the code I wrote
> for git9, so it doesn't match the style or the data structures used in
> the rest of got. We'll need to do a few passes to clean it up and make it
> fit before i'd be happy getting it in, but I'd like to show where things
> are.
>
> There's no unveil or pledge.
>
> The code in git-fetch-pack should be usable for incremental fetches, too,
> once we do the plumbing to figure out which objects are in the tree.
>
> I'll be doing another diff to implement git push, doing the same kind of
> airdrop.
Thanks so much for working on this!
Comments inline:
> diff --git a/got/Makefile b/got/Makefile
> index 709948a..27b8780 100644
> --- a/got/Makefile
> +++ b/got/Makefile
> @@ -8,7 +8,7 @@ SRCS= got.c blame.c commit_graph.c delta.c diff.c \
> object_idset.c object_parse.c opentemp.c path.c pack.c \
> privsep.c reference.c repository.c sha1.c worktree.c \
> inflate.c buf.c rcsutil.c diff3.c lockfile.c \
> - deflate.c object_create.c delta_cache.c
> + deflate.c object_create.c delta_cache.c fetch.c
> MAN = ${PROG}.1 got-worktree.5 git-repository.5
>
> CPPFLAGS = -I${.CURDIR}/../include -I${.CURDIR}/../lib
> diff --git a/got/got.c b/got/got.c
> index 9f40340..22bd4d7 100644
> --- a/got/got.c
> +++ b/got/got.c
> @@ -45,6 +45,7 @@
> #include "got_worktree.h"
> #include "got_diff.h"
> #include "got_commit_graph.h"
> +#include "got_lib_fetch.h"
> #include "got_blame.h"
> #include "got_privsep.h"
> #include "got_opentemp.h"
> @@ -80,6 +81,7 @@ __dead static void usage(int);
> __dead static void usage_init(void);
> __dead static void usage_import(void);
> __dead static void usage_checkout(void);
> +__dead static void usage_clone(void);
> __dead static void usage_update(void);
> __dead static void usage_log(void);
> __dead static void usage_diff(void);
> @@ -104,6 +106,7 @@ __dead static void usage_cat(void);
>
> static const struct got_error* cmd_init(int, char *[]);
> static const struct got_error* cmd_import(int, char *[]);
> +static const struct got_error* cmd_clone(int, char *[]);
> static const struct got_error* cmd_checkout(int, char *[]);
> static const struct got_error* cmd_update(int, char *[]);
> static const struct got_error* cmd_log(int, char *[]);
> @@ -131,6 +134,7 @@ static struct got_cmd got_commands[] = {
> { "init", cmd_init, usage_init, "in" },
> { "import", cmd_import, usage_import, "im" },
> { "checkout", cmd_checkout, usage_checkout, "co" },
> + { "clone", cmd_clone, usage_clone, "cl" },
> { "update", cmd_update, usage_update, "up" },
> { "log", cmd_log, usage_log, "" },
> { "diff", cmd_diff, usage_diff, "di" },
> @@ -794,6 +798,13 @@ done:
> return error;
> }
>
> +__dead static void
> +usage_clone(void)
> +{
> + fprintf(stderr, "usage: %s clone repo-path\n", getprogname());
Should this say "clone repo-url"? The argument is parsed as a URI, not as a local path.
> + exit(1);
> +}
> +
> __dead static void
> usage_checkout(void)
> {
> @@ -969,6 +980,34 @@ resolve_commit_arg(struct got_object_id **commit_id,
> return err;
> }
>
> +static const struct got_error *
> +cmd_clone(int argc, char *argv[])
> +{
> + char *uri, *branch_filter, *dirname;
> + int ch;
> +
> + while ((ch = getopt(argc, argv, "b:c:p:")) != -1) {
-c and -p are listed in the getopt string but have no case in the switch, so they just fall through to usage_clone().
> + switch (ch) {
> + case 'b':
> + branch_filter = optarg;
> + break;
> + default:
> + usage_clone();
> + break;
> + }
> + }
> + argc -= optind;
> + argv += optind;
> + uri = argv[0];
> + if(argc == 1)
> + dirname = NULL;
> + else if(argc == 2)
> + dirname = argv[1];
> + else
> + usage_clone();
> + return got_clone(argv[0], branch_filter, dirname);
> +}
> +
> static const struct got_error *
> cmd_checkout(int argc, char *argv[])
> {
> diff --git a/include/got_error.h b/include/got_error.h
> index d0df125..50c6e2d 100644
> --- a/include/got_error.h
> +++ b/include/got_error.h
> @@ -128,6 +128,9 @@
> #define GOT_ERR_REGEX 112
> #define GOT_ERR_REF_NAME_MINUS 113
> #define GOT_ERR_GITCONFIG_SYNTAX 114
> +#define GOT_ERR_FETCH_FAILED 115
> +#define GOT_ERR_PARSE_URI 116
> +#define GOT_ERR_BAD_PROTO 117
>
> static const struct got_error {
> int code;
> @@ -262,6 +265,9 @@ static const struct got_error {
> { GOT_ERR_REGEX, "regular expression error" },
> { GOT_ERR_REF_NAME_MINUS, "reference name may not start with '-'" },
> { GOT_ERR_GITCONFIG_SYNTAX, "gitconfig syntax error" },
> + { GOT_ERR_FETCH_FAILED, "fetch failed" },
> + { GOT_ERR_PARSE_URI, "failed to parse uri" },
> + { GOT_ERR_BAD_PROTO, "unknown protocol" },
> };
>
> /*
> diff --git a/lib/fetch.c b/lib/fetch.c
> new file mode 100644
> index 0000000..dfba5c3
> --- /dev/null
> +++ b/lib/fetch.c
> @@ -0,0 +1,342 @@
> +/*
> + * Copyright (c) 2018, 2019 Stefan Sperling <stsp@openbsd.org>
Please add your own copyright claim here. I didn't write most of this.
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/queue.h>
> +#include <sys/uio.h>
> +#include <sys/socket.h>
> +#include <sys/wait.h>
> +#include <sys/syslimits.h>
> +#include <sys/resource.h>
> +
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <stdint.h>
> +#include <sha1.h>
> +#include <zlib.h>
> +#include <ctype.h>
> +#include <limits.h>
> +#include <imsg.h>
> +#include <time.h>
> +#include <uuid.h>
> +
> +#include "got_error.h"
> +#include "got_reference.h"
> +#include "got_repository.h"
> +#include "got_path.h"
> +#include "got_cancel.h"
> +#include "got_worktree.h"
> +#include "got_object.h"
> +
> +#include "got_lib_delta.h"
> +#include "got_lib_inflate.h"
> +#include "got_lib_object.h"
> +#include "got_lib_object_parse.h"
> +#include "got_lib_object_create.h"
> +#include "got_lib_pack.h"
> +#include "got_lib_sha1.h"
> +#include "got_lib_privsep.h"
> +#include "got_lib_object_cache.h"
> +#include "got_lib_repository.h"
> +
> +#define GOT_PROTOMAX 64
> +#define GOT_HOSTMAX 256
> +#define GOT_PATHMAX 512
> +#define GOT_REPOMAX 256
> +#define GOT_PORTMAX 16
> +#define GOT_URIMAX 1024
> +
> +static int
> +mkpath(char *path)
> +{
> + char *p, namebuf[PATH_MAX];
> + struct stat sb;
> + int done;
> +
> + while (*path == '/')
> + path++;
> + if(strlcpy(namebuf, path, sizeof(namebuf)) >= sizeof(namebuf)) {
> + errno = ENAMETOOLONG;
> + return -1;
> + }
> +
> + p = namebuf;
> + for (;;) {
> + p += strspn(p, "/");
> + p += strcspn(p, "/");
> + done = (*p == '\0');
> + *p = '\0';
> +
> + if (mkdir(namebuf, 0755) != 0) {
> + int mkdir_errno = errno;
> + if (stat(path, &sb) == -1) {
> + /* Not there; use mkdir()s errno */
> + errno = mkdir_errno;
> + return -1;
> + }
> + if (!S_ISDIR(sb.st_mode)) {
> + /* Is there, but isn't a directory */
> + errno = ENOTDIR;
> + return -1;
> + }
> + }
> +
> + if (done)
> + break;
> + *p = '/';
> + }
> +
> + return 0;
> +}
> +
> +static int
> +hassuffix(char *base, char *suf)
> +{
> + int nb, ns;
> +
> + nb = strlen(base);
> + ns = strlen(suf);
> + if(ns <= nb && strcmp(base + (nb - ns), suf) == 0)
> + return 1;
> + return 0;
> +}
> +
> +static int
> +grab(char *dst, int n, char *p, char *e)
> +{
> + int l;
> +
> + l = e - p;
> + if(l >= n) {
> + errno = ENAMETOOLONG;
> + return -1;
> + }
> + return strlcpy(dst, p, l + 1);
> +}
> +
> +static int
> +got_dial_ssh(char *host, char *port, char *path, char *direction)
> +{
> + int pid, pfd[2];
> + char cmd[64];
> +
> + if(pipe(pfd) == -1)
> + return -1;
> + pid = fork();
> + if(pid == -1)
> + return -1;
> + if(pid == 0){
> + close(pfd[1]);
> + dup2(pfd[0], 0);
> + dup2(pfd[0], 1);
> + snprintf(cmd, sizeof(cmd), "git-%s-pack", direction);
> + execlp("ssh", "ssh", host, cmd, path, NULL);
> + abort();
> + }else{
> + close(pfd[0]);
> + return pfd[1];
> + }
> +}
> +
> +#if 0
> +int
> +got_dial_git(char *host, char *port, char *path, char *direction)
> +{
> + struct sockaddr_storage sk;
> + char *ds, cmd[128];
> + int fd, l;
> +
> + ds = netmkaddr(host, "tcp", port);
> + fd = dial(ds, NULL, NULL, NULL);
> + if(fd == -1)
> + return -1;
> + if(chattygit)
> + fprint(2, "dial %s %s git-%s-pack %s\n", host, port, direction, path);
> + l = snprint(cmd, sizeof(cmd), "git-%s-pack %s\n", direction, path);
> + if(writepkt(fd, cmd, l + 1) == -1){
> + print("failed to write message\n");
> + close(fd);
> + return -1;
> + }
> + return fd;
> +}
> +#endif
> +
> +int
> +got_parse_uri(char *uri, char *proto, char *host, char *port, char *path, char *repo)
> +{
> + char *s, *p, *q;
> + int n, hasport;
> +
> + p = strstr(uri, "://");
> + if(!p){
> + //werrstr("missing protocol");
> + return -1;
> + }
> + if (grab(proto, GOT_PROTOMAX, uri, p) == -1)
> + return -1;
> + hasport = (strcmp(proto, "git") == 0 || strstr(proto, "http") == proto);
> + s = p + 3;
> + p = NULL;
> + if(!hasport){
> + p = strstr(s, ":");
> + if(p != NULL)
> + p++;
> + }
> + if(p == NULL)
> + p = strstr(s, "/");
> + if(p == NULL || strlen(p) == 1){
> + //werrstr("missing path");
> + return -1;
> + }
> +
> + q = memchr(s, ':', p - s);
> + if(q){
> + grab(host, GOT_HOSTMAX, s, q);
> + grab(port, GOT_PORTMAX, q + 1, p);
> + }else{
> + grab(host, GOT_HOSTMAX, s, p);
> + snprintf(port, GOT_PORTMAX, "9418");
> + }
> +
> + snprintf(path, GOT_PATHMAX, "%s", p);
> + p = strrchr(p, '/') + 1;
> + if(!p || strlen(p) == 0){
> + //werrstr("missing repository in uri");
> + return -1;
> + }
> + n = strlen(p);
> + if(hassuffix(p, ".git"))
> + n -= 4;
> + grab(repo, GOT_REPOMAX, p, p + n);
> + return 0;
> +}
> +
> +const struct got_error*
> +got_clone(char *uri, char *branch_filter, char *dirname)
> +{
> + char proto[GOT_PROTOMAX], host[GOT_HOSTMAX], port[GOT_PORTMAX];
> + char repo[GOT_REPOMAX], path[GOT_PATHMAX];
> + int imsg_fetchfds[2], imsg_idxfds[2], fetchfd;
> + int packfd, npackfd, idxfd, nidxfd, status;
> + struct got_object_id packhash;
> + const struct got_error *err;
> + struct imsgbuf ibuf;
> + pid_t pid;
> +
> + fetchfd = -1;
> + if (got_parse_uri(uri, proto, host, port, path, repo) == -1)
> + return got_error(GOT_ERR_PARSE_URI);
> + if (dirname == NULL)
> + dirname = repo;
> + err = got_repo_init(dirname);
> + if (err != NULL)
> + return err;
> + if (chdir(dirname))
> + return got_error_from_errno("enter new repo");
> + if (mkpath(".git/objects/pack") == -1)
> + return got_error_from_errno("mkpath");
> + packfd = open(".git/objects/pack/fetching.pack", O_CREAT|O_RDWR, 0644);
> + if (packfd == -1)
> + return got_error_from_errno("open pack");
> + npackfd = dup(packfd);
> + if (npackfd == -1)
> + return got_error_from_errno("dup");
> + idxfd = open(".git/objects/pack/fetching.idx", O_CREAT|O_RDWR, 0644);
> + if (idxfd == -1)
> + return got_error_from_errno("open pack");
> + nidxfd = dup(idxfd);
> + if (nidxfd == -1)
> + return got_error_from_errno("dup");
> +
> + if(strcmp(proto, "ssh") == 0 || strcmp(proto, "git+ssh") == 0)
> + fetchfd = got_dial_ssh(host, port, path, "upload");
> + //else if(strcmp(proto, "git") == 0)
> + // fetchfd = got_dial_git(host, port, path, "upload");
> + else if(strcmp(proto, "http") == 0 || strcmp(proto, "git+http") == 0)
> + err = got_error(GOT_ERR_BAD_PROTO);
> + else
> + err = got_error(GOT_ERR_BAD_PROTO);
> +
> + if (fetchfd == -1)
> + err = got_error_from_errno("dial uri");
> + if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_fetchfds) == -1)
> + return got_error_from_errno("socketpair");
> +
> + pid = fork();
> + if (pid == -1)
> + return got_error_from_errno("fork");
> + else if (pid == 0)
> + got_privsep_exec_child(imsg_fetchfds, GOT_PATH_PROG_FETCH_PACK, ".");
> +
> + if (close(imsg_fetchfds[1]) != 0)
> + return got_error_from_errno("close");
> + imsg_init(&ibuf, imsg_fetchfds[0]);
> + err = got_privsep_send_fetch_req(&ibuf, fetchfd);
> + if (err != NULL)
> + return err;
> + err = got_privsep_wait_ack(&ibuf);
> + if (err != NULL)
> + return err;
> + err = got_privsep_send_tmpfd(&ibuf, npackfd);
> + if (err != NULL)
> + return err;
> + npackfd = dup(packfd);
> + if (npackfd == -1)
> + return got_error_from_errno("dup");
> + err = got_privsep_wait_fetch_done(&ibuf, &packhash);
> + if (err != NULL)
> + return err;
> + if (waitpid(pid, &status, 0) == -1)
> + return got_error_from_errno("child exit");
> +
> + if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_idxfds) == -1)
> + return got_error_from_errno("socketpair");
> + pid = fork();
> + if (pid == -1)
> + return got_error_from_errno("fork");
> + else if (pid == 0)
> + got_privsep_exec_child(imsg_idxfds, GOT_PATH_PROG_INDEX_PACK, ".");
> + if (close(imsg_idxfds[1]) != 0)
> + return got_error_from_errno("close");
> + imsg_init(&ibuf, imsg_idxfds[0]);
> +
> + err = got_privsep_send_index_pack_req(&ibuf, npackfd, packhash);
> + if (err != NULL)
> + return err;
> + err = got_privsep_wait_ack(&ibuf);
> + if (err != NULL)
> + return err;
> + err = got_privsep_send_tmpfd(&ibuf, nidxfd);
> + if (err != NULL)
> + return err;
> + err = got_privsep_wait_index_pack_done(&ibuf);
> + if (err != NULL)
> + return err;
> + imsg_clear(&ibuf);
> + if (close(imsg_idxfds[0]) == -1)
> + return got_error_from_errno("close child");
> + if (waitpid(pid, &status, 0) == -1)
> + return got_error_from_errno("child exit");
> +
> +
> + return NULL;
> +
> +}
> diff --git a/lib/got_lib_object_idset.h b/lib/got_lib_object_idset.h
> index 6ae68d9..6dbd75c 100644
> --- a/lib/got_lib_object_idset.h
> +++ b/lib/got_lib_object_idset.h
> @@ -26,6 +26,8 @@ const struct got_error *got_object_idset_remove(void **,
> struct got_object_idset *, struct got_object_id *);
> int got_object_idset_contains(struct got_object_idset *,
> struct got_object_id *);
> +void *got_object_idset_lookup_data(struct got_object_idset *,
> + struct got_object_id *);
> const struct got_error *got_object_idset_for_each(struct got_object_idset *,
> const struct got_error *(*cb)(struct got_object_id *, void *, void *),
> void *);
> diff --git a/lib/got_lib_privsep.h b/lib/got_lib_privsep.h
> index b27325d..6789c93 100644
> --- a/lib/got_lib_privsep.h
> +++ b/lib/got_lib_privsep.h
> @@ -42,6 +42,8 @@
> #define GOT_PROG_READ_TAG got-read-tag
> #define GOT_PROG_READ_PACK got-read-pack
> #define GOT_PROG_READ_GITCONFIG got-read-gitconfig
> +#define GOT_PROG_FETCH_PACK got-fetch-pack
> +#define GOT_PROG_SEND_PACK got-send-pack
>
> #define GOT_STRINGIFY(x) #x
> #define GOT_STRINGVAL(x) GOT_STRINGIFY(x)
> @@ -61,6 +63,10 @@
> GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_READ_PACK)
> #define GOT_PATH_PROG_READ_GITCONFIG \
> GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_READ_GITCONFIG)
> +#define GOT_PATH_PROG_FETCH_PACK \
> + GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_FETCH_PACK)
> +#define GOT_PATH_PROG_SEND_PACK \
> + GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_SEND_PACK)
>
> struct got_privsep_child {
> int imsg_fd;
> @@ -98,6 +104,12 @@ enum got_imsg_type {
> GOT_IMSG_TAG,
> GOT_IMSG_TAG_TAGMSG,
>
> + /* Messages related to networking. */
> + GOT_IMSG_FETCH_REQUEST,
> + GOT_IMSG_FETCH_DONE,
> + GOT_IMSG_IDXPACK_REQUEST,
> + GOT_IMSG_IDXPACK_DONE,
> +
> /* Messages related to pack files. */
> GOT_IMSG_PACKIDX,
> GOT_IMSG_PACK,
> @@ -106,6 +118,7 @@ enum got_imsg_type {
> /* Message sending file descriptor to a temporary file. */
> GOT_IMSG_TMPFD,
>
> +
> /* Messages related to gitconfig files. */
> GOT_IMSG_GITCONFIG_PARSE_REQUEST,
> GOT_IMSG_GITCONFIG_REPOSITORY_FORMAT_VERSION_REQUEST,
> @@ -272,6 +285,12 @@ const struct got_error *got_privsep_send_blob_outfd(struct imsgbuf *, int);
> const struct got_error *got_privsep_send_tmpfd(struct imsgbuf *, int);
> const struct got_error *got_privsep_send_obj(struct imsgbuf *,
> struct got_object *);
> +const struct got_error *got_privsep_send_index_pack_req(struct imsgbuf *, int);
> +const struct got_error *got_privsep_send_index_pack_done(struct imsgbuf *,
> + struct got_object_id*);
> +const struct got_error *got_privsep_send_fetch_req(struct imsgbuf *, int);
> +const struct got_error *got_privsep_send_fetch_done(struct imsgbuf *);
> +const struct got_error *got_privsep_wait_fetch_done(struct imsgbuf *);
> const struct got_error *got_privsep_get_imsg_obj(struct got_object **,
> struct imsg *, struct imsgbuf *);
> const struct got_error *got_privsep_recv_obj(struct got_object **,
> diff --git a/lib/inflate.c b/lib/inflate.c
> index 3986b17..ebcdf12 100644
> --- a/lib/inflate.c
> +++ b/lib/inflate.c
> @@ -86,12 +86,16 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp)
> size_t last_total_out = zb->z.total_out;
> z_stream *z = &zb->z;
> int ret = Z_ERRNO;
> + off_t off, consumed;
>
> z->next_out = zb->outbuf;
> z->avail_out = zb->outlen;
>
> *outlenp = 0;
> + off = ftello(f);
> + consumed = 0;
> do {
> + size_t last_total_in = zb->z.total_in;
> if (z->avail_in == 0) {
> size_t n = fread(zb->inbuf, 1, zb->inlen, f);
> if (n == 0) {
> @@ -105,6 +109,7 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp)
> z->avail_in = n;
> }
> ret = inflate(z, Z_SYNC_FLUSH);
> + consumed += z->total_in - last_total_in;
> } while (ret == Z_OK && z->avail_out > 0);
>
> if (ret == Z_OK) {
> @@ -116,6 +121,7 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp)
> }
>
> *outlenp = z->total_out - last_total_out;
> + fseek(f, off + consumed, SEEK_SET);
> return NULL;
> }
>
> diff --git a/lib/object.c b/lib/object.c
> index 4aaeb8a..2044c22 100644
> --- a/lib/object.c
> +++ b/lib/object.c
> @@ -51,6 +51,7 @@
> #include "got_lib_object_cache.h"
> #include "got_lib_object_parse.h"
> #include "got_lib_pack.h"
> +#include "got_lib_fetch.h"
> #include "got_lib_repository.h"
>
> #ifndef MIN
> diff --git a/lib/object_idset.c b/lib/object_idset.c
> index 527383c..510b59e 100644
> --- a/lib/object_idset.c
> +++ b/lib/object_idset.c
> @@ -168,6 +168,14 @@ got_object_idset_contains(struct got_object_idset *set,
> return entry ? 1 : 0;
> }
>
> +void *
> +got_object_idset_lookup_data(struct got_object_idset *set,
> + struct got_object_id *id)
This function seems to do the exact same thing as got_object_idset_get()
which already exists.
> +{
> + struct got_object_idset_element *entry = find_element(set, id);
> + return entry ? entry->data : NULL;
> +}
> +
> const struct got_error *
> got_object_idset_for_each(struct got_object_idset *set,
> const struct got_error *(*cb)(struct got_object_id *, void *, void *),
> diff --git a/lib/privsep.c b/lib/privsep.c
> index f31cafb..599d5eb 100644
> --- a/lib/privsep.c
> +++ b/lib/privsep.c
> @@ -401,6 +401,44 @@ got_privsep_send_obj(struct imsgbuf *ibuf, struct got_object *obj)
> return flush_imsg(ibuf);
> }
>
> +const struct got_error *
> +got_privsep_send_fetch_req(struct imsgbuf *ibuf, int fd)
> +{
> + const struct got_error *err = NULL;
> +
> + if (imsg_compose(ibuf, GOT_IMSG_FETCH_REQUEST, 0, 0, fd,
> + NULL, 0) == -1) {
> + err = got_error_from_errno("imsg_compose FETCH_REQUEST");
> + close(fd);
> + return err;
> + }
> + return flush_imsg(ibuf);
> +}
> +
> +const struct got_error *
> +got_privsep_send_fetch_done(struct imsgbuf *ibuf)
> +{
> + if (imsg_compose(ibuf, GOT_IMSG_FETCH_DONE, 0, 0, -1, NULL, 0) == -1)
> + return got_error_from_errno("imsg_compose FETCH");
> + return flush_imsg(ibuf);
> +}
> +
> +const struct got_error *
> +got_privsep_wait_fetch_done(struct imsgbuf *ibuf)
> +{
> + const struct got_error *err = NULL;
> + struct imsg imsg;
> +
> + err = got_privsep_recv_imsg(&imsg, ibuf, 0);
> + if (err)
> + return err;
> + if (imsg.hdr.type == GOT_IMSG_FETCH_DONE)
> + return NULL;
> + else
> + return got_error(GOT_ERR_PRIVSEP_MSG);
> + imsg_free(&imsg);
> +}
> +
> const struct got_error *
> got_privsep_get_imsg_obj(struct got_object **obj, struct imsg *imsg,
> struct imsgbuf *ibuf)
> diff --git a/libexec/Makefile b/libexec/Makefile
> index a4c900b..ddc207c 100644
> --- a/libexec/Makefile
> +++ b/libexec/Makefile
> @@ -1,4 +1,4 @@
> SUBDIR = got-read-blob got-read-commit got-read-object got-read-tree \
> - got-read-tag got-read-pack got-read-gitconfig
> + got-read-tag got-fetch-pack got-read-pack got-read-gitconfig
>
> .include <bsd.subdir.mk>
> diff --git a/libexec/got-index-pack/got-index-pack.c b/libexec/got-index-pack/got-index-pack.c
> new file mode 100644
> index 0000000..3bf2502
> --- /dev/null
> +++ b/libexec/got-index-pack/got-index-pack.c
> @@ -0,0 +1,1254 @@
Please add your copyright and licence statements here.
> +#include <sys/queue.h>
> +#include <sys/stat.h>
> +#include <sys/syslimits.h>
> +#include <sys/time.h>
> +#include <sys/types.h>
> +#include <sys/uio.h>
> +
> +#include <stdint.h>
> +#include <errno.h>
> +#include <imsg.h>
> +#include <limits.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <ctype.h>
> +#include <sha1.h>
> +#include <fcntl.h>
> +#include <zlib.h>
> +#include <err.h>
> +#include <assert.h>
> +#include <dirent.h>
> +
> +#include "got_error.h"
> +#include "got_object.h"
> +
> +#include "got_lib_sha1.h"
> +#include "got_lib_delta.h"
> +#include "got_lib_inflate.h"
> +#include "got_lib_object.h"
> +#include "got_lib_object_parse.h"
> +#include "got_lib_object_idset.h"
> +#include "got_lib_privsep.h"
> +
> +typedef struct Cinfo Cinfo;
> +typedef struct Tinfo Tinfo;
> +typedef struct Object Object;
> +typedef struct Pack Pack;
> +typedef struct Buf Buf;
> +typedef struct Dirent Dirent;
> +typedef struct Idxent Idxent;
> +typedef struct Ols Ols;
> +
> +enum {
> + /* 5k objects should be enough */
> + Cachemax = 5*1024,
> + Pathmax = 512,
> + Hashsz = 20,
> + Pktmax = 65536,
> +
> + Nproto = 16,
> + Nport = 16,
> + Nhost = 256,
> + Npath = 128,
> + Nrepo = 64,
> + Nbranch = 32,
> +};
> +
> +typedef enum Type {
> + GNone = 0,
> + GCommit = 1,
> + GTree = 2,
> + GBlob = 3,
> + GTag = 4,
> + GOdelta = 6,
> + GRdelta = 7,
> +} Type;
> +
> +enum {
> + Cloaded = 1 << 0,
> + Cidx = 1 << 1,
> + Ccache = 1 << 2,
> + Cexist = 1 << 3,
> + Cparsed = 1 << 5,
> +};
> +
> +struct Dirent {
> + char *name;
> + int modref;
> + int mode;
> + struct got_object_id h;
> +};
> +
> +struct Object {
> + /* Git data */
> + struct got_object_id hash;
> + Type type;
> +
> + /* Cache */
> + int id;
> + int flag;
> + int refs;
> + Object *next;
> + Object *prev;
> +
> + /* For indexing */
> + off_t off;
> + off_t len;
> + uint32_t crc;
> +
> + /* Everything below here gets cleared */
> + char *all;
> + char *data;
> + /* size excludes header */
> + off_t size;
> +
> + union {
> + Cinfo *commit;
> + Tinfo *tree;
> + };
> +};
> +
> +struct Tinfo {
> + /* Tree */
> + Dirent *ent;
> + int nent;
> +};
> +
> +struct Cinfo {
> + /* Commit */
> + struct got_object_id *parent;
> + int nparent;
> + struct got_object_id tree;
> + char *author;
> + char *committer;
> + char *msg;
> + int nmsg;
> + off_t ctime;
> + off_t mtime;
> +};
> +
> +typedef struct Buf Buf;
> +
> +struct Buf {
> + int len;
> + int sz;
> + char *data;
> +};
> +
> +static int readpacked(FILE *, Object *, int);
> +static Object *readidxobject(FILE *, struct got_object_id, int);
> +
> +struct got_object_idset *objcache;
> +int next_object_id;
> +Object *lruhead;
> +Object *lrutail;
> +int ncache;
> +
> +#define GETBE16(b)\
> + ((((b)[0] & 0xFFul) << 8) | \
> + (((b)[1] & 0xFFul) << 0))
> +
> +#define GETBE32(b)\
> + ((((b)[0] & 0xFFul) << 24) | \
> + (((b)[1] & 0xFFul) << 16) | \
> + (((b)[2] & 0xFFul) << 8) | \
> + (((b)[3] & 0xFFul) << 0))
> +#define GETBE64(b)\
> + ((((b)[0] & 0xFFull) << 56) | \
> + (((b)[1] & 0xFFull) << 48) | \
> + (((b)[2] & 0xFFull) << 40) | \
> + (((b)[3] & 0xFFull) << 32) | \
> + (((b)[4] & 0xFFull) << 24) | \
> + (((b)[5] & 0xFFull) << 16) | \
> + (((b)[6] & 0xFFull) << 8) | \
> + (((b)[7] & 0xFFull) << 0))
> +
> +#define PUTBE16(b, n)\
> + do{ \
> + (b)[0] = (n) >> 8; \
> + (b)[1] = (n) >> 0; \
> + } while(0)
> +
> +#define PUTBE32(b, n)\
> + do{ \
> + (b)[0] = (n) >> 24; \
> + (b)[1] = (n) >> 16; \
> + (b)[2] = (n) >> 8; \
> + (b)[3] = (n) >> 0; \
> + } while(0)
> +
> +#define PUTBE64(b, n)\
> + do{ \
> + (b)[0] = (n) >> 56; \
> + (b)[1] = (n) >> 48; \
> + (b)[2] = (n) >> 40; \
> + (b)[3] = (n) >> 32; \
> + (b)[4] = (n) >> 24; \
> + (b)[5] = (n) >> 16; \
> + (b)[6] = (n) >> 8; \
> + (b)[7] = (n) >> 0; \
> + } while(0)
> +
> +static int
> +charval(int c, int *err)
> +{
> + if(c >= '0' && c <= '9')
> + return c - '0';
> + if(c >= 'a' && c <= 'f')
> + return c - 'a' + 10;
> + if(c >= 'A' && c <= 'F')
> + return c - 'A' + 10;
> + *err = 1;
> + return -1;
> +}
> +
> +static int
> +hparse(struct got_object_id *h, char *b)
> +{
> + int i, err;
> +
> + err = 0;
> + for(i = 0; i < sizeof(h->sha1); i++){
> + err = 0;
> + h->sha1[i] = 0;
> + h->sha1[i] |= ((charval(b[2*i], &err) & 0xf) << 4);
> + h->sha1[i] |= ((charval(b[2*i+1], &err)& 0xf) << 0);
> + if(err)
> + return -1;
> + }
> + return 0;
> +}
> +
> +static void *
> +emalloc(size_t n)
> +{
> + void *v;
> +
> + v = calloc(n, 1);
> + if(v == NULL)
> + err(1, "malloc:");
> + return v;
> +}
> +
> +static void *
> +erealloc(void *p, ulong n)
> +{
> + void *v;
> +
> + v = realloc(p, n);
> + if(v == NULL)
> + err(1, "realloc:");
> + memset(v, 0, n);
> + return v;
> +}
> +
> +static int
> +hasheq(struct got_object_id *a, struct got_object_id *b)
> +{
> + return memcmp(a->sha1, b->sha1, sizeof(a->sha1)) == 0;
> +}
> +
> +static char *
> +typestr(int t)
> +{
> + char *types[] = {
> + "???",
> + "commit",
> + "tree",
> + "blob",
> + "tag",
> + "odelta",
> + "rdelta",
> + };
> + if (t < 0 || t >= sizeof(types)/sizeof(types[0]))
> + abort();
> + return types[t];
> +}
> +
> +static char *
> +hashfmt(char *out, size_t nout, struct got_object_id *h)
> +{
> + int i, n, c0, c1;
> + char *p;
> +
> + if (nout < 2*sizeof(h->sha1) + 1)
> + return NULL;
> + p = out;
> + for(i = 0; i < sizeof(h->sha1); i++){
> + n = (h->sha1[i] >> 4) & 0xf;
> + c0 = (n >= 10) ? n-10 + 'a' : n + '0';
> + n = h->sha1[i] & 0xf;
> + c1 = (n >= 10) ? n-10 + 'a' : n + '0';
> + *p++ = c0;
> + *p++ = c1;
> + }
> + *p++ = 0;
> + return out;
> +}
> +
> +static void
> +clear(Object *o)
> +{
> + if(!o)
> + return;
> +
> + assert(o->refs == 0);
> + assert((o->flag & Ccache) == 0);
> + assert(o->flag & Cloaded);
> + switch(o->type){
> + case GCommit:
> + if(!o->commit)
> + break;
> + free(o->commit->parent);
> + free(o->commit->author);
> + free(o->commit->committer);
> + free(o->commit);
> + o->commit = NULL;
> + break;
> + case GTree:
> + if(!o->tree)
> + break;
> + free(o->tree->ent);
> + free(o->tree);
> + o->tree = NULL;
> + break;
> + default:
> + break;
> + }
> +
> + free(o->all);
> + o->all = NULL;
> + o->data = NULL;
> + o->flag &= ~Cloaded;
> +}
> +
> +static void
> +unref(Object *o)
> +{
> + if(!o)
> + return;
> + o->refs--;
> + if(!o->refs)
> + clear(o);
> +}
> +
> +static Object*
> +ref(Object *o)
> +{
> + o->refs++;
> + return o;
> +}
> +
> +static void
> +cache(Object *o)
> +{
> + char buf[41];
> + Object *p;
> +
> + hashfmt(buf, sizeof(buf), &o->hash);
> + if(o == lruhead)
> + return;
> + if(o == lrutail)
> + lrutail = lrutail->prev;
> + if(!(o->flag & Cexist)){
> + got_object_idset_add(objcache, &o->hash, o);
> + o->id = next_object_id++;
> + o->flag |= Cexist;
> + }
> + if(o->prev)
> + o->prev->next = o->next;
> + if(o->next)
> + o->next->prev = o->prev;
> + if(lrutail == o){
> + lrutail = o->prev;
> + lrutail->next = NULL;
> + }else if(!lrutail)
> + lrutail = o;
> + if(lruhead)
> + lruhead->prev = o;
> + o->next = lruhead;
> + o->prev = NULL;
> + lruhead = o;
> +
> + if(!(o->flag & Ccache)){
> + o->flag |= Ccache;
> + ref(o);
> + ncache++;
> + }
> + while(ncache > Cachemax){
> + p = lrutail;
> + lrutail = p->prev;
> + lrutail->next = NULL;
> + p->flag &= ~Ccache;
> + p->prev = NULL;
> + p->next = NULL;
> + unref(p);
> + ncache--;
> + }
> +}
> +
> +static int
> +preadbe32(FILE *b, int *v, off_t off)
> +{
> + char buf[4];
> +
> + if(fseek(b, off, 0) == -1)
> + return -1;
> + if(fread(buf, 1, sizeof(buf), b) == -1)
> + return -1;
> + *v = GETBE32(buf);
> +
> + return 0;
> +}
> +static int
> +preadbe64(FILE *b, off_t *v, off_t off)
> +{
> + char buf[8];
> +
> + if(fseek(b, off, 0) == -1)
> + return -1;
> + if(fread(buf, 1, sizeof(buf), b) == -1)
> + return -1;
> + *v = GETBE64(buf);
> + return 0;
> +}
> +
> +static int
> +readvint(char *p, char **pp)
> +{
> + int i, n, c;
> +
> + i = 0;
> + n = 0;
> + do {
> + c = *p++;
> + n |= (c & 0x7f) << i;
> + i += 7;
> + } while (c & 0x80);
> + *pp = p;
> +
> + return n;
> +}
> +
> +static int
> +applydelta(Object *dst, Object *base, char *d, int nd)
> +{
> + char *r, *b, *ed, *er;
> + int n, nr, c;
> + off_t o, l;
> +
> + ed = d + nd;
> + b = base->data;
> + n = readvint(d, &d);
> + if(n != base->size){
> + fprintf(stderr, "mismatched source size");
> + return -1;
> + }
> +
> + nr = readvint(d, &d);
> + r = emalloc(nr + 64);
> + n = snprintf(r, 64, "%s %d", typestr(base->type), nr) + 1;
> + dst->all = r;
> + dst->type = base->type;
> + dst->data = r + n;
> + dst->size = nr;
> + er = dst->data + nr;
> + r = dst->data;
> +
> + while(1){
> + if(d == ed)
> + break;
> + c = *d++;
> + if(!c){
> + fprintf(stderr, "bad delta encoding");
> + return -1;
> + }
> + /* copy from base */
> + if(c & 0x80){
> + o = 0;
> + l = 0;
> + /* Offset in base */
> + if(c & 0x01 && d != ed) o |= (*d++ << 0) & 0x000000ff;
> + if(c & 0x02 && d != ed) o |= (*d++ << 8) & 0x0000ff00;
> + if(c & 0x04 && d != ed) o |= (*d++ << 16) & 0x00ff0000;
> + if(c & 0x08 && d != ed) o |= (*d++ << 24) & 0xff000000;
> +
> + /* Length to copy */
> + if(c & 0x10 && d != ed) l |= (*d++ << 0) & 0x0000ff;
> + if(c & 0x20 && d != ed) l |= (*d++ << 8) & 0x00ff00;
> + if(c & 0x40 && d != ed) l |= (*d++ << 16) & 0xff0000;
> + if(l == 0) l = 0x10000;
> +
> + assert(o + l <= base->size);
> + memmove(r, b + o, l);
> + r += l;
> + /* inline data */
> + }else{
> + memmove(r, d, c);
> + d += c;
> + r += c;
> + }
> +
> + }
> + if(r != er){
> + fprintf(stderr, "truncated delta (%zd)", er - r);
> + return -1;
> + }
> +
> + return nr;
> +}
> +
> +static int
> +readrdelta(FILE *f, Object *o, int nd, int flag)
> +{
> + const struct got_error *e;
> + struct got_object_id h;
> + Object *b;
> + uint8_t *d;
> + size_t n;
> +
> + d = NULL;
> + if(fread(h.sha1, 1, sizeof(h.sha1), f) != sizeof(h.sha1))
> + goto error;
> + if(hasheq(&o->hash, &h))
> + goto error;
> + if ((e = got_inflate_to_mem(&d, &n, f)) != NULL)
> + goto error;
> + o->len = ftello(f) - o->off;
> + if(d == NULL || n != nd)
> + goto error;
> + if((b = readidxobject(f, h, flag)) == NULL)
> + goto error;
> + if(applydelta(o, b, d, n) == -1)
> + goto error;
> + free(d);
> + return 0;
> +error:
> + free(d);
> + return -1;
> +}
> +
> +/*
> + * Read a delta whose base is identified by a backwards offset within
> + * the same pack stream.  The variable-length offset is decoded, the
> + * delta data is inflated, the base is read from position p - offset,
> + * and the delta is applied to produce the contents of o.
> + *
> + * f    - pack stream positioned just after the object header
> + * o    - object being filled in; o->len is updated once the delta
> + *        data has been inflated
> + * nd   - expected size of the inflated delta
> + * p    - stream position at which this object's header started
> + * flag - passed through to readpacked() for the base object
> + *
> + * Returns 0 on success and -1 on any failure.
> + */
> +static int
> +readodelta(FILE *f, Object *o, off_t nd, off_t p, int flag)
> +{
> +	Object b;
> +	uint8_t *d = NULL;
> +	off_t r = 0;
> +	size_t n;
> +	int c, ret = -1;
> +
> +	/*
> +	 * readpacked() reads b.off and ORs into b.flag, so the
> +	 * temporary base object must not be left uninitialized.
> +	 */
> +	memset(&b, 0, sizeof(b));
> +
> +	/* Decode the base offset: 7 bits per byte, high bit = "more". */
> +	for (;;) {
> +		if ((c = fgetc(f)) == EOF)
> +			goto done;
> +		r |= c & 0x7f;
> +		if (!(c & 0x80))
> +			break;
> +		r++;
> +		r <<= 7;
> +	}
> +
> +	/*
> +	 * The base must lie strictly before this object; a zero offset
> +	 * would make the object its own base and recurse forever.
> +	 */
> +	if (r <= 0 || r > p) {
> +		fprintf(stderr, "junk offset -%lld (from %lld)",
> +		    (long long)r, (long long)p);
> +		goto done;
> +	}
> +
> +	/* got_inflate_to_mem() returns NULL on success. */
> +	if (got_inflate_to_mem(&d, &n, f) != NULL)
> +		goto done;
> +	o->len = ftello(f) - o->off;
> +	if (d == NULL || n != nd)
> +		goto done;
> +	if (fseeko(f, p - r, SEEK_SET) == -1)
> +		goto done;
> +	if (readpacked(f, &b, flag) == -1)
> +		goto done;
> +	/*
> +	 * NOTE(review): the buffer readpacked() allocated for the base
> +	 * (b.all) is not released here; confirm whether applydelta()
> +	 * keeps a reference to it before adding a free().
> +	 */
> +	if (applydelta(o, &b, d, n) == -1)
> +		goto done;
> +	ret = 0;
> +done:
> +	free(d);
> +	return ret;
> +}
> +
> +/*
> + * Read one object from a pack stream at the current position.
> + *
> + * The object header holds a 3-bit type and a variable-length size.
> + * Commits, trees, tags and blobs are inflated into a buffer that is
> + * prefixed with a "<type> <size>\0" header string; o->all points at
> + * that buffer and o->data at the contents after the header.  Delta
> + * objects are handed to readodelta() or readrdelta().
> + *
> + * f    - pack stream positioned at the object header
> + * o    - object to fill in; o->off must already hold the object's
> + *        offset because o->len is computed relative to it
> + * flag - extra flag bits ORed into o->flag on success
> + *
> + * Returns 0 on success and -1 on failure.
> + */
> +static int
> +readpacked(FILE *f, Object *o, int flag)
> +{
> +	const struct got_error *e;
> +	int c, s, n;
> +	off_t l, p;
> +	size_t ndata;
> +	uint8_t *data;
> +	Type t;
> +	Buf b;
> +
> +	p = ftello(f);
> +	c = fgetc(f);
> +	if (c == EOF)
> +		return -1;
> +	l = c & 0xf;
> +	s = 4;
> +	t = (c >> 4) & 0x7;
> +	if (!t) {
> +		fprintf(stderr, "unknown type for byte %x", c);
> +		return -1;
> +	}
> +	while (c & 0x80) {
> +		if ((c = fgetc(f)) == EOF)
> +			return -1;
> +		/* Refuse sizes that would not fit in a signed off_t. */
> +		if (s > (int)(sizeof(l) * 8) - 8) {
> +			fprintf(stderr, "object size too large");
> +			return -1;
> +		}
> +		/* Widen before shifting; an int shift overflows. */
> +		l |= (off_t)(c & 0x7f) << s;
> +		s += 7;
> +	}
> +
> +	switch (t) {
> +	default:
> +		fprintf(stderr, "invalid object at %lld",
> +		    (long long)ftello(f));
> +		return -1;
> +	case GCommit:
> +	case GTree:
> +	case GTag:
> +	case GBlob:
> +		/* 64 bytes of headroom for the "<type> <size>\0" header. */
> +		b.sz = 64 + l;
> +
> +		b.data = emalloc(b.sz);
> +		n = snprintf(b.data, 64, "%s %lld", typestr(t),
> +		    (long long)l) + 1;
> +		b.len = n;
> +		e = got_inflate_to_mem(&data, &ndata, f);
> +		if (e != NULL) {
> +			free(b.data);
> +			return -1;
> +		}
> +		if (n + ndata >= b.sz) {
> +			/* Inflate succeeded, so its buffer is ours to free. */
> +			free(data);
> +			free(b.data);
> +			return -1;
> +		}
> +		memcpy(b.data + n, data, ndata);
> +		o->len = ftello(f) - o->off;
> +		o->type = t;
> +		o->all = b.data;
> +		o->data = b.data + n;
> +		o->size = ndata;
> +		free(data);
> +		break;
> +	case GOdelta:
> +		if (readodelta(f, o, l, p, flag) == -1)
> +			return -1;
> +		break;
> +	case GRdelta:
> +		if (readrdelta(f, o, l, flag) == -1)
> +			return -1;
> +		break;
> +	}
> +	o->flag |= Cloaded|flag;
> +	return 0;
> +}
> +
> +/*
> + * Read a loose object from f.
> + *
> + * The stream is inflated into memory and must start with a header of
> + * the form "<type> <size>\0", where <type> is one of blob, tree,
> + * commit or tag and <size> equals the number of bytes following the
> + * header.  On success o->all points at the inflated buffer, o->data
> + * at the contents after the header, and o->size holds the size.
> + *
> + * f    - stream holding the deflated object
> + * o    - object to fill in
> + * flag - extra flag bits ORed into o->flag on success
> + *
> + * Returns 0 on success and -1 on failure; the inflated buffer is
> + * released on every failure path.
> + */
> +static int
> +readloose(FILE *f, Object *o, int flag)
> +{
> +	struct { char *tag; int type; } *p, types[] = {
> +		{"blob", GBlob},
> +		{"tree", GTree},
> +		{"commit", GCommit},
> +		{"tag", GTag},
> +		{NULL},
> +	};
> +	char *s, *e;
> +	uint8_t *d;
> +	off_t sz;
> +	size_t n;
> +	int l;
> +
> +	d = NULL;
> +	if (got_inflate_to_mem(&d, &n, f) != NULL)
> +		return -1;
> +
> +	/*
> +	 * The string functions below stop at a NUL byte, so insist
> +	 * that the header terminator lies inside the buffer before
> +	 * scanning it.
> +	 */
> +	if (d == NULL || n == 0 || memchr(d, '\0', n) == NULL) {
> +		fprintf(stderr, "malformed object header");
> +		goto error;
> +	}
> +
> +	s = (char *)d;
> +	o->type = GNone;
> +	for (p = types; p->tag; p++) {
> +		l = strlen(p->tag);
> +		if (strncmp(s, p->tag, l) == 0) {
> +			s += l;
> +			o->type = p->type;
> +			while (*s != '\0' && !isspace((unsigned char)*s))
> +				s++;
> +			break;
> +		}
> +	}
> +	/* Unknown type: release the inflated buffer, not o->data. */
> +	if (o->type == GNone)
> +		goto error;
> +	sz = strtol(s, &e, 0);
> +	if (e == s || *e++ != 0) {
> +		fprintf(stderr, "malformed object header");
> +		goto error;
> +	}
> +	if ((size_t)sz != n - (size_t)(e - (char *)d)) {
> +		fprintf(stderr, "mismatched sizes");
> +		goto error;
> +	}
> +	o->size = sz;
> +	o->data = e;
> +	o->all = d;
> +	o->flag |= Cloaded|flag;
> +	return 0;
> +
> +error:
> +	free(d);
> +	return -1;
> +}
> +
> +/*
> + * Look up object hash h in the pack index open on f.
> + *
> + * Returns the object's offset within the corresponding pack file, or
> + * -1 if the index cannot be read or does not contain the object.
> + */
> +static off_t
> +searchindex(FILE *f, struct got_object_id h)
> +{
> +	int lo, hi, idx, i, nent;
> +	off_t o, oo;
> +	struct got_object_id hh;
> +
> +	o = 8;
> +	/*
> +	 * Read the fanout table. The fanout table
> +	 * contains 256 entries, corresponding to
> +	 * the first byte of the hash. Each entry
> +	 * is a 4 byte big endian integer, containing
> +	 * the total number of entries with a leading
> +	 * byte <= the table index, allowing us to
> +	 * narrow the range of hashes to search.
> +	 */
> +	if (h.sha1[0] == 0) {
> +		lo = 0;
> +		if (preadbe32(f, &hi, o) == -1)
> +			goto err;
> +	} else {
> +		o += h.sha1[0]*4 - 4;
> +		if (preadbe32(f, &lo, o + 0) == -1)
> +			goto err;
> +		if (preadbe32(f, &hi, o + 4) == -1)
> +			goto err;
> +	}
> +	if (hi == lo)
> +		goto notfound;
> +	/* The last fanout entry is the total number of objects. */
> +	if (preadbe32(f, &nent, 8 + 255*4) == -1)
> +		goto err;
> +
> +	/*
> +	 * Now that we know the range of hashes that the
> +	 * entry may exist in, scan them for a match.
> +	 */
> +	idx = -1;
> +	if (fseeko(f, (off_t)Hashsz*lo + 8 + 256*4, SEEK_SET) == -1)
> +		goto err;
> +	for (i = 0; i < hi - lo; i++) {
> +		/* fread() reports failure as a short count, never -1. */
> +		if (fread(hh.sha1, 1, sizeof(hh.sha1), f) != sizeof(hh.sha1))
> +			goto err;
> +		if (hasheq(&hh, &h)) {
> +			idx = lo + i;
> +			break;
> +		}
> +	}
> +	if (idx == -1)
> +		goto notfound;
> +
> +	/*
> +	 * We found the entry. If the high bit of the 32 bit
> +	 * entry is clear it is the offset itself; otherwise
> +	 * the low 31 bits are an index into the table of
> +	 * 64 bit offsets that follows the 32 bit table.
> +	 */
> +	oo = 8;				/* Header */
> +	oo += 256*4;			/* Fanout table */
> +	oo += (off_t)Hashsz*nent;	/* Hashes */
> +	oo += (off_t)4*nent;		/* Checksums */
> +	if (preadbe32(f, &i, oo + (off_t)4*idx) == -1)
> +		goto err;
> +	o = i & 0xffffffff;
> +	if (o & (1ull << 31)) {
> +		o &= 0x7fffffff;
> +		oo += (off_t)4*nent;	/* 32 bit offsets */
> +		oo += 8*o;		/* Entry in 64 bit table */
> +		if (preadbe64(f, &o, oo) == -1)
> +			goto err;
> +	}
> +	return o;
> +
> +err:
> +	fprintf(stderr, "unable to read packfile\n");
> +	return -1;
> +notfound:
> +	{
> +		char hstr[41];
> +		hashfmt(hstr, sizeof(hstr), &h);
> +		fprintf(stdout, "could not find object %s\n", hstr);
> +	}
> +	return -1;
> +}
> +
> +/*
> + * Scans for a non-empty word, copying it into buf.
> + *
> + * Leading blanks (space, tab) are skipped, then characters are
> + * copied up to the next whitespace character, NUL byte, or the end
> + * of the *nstr remaining bytes.  Blanks following the word are
> + * skipped as well.  On success buf is NUL-terminated and *str and
> + * *nstr are advanced past the word and the surrounding blanks.
> + *
> + * Returns 0 if a word was found and -1 otherwise:
> + *  - if the word does not fit into buf (including its terminating
> + *    NUL), or nbuf is not positive, the input is left unmodified;
> + *  - if no word is present, buf is set to the empty string and the
> + *    input is advanced past any leading blanks only.
> + */
> +static int
> +scanword(char **str, int *nstr, char *buf, int nbuf)
> +{
> +	char *p;
> +	int n, r;
> +
> +	/* Without room for at least the terminator nothing can be stored. */
> +	if (nbuf <= 0)
> +		return -1;
> +
> +	r = -1;
> +	p = *str;
> +	n = *nstr;
> +	/* <ctype.h> arguments must be representable as unsigned char. */
> +	while (n > 0 && isblank((unsigned char)*p)) {
> +		n--;
> +		p++;
> +	}
> +
> +	for (; n > 0 && *p && !isspace((unsigned char)*p); p++, n--) {
> +		r = 0;
> +		*buf++ = *p;
> +		nbuf--;
> +		if (nbuf == 0)
> +			return -1;
> +	}
> +	while (n > 0 && isblank((unsigned char)*p)) {
> +		n--;
> +		p++;
> +	}
> +	*buf = 0;
> +	*str = p;
> +	*nstr = n;
> +	return r;
> +}
> +
> +/*
> + * Advance *str to the character following the next newline and
> + * reduce *nstr by the number of bytes skipped.
> + *
> + * The search covers at most *nstr bytes and stops at a NUL byte.
> + * If no newline is found, *str and *nstr are left unchanged.
> + */
> +static void
> +nextline(char **str, int *nstr)
> +{
> +	char *s = *str;
> +	int left = *nstr;
> +
> +	/* Never look beyond the bytes the caller says remain. */
> +	while (left > 0 && *s != '\0' && *s != '\n') {
> +		s++;
> +		left--;
> +	}
> +	if (left > 0 && *s == '\n') {
> +		*nstr -= s - *str + 1;
> +		*str = s + 1;
> +	}
> +}
> +
> +/*
> + * Placeholder for parsing an "author" or "committer" header line.
> + * Nothing is parsed yet: the arguments are ignored, *name and *time
> + * are not written, and 0 is always returned.
> + */
> +static int
> +parseauthor(char **str, int *nstr, char **name, off_t *time)
> +{
> +	(void)str;
> +	(void)nstr;
> +	(void)name;
> +	(void)time;
> +
> +	return 0;
> +}
> +
> +/*
> + * Parse the header lines of commit object o into o->commit.
> + *
> + * "tree" and "parent" lines are decoded with hparse(); "author" and
> + * "committer" lines are passed to parseauthor(); a "gpgsig" block is
> + * skipped up to its END marker.  Parsing stops at the first line
> + * without a leading word; after skipping whitespace, the rest of the
> + * object is recorded as the commit message (msg/nmsg point into
> + * o->data, nothing is copied).
> + *
> + * Malformed input terminates the process via errx().
> + *
> + * NOTE(review): this relies on emalloc() returning zeroed memory so
> + * that parent/nparent start out as NULL/0 -- confirm.
> + */
> +static void
> +parsecommit(Object *o)
> +{
> +	char *p, *t, buf[128];
> +	int np;
> +
> +	p = o->data;
> +	np = o->size;
> +	o->commit = emalloc(sizeof(Cinfo));
> +	while (1) {
> +		if (scanword(&p, &np, buf, sizeof(buf)) == -1)
> +			break;
> +		if (strcmp(buf, "tree") == 0) {
> +			if (scanword(&p, &np, buf, sizeof(buf)) == -1)
> +				errx(1, "invalid commit: tree missing");
> +			if (hparse(&o->commit->tree, buf) == -1)
> +				errx(1, "invalid commit: garbled tree");
> +		} else if (strcmp(buf, "parent") == 0) {
> +			if (scanword(&p, &np, buf, sizeof(buf)) == -1)
> +				errx(1, "invalid commit: missing parent");
> +			/* Grow the parent array by one slot. */
> +			o->commit->parent = realloc(o->commit->parent,
> +			    ++o->commit->nparent * sizeof(struct got_object_id));
> +			if (!o->commit->parent)
> +				err(1, "unable to malloc: ");
> +			if (hparse(&o->commit->parent[o->commit->nparent - 1], buf) == -1)
> +				errx(1, "invalid commit: garbled parent");
> +		} else if (strcmp(buf, "author") == 0) {
> +			parseauthor(&p, &np, &o->commit->author, &o->commit->mtime);
> +		} else if (strcmp(buf, "committer") == 0) {
> +			parseauthor(&p, &np, &o->commit->committer, &o->commit->ctime);
> +		} else if (strcmp(buf, "gpgsig") == 0) {
> +			/*
> +			 * Just drop it.
> +			 * NOTE(review): strstr() is not bounded by np and
> +			 * needs the object data to be NUL-terminated.
> +			 */
> +			if ((t = strstr(p, "-----END PGP SIGNATURE-----")) == NULL)
> +				errx(1, "malformed gpg signature");
> +			np -= t - p;
> +			p = t;
> +		}
> +		nextline(&p, &np);
> +	}
> +	/*
> +	 * Message bytes may be >= 0x80; <ctype.h> functions need an
> +	 * unsigned char value.  Stop once no bytes remain.
> +	 */
> +	while (np > 0 && isspace((unsigned char)*p)) {
> +		p++;
> +		np--;
> +	}
> +	o->commit->msg = p;
> +	o->commit->nmsg = np;
> +}
> +
> +/*
> + * Parse the entries of tree object o into o->tree.
> + *
> + * Each entry consists of an octal mode word, a NUL-terminated name,
> + * and a raw object hash.  Entry names point into o->data; hashes are
> + * copied.  Malformed input terminates the process via errx().
> + *
> + * NOTE(review): scanword() also strips blanks that follow the mode,
> + * so a name starting with a space or tab would lose those bytes.
> + * NOTE(review): relies on emalloc() returning zeroed memory so that
> + * ent/nent start out as NULL/0 -- confirm.
> + */
> +static void
> +parsetree(Object *o)
> +{
> +	char *p, *nul, buf[256];
> +	int np, nn, m;
> +	Dirent *t;
> +
> +	p = o->data;
> +	np = o->size;
> +	o->tree = emalloc(sizeof(Tinfo));
> +	while (np > 0) {
> +		if (scanword(&p, &np, buf, sizeof(buf)) == -1)
> +			break;
> +		o->tree->ent = erealloc(o->tree->ent, ++o->tree->nent * sizeof(Dirent));
> +		t = &o->tree->ent[o->tree->nent - 1];
> +		memset(t, 0, sizeof(Dirent));
> +		m = strtol(buf, NULL, 8);
> +		/* FIXME: symlinks and other BS */
> +		t->mode = m & 0777;
> +		/* Mode 0160000 marks a reference to another repository. */
> +		if (m == 0160000)
> +			t->modref = 1;
> +		/* 0160000 has the 0040000 bit set too, so it is a dir here. */
> +		if (m & 0040000)
> +			t->mode |= S_IFDIR;
> +
> +		/* The name must be terminated within the remaining bytes. */
> +		nul = np > 0 ? memchr(p, '\0', np) : NULL;
> +		if (nul == NULL)
> +			errx(1, "malformed tree, unterminated name, remaining %d", np);
> +		t->name = p;
> +		nn = nul - p + 1;
> +		p += nn;
> +		np -= nn;
> +		/*
> +		 * Compare as int: against an unsigned sizeof, a negative
> +		 * count would look huge and pass the check.
> +		 */
> +		if (np < (int)sizeof(t->h.sha1))
> +			errx(1, "malformed tree, remaining %d", np);
> +		memcpy(t->h.sha1, p, sizeof(t->h.sha1));
> +		p += sizeof(t->h.sha1);
> +		np -= sizeof(t->h.sha1);
> +	}
> +}
> +
> +/*
> + * Parse the loaded contents of o according to its type, at most
> + * once per object.  Trees and commits have parsers; every other
> + * type (tags included, for now) is only marked as parsed.
> + */
> +void
> +parseobject(Object *o)
> +{
> +	if (!(o->flag & Cparsed)) {
> +		if (o->type == GTree)
> +			parsetree(o);
> +		else if (o->type == GCommit)
> +			parsecommit(o);
> +		/* No tag parser exists yet. */
> +		o->flag |= Cparsed;
> +	}
> +}
> +
> +/*
> + * Find and load the object with hash h.
> + *
> + * Lookup order:
> + *  1. the object cache; an entry that was indexed but is not loaded
> + *     is re-read from idx at its recorded offset,
> + *  2. a loose object file below .git/objects,
> + *  3. the pack indexes in .git/objects/pack, followed by a read from
> + *     the matching pack file.
> + *
> + * idx  - open pack stream used to reload indexed objects; must not
> + *        be NULL when such an object is found in the cache
> + * h    - hash of the wanted object
> + * flag - extra flag bits handed to the object readers
> + *
> + * Returns the object, or NULL if it could not be found or read.
> + * Failure to reposition or re-read idx, or to open the pack
> + * directory, terminates the process.
> + */
> +static Object*
> +readidxobject(FILE *idx, struct got_object_id h, int flag)
> +{
> +	char path[Pathmax];
> +	char hbuf[41];
> +	FILE *f;
> +	Object *obj;
> +	int l, n;
> +	off_t o;
> +	struct dirent *ent;
> +	DIR *d;
> +
> +	if ((obj = got_object_idset_lookup_data(objcache, &h))) {
> +		if (obj->flag & Cloaded)
> +			return obj;
> +		if (obj->flag & Cidx) {
> +			assert(idx != NULL);
> +			/* Reload in place, then put the stream back. */
> +			o = ftello(idx);
> +			if (fseeko(idx, obj->off, SEEK_SET) == -1)
> +				errx(1, "could not seek to object offset");
> +			if (readpacked(idx, obj, flag) == -1)
> +				errx(1, "could not reload object");
> +			if (fseeko(idx, o, SEEK_SET) == -1)
> +				errx(1, "could not restore offset");
> +			cache(obj);
> +			return obj;
> +		}
> +	}
> +
> +	f = NULL;
> +	/* We're not putting it in the cache yet... */
> +	obj = emalloc(sizeof(Object));
> +	/* NOTE(review): next_object_id is never advanced here -- confirm. */
> +	obj->id = next_object_id + 1;
> +	obj->hash = h;
> +
> +	hashfmt(hbuf, sizeof(hbuf), &h);
> +	snprintf(path, sizeof(path), ".git/objects/%c%c/%s", hbuf[0], hbuf[1], hbuf + 2);
> +	if ((f = fopen(path, "r")) != NULL) {
> +		if (readloose(f, obj, flag) == -1)
> +			goto error;
> +		fclose(f);
> +		parseobject(obj);
> +		hashfmt(hbuf, sizeof(hbuf), &obj->hash);
> +		fprintf(stderr, "object %s cached", hbuf);
> +		cache(obj);
> +		return obj;
> +	}
> +
> +	o = -1;
> +	if ((d = opendir(".git/objects/pack")) == NULL)
> +		err(1, "open pack dir");
> +	while ((ent = readdir(d)) != NULL) {
> +		l = strlen(ent->d_name);
> +		/* Only consider names that end in ".idx". */
> +		if (l <= 4 || strcmp(ent->d_name + l - 4, ".idx") != 0)
> +			continue;
> +		n = snprintf(path, sizeof(path), ".git/objects/pack/%s", ent->d_name);
> +		if (n < 0 || (size_t)n >= sizeof(path))
> +			continue;
> +		if ((f = fopen(path, "r")) == NULL)
> +			continue;
> +		o = searchindex(f, h);
> +		fclose(f);
> +		f = NULL;
> +		if (o == -1)
> +			continue;
> +		break;
> +	}
> +	closedir(d);
> +
> +	if (o == -1)
> +		goto error;
> +
> +	/*
> +	 * path still names the matching index; swap its ".idx" suffix
> +	 * for ".pack", which is one byte longer.
> +	 */
> +	n = strlen(path);
> +	if ((size_t)n >= sizeof(path) - 4)
> +		goto error;
> +	memcpy(path + n - 4, ".pack", 6);
> +	if ((f = fopen(path, "r")) == NULL)
> +		goto error;
> +	if (fseeko(f, o, SEEK_SET) == -1)
> +		goto error;
> +	if (readpacked(f, obj, flag) == -1)
> +		goto error;
> +	fclose(f);
> +	parseobject(obj);
> +	cache(obj);
> +	return obj;
> +error:
> +	if (f != NULL)
> +		fclose(f);
> +	free(obj);
> +	return NULL;
> +}
> +
> +/*
> + * Load the object with hash h via readidxobject(), without a pack
> + * stream and without extra flags.  When the lookup succeeds, ref()
> + * is called on the object before it is returned; NULL is returned
> + * if the object could not be found or read.
> + */
> +Object*
> +readobject(struct got_object_id h)
> +{
> +	Object *found = readidxobject(NULL, h, 0);
> +
> +	if (found == NULL)
> +		return NULL;
> +	ref(found);
> +	return found;
> +}
> +
> +/*
> + * qsort() comparison callback for an array of Object pointers:
> + * orders the pointed-to objects by the raw bytes of their hash.
> + */
> +int
> +objcmp(const void *pa, const void *pb)
> +{
> +	Object *const *lhs = pa;
> +	Object *const *rhs = pb;
> +
> +	return memcmp((*lhs)->hash.sha1, (*rhs)->hash.sha1,
> +	    sizeof((*lhs)->hash.sha1));
> +}
> +
> +/*
> + * Feed len bytes from buf into the running SHA1 context ctx, then
> + * write the same bytes to stream b.  Returns the byte count reported
> + * by fwrite(), so anything other than len means a failed write.
> + */
> +static int
> +hwrite(FILE *b, void *buf, int len, SHA1_CTX *ctx)
> +{
> +	size_t nwritten;
> +
> +	SHA1Update(ctx, buf, len);
> +	nwritten = fwrite(buf, 1, len, b);
> +	return nwritten;
> +}
> +
> +/*
> + * Compute the CRC32 of the o->len bytes starting at offset o->off
> + * in f and store it in o->crc.  The stream is left positioned after
> + * the last byte read.
> + *
> + * Returns 0 on success, or -1 (as it appears in the unsigned return
> + * type) if the stream cannot be positioned or a read error occurs.
> + * Hitting end-of-file early is not an error: the CRC of the bytes
> + * read so far is kept and 0 is returned.
> + */
> +static uint32_t
> +objectcrc(FILE *f, Object *o)
> +{
> +	char buf[8096];
> +	off_t n;
> +	size_t r, want;
> +
> +	o->crc = 0;
> +	if (fseeko(f, o->off, SEEK_SET) == -1)
> +		return -1;
> +	for (n = o->len; n > 0; n -= r) {
> +		want = n > (off_t)sizeof(buf) ? sizeof(buf) : (size_t)n;
> +		r = fread(buf, 1, want, f);
> +		if (r == 0) {
> +			/* fread() never returns -1; ask the stream instead. */
> +			if (ferror(f))
> +				return -1;
> +			return 0;
> +		}
> +		o->crc = crc32(o->crc, buf, r);
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Build a version 2 pack index for the pack file open on packfd and
> + * write it to idxfd.
> + *
> + * The pack is scanned repeatedly: each pass tries to read every
> + * object that has not been read successfully yet, until all objects
> + * are read.  An object that cannot be read in one pass (for example
> + * a delta whose base is not available yet) is retried in the next.
> + * A pass that makes no progress terminates the process via errx().
> + *
> + * The index consists of the header, the fanout table, the sorted
> + * object hashes, per-object CRC32 values, 32 bit offsets, 64 bit
> + * offsets for objects at or beyond 2^31, the pack hash, and a SHA1
> + * over everything before it.
> + *
> + * packfd   - descriptor of the pack file; closed by this function
> + * idxfd    - descriptor the index is written to; closed by this
> + *            function once it has been wrapped in a stream
> + * packhash - hash of the pack file, stored in the index trailer
> + *
> + * Returns 0 on success and -1 on failure.
> + *
> + * NOTE(review): relies on emalloc() returning zeroed memory for the
> + * new Object records -- confirm.
> + */
> +int
> +indexpack(int packfd, int idxfd, struct got_object_id packhash)
> +{
> +	char hdr[4*3], buf[8];
> +	int nobj, nvalid, nbig, n, i, step;
> +	Object *o, **objects;
> +	char *valid;
> +	SHA1_CTX ctx, objctx;
> +	FILE *f;
> +	struct got_object_id h;
> +	uint32_t v;
> +	int c;
> +
> +	objects = NULL;
> +	valid = NULL;
> +	if ((f = fdopen(packfd, "r")) == NULL)
> +		return -1;
> +	if (fseek(f, 0, SEEK_SET) == -1)
> +		goto error;
> +	if (fread(hdr, 1, sizeof(hdr), f) != sizeof(hdr)) {
> +		fprintf(stderr, "short read on header");
> +		goto error;
> +	}
> +	if (memcmp(hdr, "PACK\0\0\0\2", 8) != 0) {
> +		fprintf(stderr, "invalid header");
> +		goto error;
> +	}
> +
> +	nvalid = 0;
> +	nobj = GETBE32(hdr + 8);
> +	if (nobj < 0) {
> +		fprintf(stderr, "invalid header");
> +		goto error;
> +	}
> +	/* Allocate at least one slot so an empty pack is not an error. */
> +	objects = calloc(nobj > 0 ? nobj : 1, sizeof(Object*));
> +	valid = calloc(nobj > 0 ? nobj : 1, sizeof(char));
> +	if (objects == NULL || valid == NULL)
> +		goto error;
> +	step = nobj/100;
> +	if (!step)
> +		step++;
> +	while (nvalid != nobj) {
> +		fprintf(stderr, "indexing (%d/%d):", nvalid, nobj);
> +		n = 0;
> +		for (i = 0; i < nobj; i++) {
> +			if (valid[i]) {
> +				n++;
> +				continue;
> +			}
> +			if (i % step == 0)
> +				fprintf(stderr, ".");
> +			if (!objects[i]) {
> +				/*
> +				 * First visit: the object starts where the
> +				 * previous one ended, which is where
> +				 * objectcrc() left the stream.
> +				 */
> +				o = emalloc(sizeof(Object));
> +				o->off = ftello(f);
> +				objects[i] = o;
> +			}
> +			o = objects[i];
> +			fseek(f, o->off, 0);
> +			if (readpacked(f, o, Cidx) == 0) {
> +				/* Hash the "<type> <size>\0" header plus data. */
> +				SHA1Init(&objctx);
> +				SHA1Update(&objctx, (uint8_t*)o->all, o->size + strlen(o->all) + 1);
> +				SHA1Final(o->hash.sha1, &objctx);
> +				cache(o);
> +				valid[i] = 1;
> +				n++;
> +			}
> +			if (objectcrc(f, o) == -1)
> +				goto error;
> +		}
> +		fprintf(stderr, "\n");
> +		/* No progress in a whole pass; errx() does not return. */
> +		if (n == nvalid)
> +			errx(1, "fix point reached too early: %d/%d", nvalid, nobj);
> +		nvalid = n;
> +	}
> +	fclose(f);
> +	f = NULL;
> +
> +	SHA1Init(&ctx);
> +	qsort(objects, nobj, sizeof(Object*), objcmp);
> +	if ((f = fdopen(idxfd, "w")) == NULL)
> +		goto error;
> +	if (hwrite(f, "\xfftOc\x00\x00\x00\x02", 8, &ctx) != 8)
> +		goto error;
> +	/* fanout table */
> +	c = 0;
> +	for (i = 0; i < 256; i++) {
> +		while (c < nobj && (objects[c]->hash.sha1[0] & 0xff) <= i)
> +			c++;
> +		PUTBE32(buf, c);
> +		if (hwrite(f, buf, 4, &ctx) != 4)
> +			goto error;
> +	}
> +	for (i = 0; i < nobj; i++) {
> +		o = objects[i];
> +		if (hwrite(f, o->hash.sha1, sizeof(o->hash.sha1), &ctx) !=
> +		    (int)sizeof(o->hash.sha1))
> +			goto error;
> +	}
> +
> +	/* pointless, nothing uses this */
> +	for (i = 0; i < nobj; i++) {
> +		PUTBE32(buf, objects[i]->crc);
> +		if (hwrite(f, buf, 4, &ctx) != 4)
> +			goto error;
> +	}
> +
> +	/*
> +	 * 32 bit offset table.  The top bit flags an index into the
> +	 * 64 bit table, so only offsets below 2^31 fit here.
> +	 */
> +	nbig = 0;
> +	for (i = 0; i < nobj; i++) {
> +		if (objects[i]->off < (1ull<<31)) {
> +			PUTBE32(buf, objects[i]->off);
> +		} else {
> +			/* Keep the increment out of the macro argument. */
> +			v = (1u << 31) | (uint32_t)nbig;
> +			nbig++;
> +			PUTBE32(buf, v);
> +		}
> +		if (hwrite(f, buf, 4, &ctx) != 4)
> +			goto error;
> +	}
> +	for (i = 0; i < nobj; i++) {
> +		if (objects[i]->off >= (1ull<<31)) {
> +			PUTBE64(buf, objects[i]->off);
> +			if (hwrite(f, buf, 8, &ctx) != 8)
> +				goto error;
> +		}
> +	}
> +	if (hwrite(f, packhash.sha1, sizeof(packhash.sha1), &ctx) !=
> +	    (int)sizeof(packhash.sha1))
> +		goto error;
> +	SHA1Final(h.sha1, &ctx);
> +	if (fwrite(h.sha1, 1, sizeof(h.sha1), f) != sizeof(h.sha1))
> +		goto error;
> +
> +	free(objects);
> +	free(valid);
> +	/* fclose() flushes; a failure means the index is incomplete. */
> +	if (fclose(f) == EOF)
> +		return -1;
> +	return 0;
> +
> +error:
> +	free(objects);
> +	free(valid);
> +	if (f != NULL)
> +		fclose(f);
> +	return -1;
> +}
> +
> +/*
> + * Entry point of the pack indexing helper.
> + *
> + * Two messages are expected on the imsg channel set up on
> + * GOT_IMSG_FD_CHILD:
> + *  1. GOT_IMSG_IDXPACK_REQUEST carrying the pack file descriptor and
> + *     the pack's SHA1 as payload; it is acknowledged,
> + *  2. GOT_IMSG_TMPFD carrying the descriptor the index is written
> + *     to, with no payload.
> + * The pack is then indexed and completion is reported.
> + *
> + * GOT_IMSG_STOP or a closed pipe ends the helper without reporting
> + * anything.  Protocol errors are printed and sent to the peer once.
> + * If indexing fails the helper exits with status 1 without sending
> + * a completion message, as indexpack() itself does on fatal errors.
> + */
> +int
> +main(int argc, char **argv)
> +{
> +	const struct got_error *err = NULL;
> +	struct got_object_id packhash;
> +	struct imsgbuf ibuf;
> +	struct imsg imsg;
> +	int packfd, idxfd;
> +
> +	objcache = got_object_idset_alloc();
> +	imsg_init(&ibuf, GOT_IMSG_FD_CHILD);
> +	if ((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != NULL) {
> +		if (err->code == GOT_ERR_PRIVSEP_PIPE)
> +			err = NULL;
> +		goto done;
> +	}
> +	if (imsg.hdr.type == GOT_IMSG_STOP)
> +		goto done;
> +	if (imsg.hdr.type != GOT_IMSG_IDXPACK_REQUEST) {
> +		err = got_error(GOT_ERR_PRIVSEP_MSG);
> +		goto done;
> +	}
> +	if (imsg.hdr.len - IMSG_HEADER_SIZE != SHA1_DIGEST_LENGTH) {
> +		err = got_error(GOT_ERR_PRIVSEP_LEN);
> +		goto done;
> +	}
> +	packfd = imsg.fd;
> +	memcpy(packhash.sha1, imsg.data, SHA1_DIGEST_LENGTH);
> +	got_privsep_send_ack(&ibuf);
> +
> +	if ((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != NULL) {
> +		if (err->code == GOT_ERR_PRIVSEP_PIPE)
> +			err = NULL;
> +		goto done;
> +	}
> +	if (imsg.hdr.type == GOT_IMSG_STOP)
> +		goto done;
> +	if (imsg.hdr.type != GOT_IMSG_TMPFD) {
> +		err = got_error(GOT_ERR_PRIVSEP_MSG);
> +		goto done;
> +	}
> +	if (imsg.hdr.len - IMSG_HEADER_SIZE != 0) {
> +		err = got_error(GOT_ERR_PRIVSEP_LEN);
> +		goto done;
> +	}
> +	idxfd = imsg.fd;
> +
> +	/* Never report completion for an index that was not written. */
> +	if (indexpack(packfd, idxfd, packhash) == -1)
> +		errx(1, "could not index pack file");
> +	err = got_privsep_send_index_pack_done(&ibuf);
> +done:
> +	/* Report a failure exactly once. */
> +	if (err != NULL) {
> +		fprintf(stderr, "%s: %s\n", getprogname(), err->msg);
> +		got_privsep_send_error(&ibuf, err);
> +	}
> +
> +	exit(0);
> +}
>
> --
> Ori Bernstein
>
>
got clone: start of effort.