"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: got clone: start of effort.
To:
Ori Bernstein <ori@eigenstate.org>
Cc:
gameoftrees@openbsd.org
Date:
Fri, 29 Nov 2019 13:45:18 -0700

Download raw body.

Thread
On Fri, Nov 29, 2019 at 12:17:36PM -0800, Ori Bernstein wrote:
> This diff implements the fetch protocol support for git+ssh, and almost
> implements it for unencrypted git:// (haven't written the code to
> open the socket yet)
> 
> It's a quick and dirty port/airdrop/privsepification of the code I wrote
> for git9, so it doesn't match the style or the data structures used in
> the rest of got. We'll need to do a few passes to clean it up and make it
> fit before I'd be happy getting it in, but I'd like to show where things
> are.
> 
> There's no unveil or pledge.
> 
> The code in git-fetch-pack should be usable for incremental fetches, too,
> once we do the plumbing to figure out which objects are in the tree.
> 
> I'll be doing another diff to implement git push, doing the same kind of
> airdrop.

Thanks so much for working on this!

Comments inline:

> diff --git a/got/Makefile b/got/Makefile
> index 709948a..27b8780 100644
> --- a/got/Makefile
> +++ b/got/Makefile
> @@ -8,7 +8,7 @@ SRCS=		got.c blame.c commit_graph.c delta.c diff.c \
>  		object_idset.c object_parse.c opentemp.c path.c pack.c \
>  		privsep.c reference.c repository.c sha1.c worktree.c \
>  		inflate.c buf.c rcsutil.c diff3.c lockfile.c \
> -		deflate.c object_create.c delta_cache.c
> +		deflate.c object_create.c delta_cache.c fetch.c
>  MAN =		${PROG}.1 got-worktree.5 git-repository.5
>  
>  CPPFLAGS = -I${.CURDIR}/../include -I${.CURDIR}/../lib
> diff --git a/got/got.c b/got/got.c
> index 9f40340..22bd4d7 100644
> --- a/got/got.c
> +++ b/got/got.c
> @@ -45,6 +45,7 @@
>  #include "got_worktree.h"
>  #include "got_diff.h"
>  #include "got_commit_graph.h"
> +#include "got_lib_fetch.h"
>  #include "got_blame.h"
>  #include "got_privsep.h"
>  #include "got_opentemp.h"
> @@ -80,6 +81,7 @@ __dead static void	usage(int);
>  __dead static void	usage_init(void);
>  __dead static void	usage_import(void);
>  __dead static void	usage_checkout(void);
> +__dead static void	usage_clone(void);
>  __dead static void	usage_update(void);
>  __dead static void	usage_log(void);
>  __dead static void	usage_diff(void);
> @@ -104,6 +106,7 @@ __dead static void	usage_cat(void);
>  
>  static const struct got_error*		cmd_init(int, char *[]);
>  static const struct got_error*		cmd_import(int, char *[]);
> +static const struct got_error*		cmd_clone(int, char *[]);
>  static const struct got_error*		cmd_checkout(int, char *[]);
>  static const struct got_error*		cmd_update(int, char *[]);
>  static const struct got_error*		cmd_log(int, char *[]);
> @@ -131,6 +134,7 @@ static struct got_cmd got_commands[] = {
>  	{ "init",	cmd_init,	usage_init,	"in" },
>  	{ "import",	cmd_import,	usage_import,	"im" },
>  	{ "checkout",	cmd_checkout,	usage_checkout,	"co" },
> +	{ "clone",	cmd_clone,	usage_clone,	"cl" },
>  	{ "update",	cmd_update,	usage_update,	"up" },
>  	{ "log",	cmd_log,	usage_log,	"" },
>  	{ "diff",	cmd_diff,	usage_diff,	"di" },
> @@ -794,6 +798,13 @@ done:
>  	return error;
>  }
>  
> +__dead static void
> +usage_clone(void)
> +{
> +	fprintf(stderr, "usage: %s clone repo-path\n", getprogname());

Should this say "clone repo-url" instead?

> +	exit(1);
> +}
> +
>  __dead static void
>  usage_checkout(void)
>  {
> @@ -969,6 +980,34 @@ resolve_commit_arg(struct got_object_id **commit_id,
>  	return err;
>  }
>  
> +static const struct got_error *
> +cmd_clone(int argc, char *argv[])
> +{
> +	char *uri, *branch_filter, *dirname;
> +	int ch;
> +
> +	while ((ch = getopt(argc, argv, "b:c:p:")) != -1) {

-c and -p are listed in the getopt option string but aren't handled in the switch below.

> +		switch (ch) {
> +		case 'b':
> +			branch_filter = optarg;
> +			break;
> +		default:
> +			usage_clone();
> +			break;
> +		}
> +	}
> +	argc -= optind;
> +	argv += optind;
> +	uri = argv[0];
> +	if(argc == 1)
> +		dirname = NULL;
> +	else if(argc == 2)
> +		dirname = argv[1];
> +	else
> +		usage_clone();
> +	return got_clone(argv[0], branch_filter, dirname);
> +}
> +
>  static const struct got_error *
>  cmd_checkout(int argc, char *argv[])
>  {
> diff --git a/include/got_error.h b/include/got_error.h
> index d0df125..50c6e2d 100644
> --- a/include/got_error.h
> +++ b/include/got_error.h
> @@ -128,6 +128,9 @@
>  #define GOT_ERR_REGEX		112
>  #define GOT_ERR_REF_NAME_MINUS	113
>  #define GOT_ERR_GITCONFIG_SYNTAX 114
> +#define GOT_ERR_FETCH_FAILED	115
> +#define GOT_ERR_PARSE_URI	116
> +#define GOT_ERR_BAD_PROTO	117
>  
>  static const struct got_error {
>  	int code;
> @@ -262,6 +265,9 @@ static const struct got_error {
>  	{ GOT_ERR_REGEX, "regular expression error" },
>  	{ GOT_ERR_REF_NAME_MINUS, "reference name may not start with '-'" },
>  	{ GOT_ERR_GITCONFIG_SYNTAX, "gitconfig syntax error" },
> +	{ GOT_ERR_FETCH_FAILED, "fetch failed" },
> +	{ GOT_ERR_PARSE_URI, "failed to parse uri" },
> +	{ GOT_ERR_BAD_PROTO, "unknown protocol" },
>  };
>  
>  /*
> diff --git a/lib/fetch.c b/lib/fetch.c
> new file mode 100644
> index 0000000..dfba5c3
> --- /dev/null
> +++ b/lib/fetch.c
> @@ -0,0 +1,342 @@
> +/*
> + * Copyright (c) 2018, 2019 Stefan Sperling <stsp@openbsd.org>

Please add your own copyright claim here. I didn't write most of this.

> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/queue.h>
> +#include <sys/uio.h>
> +#include <sys/socket.h>
> +#include <sys/wait.h>
> +#include <sys/syslimits.h>
> +#include <sys/resource.h>
> +
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <stdint.h>
> +#include <sha1.h>
> +#include <zlib.h>
> +#include <ctype.h>
> +#include <limits.h>
> +#include <imsg.h>
> +#include <time.h>
> +#include <uuid.h>
> +
> +#include "got_error.h"
> +#include "got_reference.h"
> +#include "got_repository.h"
> +#include "got_path.h"
> +#include "got_cancel.h"
> +#include "got_worktree.h"
> +#include "got_object.h"
> +
> +#include "got_lib_delta.h"
> +#include "got_lib_inflate.h"
> +#include "got_lib_object.h"
> +#include "got_lib_object_parse.h"
> +#include "got_lib_object_create.h"
> +#include "got_lib_pack.h"
> +#include "got_lib_sha1.h"
> +#include "got_lib_privsep.h"
> +#include "got_lib_object_cache.h"
> +#include "got_lib_repository.h"
> +
> +#define GOT_PROTOMAX	64
> +#define GOT_HOSTMAX	256
> +#define GOT_PATHMAX	512
> +#define GOT_REPOMAX	256
> +#define GOT_PORTMAX	16
> +#define GOT_URIMAX	1024
> +
> +static int
> +mkpath(char *path)
> +{
> +	char *p, namebuf[PATH_MAX];
> +	struct stat sb;
> +	int done;
> +
> +	while (*path == '/')
> +		path++;
> +	if(strlcpy(namebuf, path, sizeof(namebuf)) >= sizeof(namebuf)) {
> +		errno = ENAMETOOLONG;
> +		return -1;
> +	}
> +
> +	p = namebuf;
> +	for (;;) {
> +		p += strspn(p, "/");
> +		p += strcspn(p, "/");
> +		done = (*p == '\0');
> +		*p = '\0';
> +
> +		if (mkdir(namebuf, 0755) != 0) {
> +			int mkdir_errno = errno;
> +			if (stat(path, &sb) == -1) {
> +				/* Not there; use mkdir()s errno */
> +				errno = mkdir_errno;
> +				return -1;
> +			}
> +			if (!S_ISDIR(sb.st_mode)) {
> +				/* Is there, but isn't a directory */
> +				errno = ENOTDIR;
> +				return -1;
> +			}
> +		}
> +
> +		if (done)
> +			break;
> +		*p = '/';
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +hassuffix(char *base, char *suf)
> +{
> +	int nb, ns;
> +
> +	nb = strlen(base);
> +	ns = strlen(suf);
> +	if(ns <= nb && strcmp(base + (nb - ns), suf) == 0)
> +		return 1;
> +	return 0;
> +}
> +
> +static int
> +grab(char *dst, int n, char *p, char *e)
> +{
> +	int l;
> +
> +	l = e - p;
> +	if(l >= n) {
> +		errno = ENAMETOOLONG;
> +		return -1;
> +	}
> +	return strlcpy(dst, p, l + 1);
> +}
> +
> +static int
> +got_dial_ssh(char *host, char *port, char *path, char *direction)
> +{
> +	int pid, pfd[2];
> +	char cmd[64];
> +
> +	if(pipe(pfd) == -1)
> +		return -1;
> +	pid = fork();
> +	if(pid == -1)
> +		return -1;
> +	if(pid == 0){
> +		close(pfd[1]);
> +		dup2(pfd[0], 0);
> +		dup2(pfd[0], 1);
> +		snprintf(cmd, sizeof(cmd), "git-%s-pack", direction);
> +		execlp("ssh", "ssh", host, cmd, path, NULL);
> +		abort();
> +	}else{
> +		close(pfd[0]);
> +		return pfd[1];
> +	}
> +}
> +
> +#if 0
> +int
> +got_dial_git(char *host, char *port, char *path, char *direction)
> +{
> +	struct sockaddr_storage sk;
> +	char *ds, cmd[128];
> +	int fd, l;
> +
> +	ds = netmkaddr(host, "tcp", port);
> +	fd = dial(ds, NULL, NULL, NULL);
> +	if(fd == -1)
> +		return -1;
> +	if(chattygit)
> +		fprint(2, "dial %s %s git-%s-pack %s\n", host, port, direction, path);
> +	l = snprint(cmd, sizeof(cmd), "git-%s-pack %s\n", direction, path);
> +	if(writepkt(fd, cmd, l + 1) == -1){
> +		print("failed to write message\n");
> +		close(fd);
> +		return -1;
> +	}
> +	return fd;
> +}
> +#endif
> +
> +int
> +got_parse_uri(char *uri, char *proto, char *host, char *port, char *path, char *repo)
> +{
> +	char *s, *p, *q;
> +	int n, hasport;
> +
> +	p = strstr(uri, "://");
> +	if(!p){
> +		//werrstr("missing protocol");
> +		return -1;
> +	}
> +	if (grab(proto, GOT_PROTOMAX, uri, p) == -1)
> +		return -1;
> +	hasport = (strcmp(proto, "git") == 0 || strstr(proto, "http") == proto);
> +	s = p + 3;
> +	p = NULL;
> +	if(!hasport){
> +		p = strstr(s, ":");
> +		if(p != NULL)
> +			p++;
> +	}
> +	if(p == NULL)
> +		p = strstr(s, "/");
> +	if(p == NULL || strlen(p) == 1){
> +		//werrstr("missing path");
> +		return -1;
> +	}
> +
> +	q = memchr(s, ':', p - s);
> +	if(q){
> +		grab(host, GOT_HOSTMAX, s, q);
> +		grab(port, GOT_PORTMAX, q + 1, p);
> +	}else{
> +		grab(host, GOT_HOSTMAX, s, p);
> +		snprintf(port, GOT_PORTMAX, "9418");
> +	}
> +	
> +	snprintf(path, GOT_PATHMAX, "%s", p);
> +	p = strrchr(p, '/') + 1;
> +	if(!p || strlen(p) == 0){
> +		//werrstr("missing repository in uri");
> +		return -1;
> +	}
> +	n = strlen(p);
> +	if(hassuffix(p, ".git"))
> +		n -= 4;
> +	grab(repo, GOT_REPOMAX, p, p + n);
> +	return 0;
> +}
> +
> +const struct got_error*
> +got_clone(char *uri, char *branch_filter, char *dirname)
> +{
> +	char proto[GOT_PROTOMAX], host[GOT_HOSTMAX], port[GOT_PORTMAX];
> +	char repo[GOT_REPOMAX], path[GOT_PATHMAX];
> +	int imsg_fetchfds[2], imsg_idxfds[2], fetchfd;
> +	int packfd, npackfd, idxfd, nidxfd, status;
> +	struct got_object_id packhash;
> +	const struct got_error *err;
> +	struct imsgbuf ibuf;
> +	pid_t pid;
> +
> +	fetchfd = -1;
> +	if (got_parse_uri(uri, proto, host, port, path, repo) == -1)
> +		return got_error(GOT_ERR_PARSE_URI);
> +	if (dirname == NULL)
> +		dirname = repo;
> +	err = got_repo_init(dirname);
> +	if (err != NULL)
> +		return err;
> +	if (chdir(dirname))
> +		return got_error_from_errno("enter new repo");
> +	if (mkpath(".git/objects/pack") == -1)
> +		return got_error_from_errno("mkpath");
> +	packfd = open(".git/objects/pack/fetching.pack", O_CREAT|O_RDWR, 0644);
> +	if (packfd == -1)
> +		return got_error_from_errno("open pack");
> +	npackfd = dup(packfd);
> +	if (npackfd == -1)
> +		return got_error_from_errno("dup");
> +	idxfd = open(".git/objects/pack/fetching.idx", O_CREAT|O_RDWR, 0644);
> +	if (idxfd == -1)
> +		return got_error_from_errno("open pack");
> +	nidxfd = dup(idxfd);
> +	if (nidxfd == -1)
> +		return got_error_from_errno("dup");
> +
> +	if(strcmp(proto, "ssh") == 0 || strcmp(proto, "git+ssh") == 0)
> +		fetchfd = got_dial_ssh(host, port, path, "upload");
> +	//else if(strcmp(proto, "git") == 0)
> +	//	fetchfd = got_dial_git(host, port, path, "upload");
> +	else if(strcmp(proto, "http") == 0 || strcmp(proto, "git+http") == 0)
> +		err = got_error(GOT_ERR_BAD_PROTO);
> +	else
> +		err = got_error(GOT_ERR_BAD_PROTO);
> +
> +	if (fetchfd == -1)
> +		err = got_error_from_errno("dial uri");
> +	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_fetchfds) == -1)
> +		return got_error_from_errno("socketpair");
> +
> +	pid = fork();
> +	if (pid == -1)
> +		return got_error_from_errno("fork");
> +	else if (pid == 0)
> +		got_privsep_exec_child(imsg_fetchfds, GOT_PATH_PROG_FETCH_PACK, ".");
> +
> +	if (close(imsg_fetchfds[1]) != 0)
> +		return got_error_from_errno("close");
> +	imsg_init(&ibuf, imsg_fetchfds[0]);
> +	err = got_privsep_send_fetch_req(&ibuf, fetchfd);
> +	if (err != NULL)
> +		return err;
> +	err = got_privsep_wait_ack(&ibuf);
> +	if (err != NULL)
> +		return err;
> +	err = got_privsep_send_tmpfd(&ibuf, npackfd);
> +	if (err != NULL)
> +		return err;
> +	npackfd = dup(packfd);
> +	if (npackfd == -1)
> +		return got_error_from_errno("dup");
> +	err = got_privsep_wait_fetch_done(&ibuf, &packhash);
> +	if (err != NULL)
> +		return err;
> +	if (waitpid(pid, &status, 0) == -1)
> +		return got_error_from_errno("child exit");
> +
> +	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_idxfds) == -1)
> +		return got_error_from_errno("socketpair");
> +	pid = fork();
> +	if (pid == -1)
> +		return got_error_from_errno("fork");
> +	else if (pid == 0)
> +		got_privsep_exec_child(imsg_idxfds, GOT_PATH_PROG_INDEX_PACK, ".");
> +	if (close(imsg_idxfds[1]) != 0)
> +		return got_error_from_errno("close");
> +	imsg_init(&ibuf, imsg_idxfds[0]);
> +
> +	err = got_privsep_send_index_pack_req(&ibuf, npackfd, packhash);
> +	if (err != NULL)
> +		return err;
> +	err = got_privsep_wait_ack(&ibuf);
> +	if (err != NULL)
> +		return err;
> +	err = got_privsep_send_tmpfd(&ibuf, nidxfd);
> +	if (err != NULL)
> +		return err;
> +	err = got_privsep_wait_index_pack_done(&ibuf);
> +	if (err != NULL)
> +		return err;
> +	imsg_clear(&ibuf);
> +	if (close(imsg_idxfds[0]) == -1)
> +		return got_error_from_errno("close child");
> +	if (waitpid(pid, &status, 0) == -1)
> +		return got_error_from_errno("child exit");
> +
> +
> +	return NULL;
> +
> +}
> diff --git a/lib/got_lib_object_idset.h b/lib/got_lib_object_idset.h
> index 6ae68d9..6dbd75c 100644
> --- a/lib/got_lib_object_idset.h
> +++ b/lib/got_lib_object_idset.h
> @@ -26,6 +26,8 @@ const struct got_error *got_object_idset_remove(void **,
>      struct got_object_idset *, struct got_object_id *);
>  int got_object_idset_contains(struct got_object_idset *,
>      struct got_object_id *);
> +void *got_object_idset_lookup_data(struct got_object_idset *,
> +    struct got_object_id *);
>  const struct got_error *got_object_idset_for_each(struct got_object_idset *,
>      const struct got_error *(*cb)(struct got_object_id *, void *, void *),
>      void *);
> diff --git a/lib/got_lib_privsep.h b/lib/got_lib_privsep.h
> index b27325d..6789c93 100644
> --- a/lib/got_lib_privsep.h
> +++ b/lib/got_lib_privsep.h
> @@ -42,6 +42,8 @@
>  #define GOT_PROG_READ_TAG	got-read-tag
>  #define GOT_PROG_READ_PACK	got-read-pack
>  #define GOT_PROG_READ_GITCONFIG	got-read-gitconfig
> +#define GOT_PROG_FETCH_PACK	got-fetch-pack
> +#define GOT_PROG_SEND_PACK	got-send-pack
>  
>  #define GOT_STRINGIFY(x) #x
>  #define GOT_STRINGVAL(x) GOT_STRINGIFY(x)
> @@ -61,6 +63,10 @@
>  	GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_READ_PACK)
>  #define GOT_PATH_PROG_READ_GITCONFIG \
>  	GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_READ_GITCONFIG)
> +#define GOT_PATH_PROG_FETCH_PACK \
> +	GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_FETCH_PACK)
> +#define GOT_PATH_PROG_SEND_PACK \
> +	GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_SEND_PACK)
>  
>  struct got_privsep_child {
>  	int imsg_fd;
> @@ -98,6 +104,12 @@ enum got_imsg_type {
>  	GOT_IMSG_TAG,
>  	GOT_IMSG_TAG_TAGMSG,
>  
> +	/* Messages related to networking. */
> +	GOT_IMSG_FETCH_REQUEST,
> +	GOT_IMSG_FETCH_DONE,
> +	GOT_IMSG_IDXPACK_REQUEST,
> +	GOT_IMSG_IDXPACK_DONE,
> +
>  	/* Messages related to pack files. */
>  	GOT_IMSG_PACKIDX,
>  	GOT_IMSG_PACK,
> @@ -106,6 +118,7 @@ enum got_imsg_type {
>  	/* Message sending file descriptor to a temporary file. */
>  	GOT_IMSG_TMPFD,
>  
> +
>  	/* Messages related to gitconfig files. */
>  	GOT_IMSG_GITCONFIG_PARSE_REQUEST,
>  	GOT_IMSG_GITCONFIG_REPOSITORY_FORMAT_VERSION_REQUEST,
> @@ -272,6 +285,12 @@ const struct got_error *got_privsep_send_blob_outfd(struct imsgbuf *, int);
>  const struct got_error *got_privsep_send_tmpfd(struct imsgbuf *, int);
>  const struct got_error *got_privsep_send_obj(struct imsgbuf *,
>      struct got_object *);
> +const struct got_error *got_privsep_send_index_pack_req(struct imsgbuf *, int);
> +const struct got_error *got_privsep_send_index_pack_done(struct imsgbuf *,
> +    struct got_object_id*);
> +const struct got_error *got_privsep_send_fetch_req(struct imsgbuf *, int);
> +const struct got_error *got_privsep_send_fetch_done(struct imsgbuf *);
> +const struct got_error *got_privsep_wait_fetch_done(struct imsgbuf *);
>  const struct got_error *got_privsep_get_imsg_obj(struct got_object **,
>      struct imsg *, struct imsgbuf *);
>  const struct got_error *got_privsep_recv_obj(struct got_object **,
> diff --git a/lib/inflate.c b/lib/inflate.c
> index 3986b17..ebcdf12 100644
> --- a/lib/inflate.c
> +++ b/lib/inflate.c
> @@ -86,12 +86,16 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp)
>  	size_t last_total_out = zb->z.total_out;
>  	z_stream *z = &zb->z;
>  	int ret = Z_ERRNO;
> +	off_t off, consumed;
>  
>  	z->next_out = zb->outbuf;
>  	z->avail_out = zb->outlen;
>  
>  	*outlenp = 0;
> +	off = ftello(f);
> +	consumed = 0;
>  	do {
> +		size_t last_total_in = zb->z.total_in;
>  		if (z->avail_in == 0) {
>  			size_t n = fread(zb->inbuf, 1, zb->inlen, f);
>  			if (n == 0) {
> @@ -105,6 +109,7 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp)
>  			z->avail_in = n;
>  		}
>  		ret = inflate(z, Z_SYNC_FLUSH);
> +		consumed += z->total_in - last_total_in;
>  	} while (ret == Z_OK && z->avail_out > 0);
>  
>  	if (ret == Z_OK) {
> @@ -116,6 +121,7 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp)
>  	}
>  
>  	*outlenp = z->total_out - last_total_out;
> +	fseek(f, off + consumed, SEEK_SET);
>  	return NULL;
>  }
>  
> diff --git a/lib/object.c b/lib/object.c
> index 4aaeb8a..2044c22 100644
> --- a/lib/object.c
> +++ b/lib/object.c
> @@ -51,6 +51,7 @@
>  #include "got_lib_object_cache.h"
>  #include "got_lib_object_parse.h"
>  #include "got_lib_pack.h"
> +#include "got_lib_fetch.h"
>  #include "got_lib_repository.h"
>  
>  #ifndef MIN
> diff --git a/lib/object_idset.c b/lib/object_idset.c
> index 527383c..510b59e 100644
> --- a/lib/object_idset.c
> +++ b/lib/object_idset.c
> @@ -168,6 +168,14 @@ got_object_idset_contains(struct got_object_idset *set,
>  	return entry ? 1 : 0;
>  }
>  
> +void *
> +got_object_idset_lookup_data(struct got_object_idset *set,
> +    struct got_object_id *id)

This function seems to do the exact same thing as got_object_idset_get()
which already exists.

> +{
> +	struct got_object_idset_element *entry = find_element(set, id);
> +	return entry ? entry->data : NULL;
> +}
> +
>  const struct got_error *
>  got_object_idset_for_each(struct got_object_idset *set,
>      const struct got_error *(*cb)(struct got_object_id *, void *, void *),
> diff --git a/lib/privsep.c b/lib/privsep.c
> index f31cafb..599d5eb 100644
> --- a/lib/privsep.c
> +++ b/lib/privsep.c
> @@ -401,6 +401,44 @@ got_privsep_send_obj(struct imsgbuf *ibuf, struct got_object *obj)
>  	return flush_imsg(ibuf);
>  }
>  
> +const struct got_error *
> +got_privsep_send_fetch_req(struct imsgbuf *ibuf, int fd)
> +{
> +	const struct got_error *err = NULL;
> +
> +	if (imsg_compose(ibuf, GOT_IMSG_FETCH_REQUEST, 0, 0, fd,
> +	    NULL, 0) == -1) {
> +		err = got_error_from_errno("imsg_compose FETCH_REQUEST");
> +		close(fd);
> +		return err;
> +	}
> +	return flush_imsg(ibuf);
> +}
> +
> +const struct got_error *
> +got_privsep_send_fetch_done(struct imsgbuf *ibuf)
> +{
> +	if (imsg_compose(ibuf, GOT_IMSG_FETCH_DONE, 0, 0, -1, NULL, 0) == -1)
> +		return got_error_from_errno("imsg_compose FETCH");
> +	return flush_imsg(ibuf);
> +}
> +
> +const struct got_error *
> +got_privsep_wait_fetch_done(struct imsgbuf *ibuf)
> +{
> +	const struct got_error *err = NULL;
> +	struct imsg imsg;
> +
> +	err = got_privsep_recv_imsg(&imsg, ibuf, 0);
> +	if (err)
> +		return err;
> +	if (imsg.hdr.type == GOT_IMSG_FETCH_DONE)
> +		return NULL;
> +	else
> +		return got_error(GOT_ERR_PRIVSEP_MSG);
> +	imsg_free(&imsg);
> +}
> +
>  const struct got_error *
>  got_privsep_get_imsg_obj(struct got_object **obj, struct imsg *imsg,
>      struct imsgbuf *ibuf)
> diff --git a/libexec/Makefile b/libexec/Makefile
> index a4c900b..ddc207c 100644
> --- a/libexec/Makefile
> +++ b/libexec/Makefile
> @@ -1,4 +1,4 @@
>  SUBDIR = got-read-blob got-read-commit got-read-object got-read-tree \
> -	got-read-tag got-read-pack got-read-gitconfig
> +	got-read-tag got-fetch-pack got-read-pack got-read-gitconfig
>  
>  .include <bsd.subdir.mk>
> diff --git a/libexec/got-index-pack/got-index-pack.c b/libexec/got-index-pack/got-index-pack.c
> new file mode 100644
> index 0000000..3bf2502
> --- /dev/null
> +++ b/libexec/got-index-pack/got-index-pack.c
> @@ -0,0 +1,1254 @@

Please add your copyright and licence statements here.

> +#include <sys/queue.h>
> +#include <sys/stat.h>
> +#include <sys/syslimits.h>
> +#include <sys/time.h>
> +#include <sys/types.h>
> +#include <sys/uio.h>
> +
> +#include <stdint.h>
> +#include <errno.h>
> +#include <imsg.h>
> +#include <limits.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <ctype.h>
> +#include <sha1.h>
> +#include <fcntl.h>
> +#include <zlib.h>
> +#include <err.h>
> +#include <assert.h>
> +#include <dirent.h>
> +
> +#include "got_error.h"
> +#include "got_object.h"
> +
> +#include "got_lib_sha1.h"
> +#include "got_lib_delta.h"
> +#include "got_lib_inflate.h"
> +#include "got_lib_object.h"
> +#include "got_lib_object_parse.h"
> +#include "got_lib_object_idset.h"
> +#include "got_lib_privsep.h"
> +
> +typedef struct Cinfo	Cinfo;
> +typedef struct Tinfo	Tinfo;
> +typedef struct Object	Object;
> +typedef struct Pack	Pack;
> +typedef struct Buf	Buf;
> +typedef struct Dirent	Dirent;
> +typedef struct Idxent	Idxent;
> +typedef struct Ols	Ols;
> +
> +enum {
> +	/* 5k objects should be enough */
> +	Cachemax	= 5*1024,
> +	Pathmax		= 512,
> +	Hashsz		= 20,
> +	Pktmax		= 65536,
> +
> +	Nproto	= 16,
> +	Nport	= 16,
> +	Nhost	= 256,
> +	Npath	= 128,
> +	Nrepo	= 64,
> +	Nbranch	= 32,
> +};
> +
> +typedef enum Type {
> +	GNone	= 0,
> +	GCommit	= 1,
> +	GTree	= 2,
> +	GBlob	= 3,
> +	GTag	= 4,
> +	GOdelta	= 6,
> +	GRdelta	= 7,
> +} Type;
> +
> +enum {
> +	Cloaded	= 1 << 0,
> +	Cidx	= 1 << 1,
> +	Ccache	= 1 << 2,
> +	Cexist	= 1 << 3,
> +	Cparsed	= 1 << 5,
> +};
> +
> +struct Dirent {
> +	char *name;
> +	int modref;
> +	int mode;
> +	struct got_object_id h;
> +};
> +
> +struct Object {
> +	/* Git data */
> +	struct got_object_id	hash;
> +	Type	type;
> +
> +	/* Cache */
> +	int	id;
> +	int	flag;
> +	int	refs;
> +	Object	*next;
> +	Object	*prev;
> +
> +	/* For indexing */
> +	off_t	off;
> +	off_t	len;
> +	uint32_t	crc;
> +
> +	/* Everything below here gets cleared */
> +	char	*all;
> +	char	*data;
> +	/* size excludes header */
> +	off_t	size;
> +
> +	union {
> +		Cinfo *commit;
> +		Tinfo *tree;
> +	};
> +};
> +
> +struct Tinfo {
> +	/* Tree */
> +	Dirent	*ent;
> +	int	nent;
> +};
> +
> +struct Cinfo {
> +	/* Commit */
> +	struct got_object_id	*parent;
> +	int	nparent;
> +	struct got_object_id	tree;
> +	char	*author;
> +	char	*committer;
> +	char	*msg;
> +	int	nmsg;
> +	off_t	ctime;
> +	off_t	mtime;
> +};
> +
> +typedef struct Buf Buf;
> +
> +struct Buf {
> +	int len;
> +	int sz;
> +	char *data;
> +};
> +
> +static int	readpacked(FILE *, Object *, int);
> +static Object	*readidxobject(FILE *, struct got_object_id, int);
> +
> +struct got_object_idset *objcache;
> +int	next_object_id;
> +Object *lruhead;
> +Object *lrutail;
> +int	ncache;
> +
> +#define GETBE16(b)\
> +		((((b)[0] & 0xFFul) <<  8) | \
> +		 (((b)[1] & 0xFFul) <<  0))
> +
> +#define GETBE32(b)\
> +		((((b)[0] & 0xFFul) << 24) | \
> +		 (((b)[1] & 0xFFul) << 16) | \
> +		 (((b)[2] & 0xFFul) <<  8) | \
> +		 (((b)[3] & 0xFFul) <<  0))
> +#define GETBE64(b)\
> +		((((b)[0] & 0xFFull) << 56) | \
> +		 (((b)[1] & 0xFFull) << 48) | \
> +		 (((b)[2] & 0xFFull) << 40) | \
> +		 (((b)[3] & 0xFFull) << 32) | \
> +		 (((b)[4] & 0xFFull) << 24) | \
> +		 (((b)[5] & 0xFFull) << 16) | \
> +		 (((b)[6] & 0xFFull) <<  8) | \
> +		 (((b)[7] & 0xFFull) <<  0))
> +
> +#define PUTBE16(b, n)\
> +	do{ \
> +		(b)[0] = (n) >> 8; \
> +		(b)[1] = (n) >> 0; \
> +	} while(0)
> +
> +#define PUTBE32(b, n)\
> +	do{ \
> +		(b)[0] = (n) >> 24; \
> +		(b)[1] = (n) >> 16; \
> +		(b)[2] = (n) >> 8; \
> +		(b)[3] = (n) >> 0; \
> +	} while(0)
> +
> +#define PUTBE64(b, n)\
> +	do{ \
> +		(b)[0] = (n) >> 56; \
> +		(b)[1] = (n) >> 48; \
> +		(b)[2] = (n) >> 40; \
> +		(b)[3] = (n) >> 32; \
> +		(b)[4] = (n) >> 24; \
> +		(b)[5] = (n) >> 16; \
> +		(b)[6] = (n) >> 8; \
> +		(b)[7] = (n) >> 0; \
> +	} while(0)
> +
> +static int
> +charval(int c, int *err)
> +{
> +	if(c >= '0' && c <= '9')
> +		return c - '0';
> +	if(c >= 'a' && c <= 'f')
> +		return c - 'a' + 10;
> +	if(c >= 'A' && c <= 'F')
> +		return c - 'A' + 10;
> +	*err = 1;
> +	return -1;
> +}
> +
> +static int
> +hparse(struct got_object_id *h, char *b)
> +{
> +	int i, err;
> +
> +	err = 0;
> +	for(i = 0; i < sizeof(h->sha1); i++){
> +		err = 0;
> +		h->sha1[i] = 0;
> +		h->sha1[i] |= ((charval(b[2*i], &err) & 0xf) << 4);
> +		h->sha1[i] |= ((charval(b[2*i+1], &err)& 0xf) << 0);
> +		if(err)
> +			return -1;
> +	}
> +	return 0;
> +}
> +
> +static void *
> +emalloc(size_t n)
> +{
> +	void *v;
> +	
> +	v = calloc(n, 1);
> +	if(v == NULL)
> +		err(1, "malloc:");
> +	return v;
> +}
> +
> +static void *
> +erealloc(void *p, ulong n)
> +{
> +	void *v;
> +	
> +	v = realloc(p, n);
> +	if(v == NULL)
> +		err(1, "realloc:");
> +	memset(v, 0, n);
> +	return v;
> +}
> +
> +static int
> +hasheq(struct got_object_id *a, struct got_object_id *b)
> +{
> +	return memcmp(a->sha1, b->sha1, sizeof(a->sha1)) == 0;
> +}
> +
> +static char *
> +typestr(int t)
> +{
> +	char *types[] = {
> +		"???",
> +		"commit",
> +		"tree",
> +		"blob",
> +		"tag",
> +		"odelta",
> +		"rdelta",
> +	};
> +	if (t < 0 || t >= sizeof(types)/sizeof(types[0]))
> +		abort();
> +	return types[t];
> +}
> +
> +static char *
> +hashfmt(char *out, size_t nout, struct got_object_id *h)
> +{
> +	int i, n, c0, c1;
> +	char *p;
> +
> +	if (nout < 2*sizeof(h->sha1) + 1)
> +		return NULL;
> +	p = out;
> +	for(i = 0; i < sizeof(h->sha1); i++){
> +		n = (h->sha1[i] >> 4) & 0xf;
> +		c0 = (n >= 10) ? n-10 + 'a' : n + '0';
> +		n = h->sha1[i] & 0xf;
> +		c1 = (n >= 10) ? n-10 + 'a' : n + '0';
> +		*p++ = c0;
> +		*p++ = c1;
> +	}
> +	*p++ = 0;
> +	return out;
> +}
> +
> +static void
> +clear(Object *o)
> +{
> +	if(!o)
> +		return;
> +
> +	assert(o->refs == 0);
> +	assert((o->flag & Ccache) == 0);
> +	assert(o->flag & Cloaded);
> +	switch(o->type){
> +	case GCommit:
> +		if(!o->commit)
> +			break;
> +		free(o->commit->parent);
> +		free(o->commit->author);
> +		free(o->commit->committer);
> +		free(o->commit);
> +		o->commit = NULL;
> +		break;
> +	case GTree:
> +		if(!o->tree)
> +			break;
> +		free(o->tree->ent);
> +		free(o->tree);
> +		o->tree = NULL;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	free(o->all);
> +	o->all = NULL;
> +	o->data = NULL;
> +	o->flag &= ~Cloaded;
> +}
> +
> +static void
> +unref(Object *o)
> +{
> +	if(!o)
> +		return;
> +	o->refs--;
> +	if(!o->refs)
> +		clear(o);
> +}
> +
> +static Object*
> +ref(Object *o)
> +{
> +	o->refs++;
> +	return o;
> +}
> +
> +static void
> +cache(Object *o)
> +{
> +	char buf[41];
> +	Object *p;
> +
> +	hashfmt(buf, sizeof(buf), &o->hash);
> +	if(o == lruhead)
> +		return;
> +	if(o == lrutail)
> +		lrutail = lrutail->prev;
> +	if(!(o->flag & Cexist)){
> +		got_object_idset_add(objcache, &o->hash, o);
> +		o->id = next_object_id++;
> +		o->flag |= Cexist;
> +	}
> +	if(o->prev)
> +		o->prev->next = o->next;
> +	if(o->next)
> +		o->next->prev = o->prev;
> +	if(lrutail == o){
> +		lrutail = o->prev;
> +		lrutail->next = NULL;
> +	}else if(!lrutail)
> +		lrutail = o;
> +	if(lruhead)
> +		lruhead->prev = o;
> +	o->next = lruhead;
> +	o->prev = NULL;
> +	lruhead = o;
> +
> +	if(!(o->flag & Ccache)){
> +		o->flag |= Ccache;
> +		ref(o);
> +		ncache++;
> +	}
> +	while(ncache > Cachemax){
> +		p = lrutail;
> +		lrutail = p->prev;
> +		lrutail->next = NULL;
> +		p->flag &= ~Ccache;
> +		p->prev = NULL;
> +		p->next = NULL;
> +		unref(p);
> +		ncache--;
> +	}		
> +}
> +
> +static int
> +preadbe32(FILE *b, int *v, off_t off)
> +{
> +	char buf[4];
> +	
> +	if(fseek(b, off, 0) == -1)
> +		return -1;
> +	if(fread(buf, 1, sizeof(buf), b) == -1)
> +		return -1;
> +	*v = GETBE32(buf);
> +
> +	return 0;
> +}
> +static int
> +preadbe64(FILE *b, off_t *v, off_t off)
> +{
> +	char buf[8];
> +	
> +	if(fseek(b, off, 0) == -1)
> +		return -1;
> +	if(fread(buf, 1, sizeof(buf), b) == -1)
> +		return -1;
> +	*v = GETBE64(buf);
> +	return 0;
> +}
> +
> +static int
> +readvint(char *p, char **pp)
> +{
> +	int i, n, c;
> +	
> +	i = 0;
> +	n = 0;
> +	do {
> +		c = *p++;
> +		n |= (c & 0x7f) << i;
> +		i += 7;
> +	} while (c & 0x80);
> +	*pp = p;
> +
> +	return n;
> +}
> +
> +static int
> +applydelta(Object *dst, Object *base, char *d, int nd)
> +{
> +	char *r, *b, *ed, *er;
> +	int n, nr, c;
> +	off_t o, l;
> +
> +	ed = d + nd;
> +	b = base->data;
> +	n = readvint(d, &d);
> +	if(n != base->size){
> +		fprintf(stderr, "mismatched source size");
> +		return -1;
> +	}
> +
> +	nr = readvint(d, &d);
> +	r = emalloc(nr + 64);
> +	n = snprintf(r, 64, "%s %d", typestr(base->type), nr) + 1;
> +	dst->all = r;
> +	dst->type = base->type;
> +	dst->data = r + n;
> +	dst->size = nr;
> +	er = dst->data + nr;
> +	r = dst->data;
> +
> +	while(1){
> +		if(d == ed)
> +			break;
> +		c = *d++;
> +		if(!c){
> +			fprintf(stderr, "bad delta encoding");
> +			return -1;
> +		}
> +		/* copy from base */
> +		if(c & 0x80){
> +			o = 0;
> +			l = 0;
> +			/* Offset in base */
> +			if(c & 0x01 && d != ed) o |= (*d++ <<  0) & 0x000000ff;
> +			if(c & 0x02 && d != ed) o |= (*d++ <<  8) & 0x0000ff00;
> +			if(c & 0x04 && d != ed) o |= (*d++ << 16) & 0x00ff0000;
> +			if(c & 0x08 && d != ed) o |= (*d++ << 24) & 0xff000000;
> +
> +			/* Length to copy */
> +			if(c & 0x10 && d != ed) l |= (*d++ <<  0) & 0x0000ff;
> +			if(c & 0x20 && d != ed) l |= (*d++ <<  8) & 0x00ff00;
> +			if(c & 0x40 && d != ed) l |= (*d++ << 16) & 0xff0000;
> +			if(l == 0) l = 0x10000;
> +
> +			assert(o + l <= base->size);
> +			memmove(r, b + o, l);
> +			r += l;
> +		/* inline data */
> +		}else{
> +			memmove(r, d, c);
> +			d += c;
> +			r += c;
> +		}
> +
> +	}
> +	if(r != er){
> +		fprintf(stderr, "truncated delta (%zd)", er - r);
> +		return -1;
> +	}
> +
> +	return nr;
> +}
> +
> +/*
> + * Read a delta whose base is named by a hash stored in front of the
> + * compressed delta data. The base is obtained through readidxobject()
> + * and the delta is applied on top of it into o.
> + *
> + * nd is the delta size announced by the pack entry header; the
> + * inflated data must have exactly that length. o->len is updated to
> + * the distance from o->off to the stream position after the inflate.
> + *
> + * Returns 0 on success and -1 on any failure.
> + */
> +static int
> +readrdelta(FILE *f, Object *o, int nd, int flag)
> +{
> +	struct got_object_id basehash;
> +	uint8_t *delta = NULL;
> +	size_t ndelta;
> +	Object *base;
> +	int ret = -1;
> +
> +	if(fread(basehash.sha1, 1, sizeof(basehash.sha1), f) != sizeof(basehash.sha1))
> +		goto done;
> +	/* give up when hasheq() reports that the base hash matches o's own */
> +	if(hasheq(&o->hash, &basehash))
> +		goto done;
> +	if(got_inflate_to_mem(&delta, &ndelta, f) != NULL)
> +		goto done;
> +	o->len = ftello(f) - o->off;
> +	if(delta == NULL || ndelta != nd)
> +		goto done;
> +	base = readidxobject(f, basehash, flag);
> +	if(base == NULL)
> +		goto done;
> +	if(applydelta(o, base, delta, ndelta) != -1)
> +		ret = 0;
> +done:
> +	free(delta);
> +	return ret;
> +}
> +
> +/*
> + * Read a delta whose base is another entry of the same pack file,
> + * located a variable-length encoded distance before this entry.
> + *
> + * f   pack stream, positioned right after the entry header
> + * o   object that receives the reconstructed data
> + * nd  inflated delta size announced by the entry header
> + * p   file offset at which this entry starts
> + *
> + * The stream position is moved to the base entry and is not restored.
> + * Returns 0 on success and -1 on any failure.
> + */
> +static int
> +readodelta(FILE *f, Object *o, off_t nd, off_t p, int flag)
> +{
> +	Object b;
> +	uint8_t *d;
> +	off_t r;
> +	size_t n;
> +	int c;
> +
> +	r = 0;
> +	d = NULL;
> +	/* readpacked() reads and ORs into fields of b; start from zero */
> +	memset(&b, 0, sizeof(b));
> +	while(1){
> +		if((c = fgetc(f)) == -1)
> +			goto error;
> +		r |= c & 0x7f;
> +		if (!(c & 0x80))
> +			break;
> +		r++;
> +		r <<= 7;
> +	}
> +
> +	/* a distance of 0 would make the entry its own base */
> +	if(r <= 0 || r > p){
> +		fprintf(stderr, "junk offset -%lld (from %lld)",
> +		    (long long)r, (long long)p);
> +		goto error;
> +	}
> +
> +	/* got_inflate_to_mem() returns NULL on success */
> +	if (got_inflate_to_mem(&d, &n, f) != NULL)
> +		goto error;
> +	o->len = ftello(f) - o->off;
> +	if(d == NULL || n != nd)
> +		goto error;
> +	if(fseeko(f, p - r, SEEK_SET) == -1)
> +		goto error;
> +	if(readpacked(f, &b, flag) == -1)
> +		goto error;
> +	if(applydelta(o, &b, d, nd) == -1)
> +		goto error;
> +	/* applydelta() copied what it needed out of the base */
> +	free(b.all);
> +	free(d);
> +	return 0;
> +error:
> +	free(b.all);
> +	free(d);
> +	return -1;
> +}
> +
> +/*
> + * Read the pack entry at the current position of f into o.
> + *
> + * The entry header holds a 3-bit type and a variable-length size.
> + * Commits, trees, tags and blobs are inflated behind a freshly
> + * formatted "<type> <size>" header; the two delta types are handed to
> + * readodelta() and readrdelta().
> + *
> + * On success o->flag gains Cloaded and the caller's flag bits, and 0
> + * is returned. Returns -1 on a read error or a malformed entry.
> + */
> +static int
> +readpacked(FILE *f, Object *o, int flag)
> +{
> +	const struct got_error *e;
> +	int c, s, n;
> +	off_t l, p;
> +	size_t ndata;
> +	uint8_t *data;
> +	Type t;
> +	Buf b;
> +
> +	p = ftello(f);
> +	c = fgetc(f);
> +	if(c == -1)
> +		return -1;
> +	l = c & 0xf;
> +	s = 4;
> +	t = (c >> 4) & 0x7;
> +	if(!t){
> +		fprintf(stderr, "unknown type for byte %x", c);
> +		return -1;
> +	}
> +	while(c & 0x80){
> +		if((c = fgetc(f)) == -1)
> +			return -1;
> +		/* refuse sizes whose bits would not fit in a positive off_t */
> +		if(s + 7 > (int)sizeof(l) * 8 - 1){
> +			fprintf(stderr, "object size too large");
> +			return -1;
> +		}
> +		l |= (off_t)(c & 0x7f) << s;
> +		s += 7;
> +	}
> +
> +	switch(t){
> +	default:
> +		fprintf(stderr, "invalid object at %lld", (long long)ftello(f));
> +		return -1;
> +	case GCommit:
> +	case GTree:
> +	case GTag:
> +	case GBlob:
> +		b.sz = 64 + l;
> +
> +		b.data = emalloc(b.sz);
> +		n = snprintf(b.data, 64, "%s %lld", typestr(t), (long long)l) + 1;
> +		b.len = n;
> +		e = got_inflate_to_mem(&data, &ndata, f);
> +		if (e != NULL || n + ndata >= b.sz) {
> +			/* inflate succeeded but the data does not fit */
> +			if (e == NULL)
> +				free(data);
> +			free(b.data);
> +			return -1;
> +		}
> +		memcpy(b.data + n, data, ndata);
> +		o->len = ftello(f) - o->off;
> +		o->type = t;
> +		o->all = b.data;
> +		o->data = b.data + n;
> +		o->size = ndata;
> +		free(data);
> +		break;
> +	case GOdelta:
> +		if(readodelta(f, o, l, p, flag) == -1)
> +			return -1;
> +		break;
> +	case GRdelta:
> +		if(readrdelta(f, o, l, flag) == -1)
> +			return -1;
> +		break;
> +	}
> +	o->flag |= Cloaded|flag;
> +	return 0;
> +}
> +
> +/*
> + * Read a loose object from f into o.
> + *
> + * The whole stream is inflated; the result must start with a header
> + * of the form "<type> <decimal size>" terminated by a NUL byte, and
> + * the size must equal the number of bytes following that NUL.
> + *
> + * On success o->all takes ownership of the inflated buffer, o->data
> + * points just past the header, o->flag gains Cloaded and the caller's
> + * flag bits, and 0 is returned. On failure the buffer is freed and -1
> + * is returned.
> + */
> +static int
> +readloose(FILE *f, Object *o, int flag)
> +{
> +	struct { char *tag; int type; } *p, types[] = {
> +		{"blob", GBlob},
> +		{"tree", GTree},
> +		{"commit", GCommit},
> +		{"tag", GTag},
> +		{NULL},
> +	};
> +	char *s, *e;
> +	uint8_t *d;
> +	off_t sz;
> +	size_t n;
> +	int l;
> +
> +	d = NULL;
> +	if (got_inflate_to_mem(&d, &n, f) != NULL)
> +		return -1;
> +	/* a NUL inside the buffer bounds every string scan below */
> +	if(d == NULL || memchr(d, '\0', n) == NULL){
> +		fprintf(stderr, "malformed object header");
> +		goto error;
> +	}
> +
> +	s = (char *)d;
> +	o->type = GNone;
> +	for(p = types; p->tag; p++){
> +		l = strlen(p->tag);
> +		if(strncmp(s, p->tag, l) == 0){
> +			s += l;
> +			o->type = p->type;
> +			while(*s != '\0' && !isspace((unsigned char)*s))
> +				s++;
> +			break;
> +		}
> +	}
> +	if(o->type == GNone)
> +		goto error;
> +	/* the size is decimal; base 0 would read a leading 0 as octal */
> +	sz = strtol(s, &e, 10);
> +	if(e == s || *e++ != 0){
> +		fprintf(stderr, "malformed object header");
> +		goto error;
> +	}
> +	if(sz != (off_t)(n - (size_t)(e - (char *)d))){
> +		fprintf(stderr, "mismatched sizes");
> +		goto error;
> +	}
> +	o->size = sz;
> +	o->data = e;
> +	o->all = d;
> +	o->flag |= Cloaded|flag;
> +	return 0;
> +
> +error:
> +	free(d);
> +	return -1;
> +}
> +
> +/*
> + * Look up hash h in the pack index open on f.
> + *
> + * Returns the offset recorded for the object, or -1 when the object
> + * is not listed or the index cannot be read. A message is printed in
> + * both failure cases.
> + */
> +static off_t
> +searchindex(FILE *f, struct got_object_id h)
> +{
> +	int lo, hi, idx, i, nent;
> +	off_t o, oo;
> +	struct got_object_id hh;
> +
> +	o = 8;
> +	/*
> +	 * Read the fanout table. The fanout table
> +	 * contains 256 entries, corresponding to
> +	 * the first byte of the hash. Each entry
> +	 * is a 4 byte big endian integer, containing
> +	 * the total number of entries with a leading
> +	 * byte <= the table index, allowing us to
> +	 * rapidly narrow down the search range.
> +	 */
> +	if (h.sha1[0] == 0){
> +		lo = 0;
> +		if(preadbe32(f, &hi, o) == -1)
> +			goto err;
> +	} else {
> +		o += h.sha1[0]*4 - 4;
> +		if(preadbe32(f, &lo, o + 0) == -1)
> +			goto err;
> +		if(preadbe32(f, &hi, o + 4) == -1)
> +			goto err;
> +	}
> +	if(hi == lo)
> +		goto notfound;
> +	if(preadbe32(f, &nent, 8 + 255*4) == -1)
> +		goto err;
> +
> +	/*
> +	 * Now that we know the range of hashes that the
> +	 * entry may exist in, scan them for a match.
> +	 */
> +	idx = -1;
> +	if(fseeko(f, (off_t)Hashsz*lo + 8 + 256*4, SEEK_SET) == -1)
> +		goto err;
> +	for(i = 0; i < hi - lo; i++){
> +		/* fread() reports failure as a short count, never as -1 */
> +		if(fread(hh.sha1, 1, sizeof(hh.sha1), f) != sizeof(hh.sha1))
> +			goto err;
> +		if(hasheq(&hh, &h)){
> +			idx = lo + i;
> +			break;
> +		}
> +	}
> +	if(idx == -1)
> +		goto notfound;
> +
> +
> +	/*
> +	 * We found the entry. If the top bit of the 32 bit
> +	 * entry is clear, it is the offset itself. Otherwise
> +	 * the low 31 bits are an index into the table of
> +	 * 64 bit offsets that follows the 32 bit table.
> +	 */
> +	oo = 8;				/* Header */
> +	oo += 256*4;			/* Fanout table */
> +	oo += (off_t)Hashsz*nent;	/* Hashes */
> +	oo += (off_t)4*nent;		/* Checksums */
> +	oo += (off_t)4*idx;		/* Offset offset */
> +	if(preadbe32(f, &i, oo) == -1)
> +		goto err;
> +	o = i & 0xffffffff;
> +	if(o & (1ull << 31)){
> +		o &= 0x7fffffff;
> +		oo = 8;				/* Header */
> +		oo += 256*4;			/* Fanout table */
> +		oo += (off_t)Hashsz*nent;	/* Hashes */
> +		oo += (off_t)4*nent;		/* Checksums */
> +		oo += (off_t)4*nent;		/* 32 bit offsets */
> +		oo += 8*o;			/* 64 bit entry */
> +		if(preadbe64(f, &o, oo) == -1)
> +			goto err;
> +	}
> +	return o;
> +
> +err:
> +	fprintf(stderr, "unable to read packfile\n");
> +	return -1;
> +notfound:
> +	{
> +		char hstr[41];
> +		hashfmt(hstr, sizeof(hstr), &h);
> +		fprintf(stdout, "could not find object %s\n", hstr);
> +	}
> +	return -1;
> +}
> +
> +/*
> + * Scans for a non-empty word, copying it into buf
> + * as a NUL-terminated string. At most *nstr bytes
> + * of *str are examined. On success the word and the
> + * blanks (spaces and tabs) around it are stripped
> + * from the input by advancing *str and reducing
> + * *nstr.
> + *
> + * Returns 0 on success. Returns -1 when there is no
> + * word before the next whitespace or NUL, or when
> + * the word plus its NUL does not fit in nbuf bytes;
> + * in both cases *str and *nstr are left unmodified.
> + */
> +static int
> +scanword(char **str, int *nstr, char *buf, int nbuf)
> +{
> +	char *p;
> +	int n, len;
> +
> +	if(nbuf <= 0)
> +		return -1;
> +	p = *str;
> +	n = *nstr;
> +	while(n > 0 && isblank((unsigned char)*p)){
> +		n--;
> +		p++;
> +	}
> +
> +	len = 0;
> +	while(n > 0 && *p != '\0' && !isspace((unsigned char)*p)){
> +		/* keep one byte free for the terminating NUL */
> +		if(len == nbuf - 1){
> +			buf[len] = '\0';
> +			return -1;
> +		}
> +		buf[len++] = *p++;
> +		n--;
> +	}
> +	buf[len] = '\0';
> +	if(len == 0)
> +		return -1;
> +
> +	while(n > 0 && isblank((unsigned char)*p)){
> +		n--;
> +		p++;
> +	}
> +	*str = p;
> +	*nstr = n;
> +	return 0;
> +}
> +
> +/*
> + * Advance *str to the byte after the next '\n' and reduce *nstr by
> + * the number of bytes skipped. The search stops at a NUL byte or
> + * after *nstr bytes, whichever comes first; when no newline is found
> + * in that range *str and *nstr are left unchanged.
> + */
> +static void
> +nextline(char **str, int *nstr)
> +{
> +	char *s;
> +	int n;
> +
> +	for(s = *str, n = *nstr; n > 0 && *s != '\0'; s++, n--){
> +		if(*s == '\n'){
> +			*str = s + 1;
> +			*nstr = n - 1;
> +			return;
> +		}
> +	}
> +}
> +
> +/*
> + * Placeholder for parsing an "author"/"committer" line: nothing is
> + * implemented yet. All arguments are ignored (neither the input
> + * cursor nor *name / *time is touched) and 0 is always returned.
> + */
> +static int
> +parseauthor(char **str, int *nstr, char **name, off_t *time)
> +{
> +	return 0;
> +}
> +
> +/*
> + * Parse the header lines of the commit stored in o->data (o->size
> + * bytes) into a newly allocated o->commit.
> + *
> + * Each line is identified by its first word. Header parsing stops at
> + * the first line on which scanword() finds no word; whatever follows,
> + * after skipping whitespace, is recorded as the commit message.
> + * Malformed "tree", "parent" and "gpgsig" lines terminate the process
> + * through errx().
> + *
> + * NOTE(review): the parent array is grown from o->commit->parent and
> + * o->commit->nparent without initializing them here, so this relies
> + * on emalloc() returning zeroed memory - confirm.
> + */
> +static void
> +parsecommit(Object *o)
> +{
> +	char *p, *t, buf[128];
> +	int np;
> +
> +	p = o->data;
> +	np = o->size;
> +	o->commit = emalloc(sizeof(Cinfo));
> +	while(1){
> +		/* first word of the line selects the header type */
> +		if(scanword(&p, &np, buf, sizeof(buf)) == -1)
> +			break;
> +		if(strcmp(buf, "tree") == 0){
> +			if(scanword(&p, &np, buf, sizeof(buf)) == -1)
> +				errx(1, "invalid commit: tree missing");
> +			if(hparse(&o->commit->tree, buf) == -1)
> +				errx(1, "invalid commit: garbled tree");
> +		}else if(strcmp(buf, "parent") == 0){
> +			if(scanword(&p, &np, buf, sizeof(buf)) == -1)
> +				errx(1, "invalid commit: missing parent");
> +			/* grow the parent array by one and parse into the new slot */
> +			o->commit->parent = realloc(o->commit->parent, ++o->commit->nparent * sizeof(struct got_object_id));
> +			if(!o->commit->parent)
> +				err(1, "unable to malloc: ");
> +			if(hparse(&o->commit->parent[o->commit->nparent - 1], buf) == -1)
> +				errx(1, "invalid commit: garbled parent");
> +		}else if(strcmp(buf, "author") == 0){
> +			/* parseauthor() is a stub in this file; mtime/author stay unset */
> +			parseauthor(&p, &np, &o->commit->author, &o->commit->mtime);
> +		}else if(strcmp(buf, "committer") == 0){
> +			parseauthor(&p, &np, &o->commit->committer, &o->commit->ctime);
> +		}else if(strcmp(buf, "gpgsig") == 0){
> +			/* just drop it */
> +			/* NOTE(review): strstr() is not limited to the np remaining bytes */
> +			if((t = strstr(p, "-----END PGP SIGNATURE-----")) == NULL)
> +				errx(1, "malformed gpg signature");
> +			np -= t - p;
> +			p = t;
> +		}
> +		/* unknown headers fall through and are skipped line by line */
> +		nextline(&p, &np);
> +	}
> +	while (np && isspace(*p)) {
> +		p++;
> +		np--;
> +	}
> +	/* the message is not copied: it points into o->data */
> +	o->commit->msg = p;
> +	o->commit->nmsg = np;
> +}
> +
> +/*
> + * Parse the tree stored in o->data (o->size bytes) into a newly
> + * allocated o->tree.
> + *
> + * Each entry consists of an octal mode word, a NUL-terminated name
> + * and a raw hash of sizeof(t->h.sha1) bytes. Entry names are not
> + * copied: t->name points into o->data. An entry whose name or hash
> + * would run past the end of the data terminates the process through
> + * errx().
> + */
> +static void
> +parsetree(Object *o)
> +{
> +	char *p, *nul, buf[256];
> +	int np, nn, m;
> +	Dirent *t;
> +
> +	p = o->data;
> +	np = o->size;
> +	o->tree = emalloc(sizeof(Tinfo));
> +	while(np > 0){
> +		if(scanword(&p, &np, buf, sizeof(buf)) == -1)
> +			break;
> +		o->tree->ent = erealloc(o->tree->ent, ++o->tree->nent * sizeof(Dirent));
> +		t = &o->tree->ent[o->tree->nent - 1];
> +		memset(t, 0, sizeof(Dirent));
> +		m = strtol(buf, NULL, 8);
> +		/* FIXME: symlinks and other BS */
> +		t->mode = m & 0777;
> +		/* 0160000 entries also have the 0040000 bit set */
> +		if(m & 0040000)
> +			t->mode |= S_IFDIR;
> +		if(m == 0160000)
> +			t->modref = 1;
> +		/* the name must be terminated inside the remaining data */
> +		if(np <= 0 || (nul = memchr(p, '\0', np)) == NULL)
> +			errx(1, "malformed tree, unterminated name with %d remaining", np);
> +		t->name = p;
> +		nn = nul - p + 1;
> +		p += nn;
> +		np -= nn;
> +		/* compare as int: a size_t comparison would accept negative np */
> +		if(np < (int)sizeof(t->h.sha1))
> +			errx(1, "malformed tree, remaining %d (%s)", np, t->name);
> +		memcpy(t->h.sha1, p, sizeof(t->h.sha1));
> +		p += sizeof(t->h.sha1);
> +		np -= sizeof(t->h.sha1);
> +	}
> +}
> +
> +/*
> + * Parse o according to its type, at most once per object: trees go
> + * through parsetree(), commits through parsecommit(), and every other
> + * type (tags included) is left unparsed. The object is flagged
> + * Cparsed afterwards in all cases.
> + */
> +void
> +parseobject(Object *o)
> +{
> +	if((o->flag & Cparsed) != 0)
> +		return;
> +
> +	if(o->type == GTree)
> +		parsetree(o);
> +	else if(o->type == GCommit)
> +		parsecommit(o);
> +
> +	o->flag |= Cparsed;
> +}
> +
> +/*
> + * Return the object with hash h, loading it if necessary.
> + *
> + * Lookup order:
> + *  1. the object cache; an entry flagged Cidx but not Cloaded is
> + *     re-read from idx at its recorded offset, and the position of
> + *     idx is restored afterwards;
> + *  2. a loose object file under .git/objects;
> + *  3. the pack files under .git/objects/pack, located through their
> + *     ".idx" files.
> + *
> + * Loaded objects are parsed and handed to cache(). Returns NULL when
> + * the object cannot be found or read; some failures (seek errors on
> + * idx, unreadable pack directory) terminate the process instead.
> + */
> +static Object*
> +readidxobject(FILE *idx, struct got_object_id h, int flag)
> +{
> +	char path[Pathmax];
> +	char hbuf[41];
> +	FILE *f;
> +	Object *obj;
> +	int l, n;
> +	off_t o;
> +	struct dirent *ent;
> +	DIR *d;
> +
> +
> +	if ((obj = got_object_idset_lookup_data(objcache, &h))) {
> +		if(obj->flag & Cloaded)
> +			return obj;
> +		if(obj->flag & Cidx){
> +			assert(idx != NULL);
> +			o = ftello(idx);
> +			if(fseeko(idx, obj->off, SEEK_SET) == -1)
> +				errx(1, "could not seek to object offset");
> +			if(readpacked(idx, obj, flag) == -1)
> +				errx(1, "could not reload object");
> +			if(fseeko(idx, o, SEEK_SET) == -1)
> +				errx(1, "could not restore offset");
> +			cache(obj);
> +			return obj;
> +		}
> +	}
> +
> +	f = NULL;
> +	n = 0;
> +	/* We're not putting it in the cache yet... */
> +	obj = emalloc(sizeof(Object));
> +	obj->id = next_object_id + 1;
> +	obj->hash = h;
> +
> +	hashfmt(hbuf, sizeof(hbuf), &h);
> +	snprintf(path, sizeof(path), ".git/objects/%c%c/%s", hbuf[0], hbuf[1], hbuf + 2);
> +	if((f = fopen(path, "r")) != NULL){
> +		if(readloose(f, obj, flag) == -1)
> +			goto error;
> +		fclose(f);
> +		parseobject(obj);
> +		hashfmt(hbuf, sizeof(hbuf), &obj->hash);
> +		fprintf(stderr, "object %s cached", hbuf);
> +		cache(obj);
> +		return obj;
> +	}
> +
> +	o = -1;
> +	if ((d = opendir(".git/objects/pack")) == NULL)
> +		err(1, "open pack dir");
> +	while ((ent = readdir(d)) != NULL) {
> +		l = strlen(ent->d_name);
> +		/* only look at "<something>.idx" */
> +		if(l <= 4 || strcmp(ent->d_name + l - 4, ".idx") != 0)
> +			continue;
> +		n = snprintf(path, sizeof(path), ".git/objects/pack/%s", ent->d_name);
> +		/* ".pack" is one byte longer than ".idx": keep room for it */
> +		if(n < 0 || (size_t)n + 1 >= sizeof(path))
> +			continue;
> +		if((f = fopen(path, "r")) == NULL)
> +			continue;
> +		o = searchindex(f, h);
> +		fclose(f);
> +		f = NULL;
> +		if(o == -1)
> +			continue;
> +		break;
> +	}
> +	closedir(d);
> +
> +	if (o == -1)
> +		goto error;
> +
> +	/* path still names the matching index; turn it into the pack name */
> +	memcpy(path + n - 4, ".pack", 6);
> +	if((f = fopen(path, "r")) == NULL)
> +		goto error;
> +	if(fseeko(f, o, SEEK_SET) == -1)
> +		goto error;
> +	if(readpacked(f, obj, flag) == -1)
> +		goto error;
> +	fclose(f);
> +	parseobject(obj);
> +	cache(obj);
> +	return obj;
> +error:
> +	if(f != NULL)
> +		fclose(f);
> +	free(obj);
> +	return NULL;
> +}
> +
> +/*
> + * Look up the object with hash h via readidxobject() (no index
> + * stream, no extra flags) and pass it to ref() before returning it.
> + * Returns NULL when the lookup fails.
> + */
> +Object*
> +readobject(struct got_object_id h)
> +{
> +	Object *obj = readidxobject(NULL, h, 0);
> +
> +	if(obj == NULL)
> +		return NULL;
> +	ref(obj);
> +	return obj;
> +}
> +
> +/*
> + * qsort() comparison callback for an array of Object pointers:
> + * orders the objects by the raw bytes of their hashes.
> + */
> +int
> +objcmp(const void *pa, const void *pb)
> +{
> +	Object *lhs = *(Object**)pa;
> +	Object *rhs = *(Object**)pb;
> +
> +	return memcmp(lhs->hash.sha1, rhs->hash.sha1, sizeof(lhs->hash.sha1));
> +}
> +
> +/*
> + * Feed len bytes of buf into the running SHA1 context, then write the
> + * same bytes to b. Returns the byte count reported by fwrite().
> + */
> +static int
> +hwrite(FILE *b, void *buf, int len, SHA1_CTX *ctx)
> +{
> +	size_t nwritten;
> +
> +	SHA1Update(ctx, buf, len);
> +	nwritten = fwrite(buf, 1, len, b);
> +	return nwritten;
> +}
> +
> +/*
> + * Compute the CRC-32 of the o->len bytes stored at offset o->off of
> + * f and leave it in o->crc.
> + *
> + * Returns 0 on success and (uint32_t)-1 when seeking or reading
> + * fails. Reaching end of file before o->len bytes were read is not
> + * reported as an error; o->crc then covers the bytes actually read.
> + */
> +static uint32_t
> +objectcrc(FILE *f, Object *o)
> +{
> +	unsigned char buf[8096];
> +	size_t want, got;
> +	off_t left;
> +
> +	o->crc = 0;
> +	if(fseeko(f, o->off, SEEK_SET) == -1)
> +		return (uint32_t)-1;
> +	for(left = o->len; left > 0; left -= got){
> +		want = left > (off_t)sizeof(buf) ? sizeof(buf) : (size_t)left;
> +		got = fread(buf, 1, want, f);
> +		/* fread() signals errors with a short count, not with -1 */
> +		if(got == 0)
> +			return ferror(f) ? (uint32_t)-1 : 0;
> +		o->crc = crc32(o->crc, buf, got);
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Build a version 2 pack index for the pack open on packfd and write
> + * it to idxfd. packhash is written into the index trailer ahead of
> + * the checksum of the index itself.
> + *
> + * Objects are read in repeated passes over the pack: an entry that
> + * cannot be read yet is retried on the next pass, and the process is
> + * terminated through errx() when a pass makes no progress.
> + *
> + * Both descriptors are wrapped in stdio streams and closed before
> + * returning, except when wrapping packfd itself fails. Returns 0 on
> + * success and -1 on failure.
> + */
> +int
> +indexpack(int packfd, int idxfd, struct got_object_id packhash)
> +{
> +	char hdr[4*3], buf[8];
> +	int nobj, nvalid, nbig, n, i, step;
> +	Object *o, **objects;
> +	char *valid;
> +	SHA1_CTX ctx, objctx;
> +	FILE *f;
> +	struct got_object_id h;
> +	int c;
> +
> +	objects = NULL;
> +	valid = NULL;
> +	if ((f = fdopen(packfd, "r")) == NULL)
> +		return -1;
> +	if (fseek(f, 0, SEEK_SET) == -1)
> +		goto error;
> +	if (fread(hdr, 1, sizeof(hdr), f) != sizeof(hdr)) {
> +		fprintf(stderr, "short read on header");
> +		goto error;
> +	}
> +	if (memcmp(hdr, "PACK\0\0\0\2", 8) != 0) {
> +		fprintf(stderr, "invalid header");
> +		goto error;
> +	}
> +
> +	nvalid = 0;
> +	nobj = GETBE32(hdr + 8);
> +	if (nobj < 0)
> +		goto error;
> +	objects = calloc(nobj, sizeof(Object*));
> +	valid = calloc(nobj, sizeof(char));
> +	/* calloc(0, ...) may legitimately return NULL */
> +	if (nobj != 0 && (objects == NULL || valid == NULL))
> +		goto error;
> +	step = nobj/100;
> +	if(!step)
> +		step++;
> +	while (nvalid != nobj) {
> +		fprintf(stderr, "indexing (%d/%d):", nvalid, nobj);
> +		n = 0;
> +		for (i = 0; i < nobj; i++) {
> +			if (valid[i]) {
> +				n++;
> +				continue;
> +			}
> +			if (i % step == 0)
> +				fprintf(stderr, ".");
> +			if (!objects[i]) {
> +				/* first visit: the entry starts where the previous one ended */
> +				o = emalloc(sizeof(Object));
> +				o->off = ftello(f);
> +				objects[i] = o;
> +			}
> +			o = objects[i];
> +			if (fseeko(f, o->off, SEEK_SET) == -1)
> +				goto error;
> +			if (readpacked(f, o, Cidx) == 0){
> +				SHA1Init(&objctx);
> +				SHA1Update(&objctx, (uint8_t*)o->all, o->size + strlen(o->all) + 1);
> +				SHA1Final(o->hash.sha1, &objctx);
> +				cache(o);
> +				valid[i] = 1;
> +				n++;
> +			}
> +			if(objectcrc(f, o) == -1)
> +				goto error;
> +		}
> +		fprintf(stderr, "\n");
> +		if (n == nvalid)
> +			errx(1, "fix point reached too early: %d/%d", nvalid, nobj);
> +		nvalid = n;
> +	}
> +	fclose(f);
> +	f = NULL;
> +
> +	SHA1Init(&ctx);
> +	qsort(objects, nobj, sizeof(Object*), objcmp);
> +	if((f = fdopen(idxfd, "w")) == NULL)
> +		goto error;
> +	if(hwrite(f, "\xfftOc\x00\x00\x00\x02", 8, &ctx) != 8)
> +		goto error;
> +	/* fanout table */
> +	c = 0;
> +	for(i = 0; i < 256; i++){
> +		while(c < nobj && (objects[c]->hash.sha1[0] & 0xff) <= i)
> +			c++;
> +		PUTBE32(buf, c);
> +		hwrite(f, buf, 4, &ctx);
> +	}
> +	for(i = 0; i < nobj; i++){
> +		o = objects[i];
> +		hwrite(f, o->hash.sha1, sizeof(o->hash.sha1), &ctx);
> +	}
> +
> +	/* pointless, nothing uses this */
> +	for(i = 0; i < nobj; i++){
> +		PUTBE32(buf, objects[i]->crc);
> +		hwrite(f, buf, 4, &ctx);
> +	}
> +
> +	/*
> +	 * 32 bit offset table. An entry with the top bit set is an
> +	 * index into the 64 bit table, so only offsets strictly below
> +	 * 2^31 can be stored directly.
> +	 */
> +	nbig = 0;
> +	for(i = 0; i < nobj; i++){
> +		if(objects[i]->off < (1ull<<31))
> +			PUTBE32(buf, objects[i]->off);
> +		else
> +			PUTBE32(buf, (1ull << 31) | nbig++);
> +		hwrite(f, buf, 4, &ctx);
> +	}
> +	/* 64 bit offset table, in the order the indexes were handed out */
> +	for(i = 0; i < nobj; i++){
> +		if(objects[i]->off >= (1ull<<31)){
> +			PUTBE64(buf, objects[i]->off);
> +			hwrite(f, buf, 8, &ctx);
> +		}
> +	}
> +	hwrite(f, packhash.sha1, sizeof(packhash.sha1), &ctx);
> +	SHA1Final(h.sha1, &ctx);
> +	fwrite(h.sha1, 1, sizeof(h.sha1), f);
> +	/* a failed write leaves the stream's error indicator set */
> +	if(fflush(f) == EOF || ferror(f))
> +		goto error;
> +
> +	free(objects);
> +	free(valid);
> +	fclose(f);
> +	return 0;
> +
> +error:
> +	free(objects);
> +	free(valid);
> +	if(f != NULL)
> +		fclose(f);
> +	return -1;
> +}
> +
> +/*
> + * Privsep helper entry point. Two messages are expected on
> + * GOT_IMSG_FD_CHILD:
> + *
> + *  1. GOT_IMSG_IDXPACK_REQUEST carrying the pack file descriptor and
> + *     the pack's SHA1 as payload; it is acknowledged;
> + *  2. GOT_IMSG_TMPFD carrying the descriptor the index is written to.
> + *
> + * The pack is then indexed. The parent is told about the outcome with
> + * either an index-pack-done message or a single error message. The
> + * process always exits with status 0.
> + */
> +int
> +main(int argc, char **argv)
> +{
> +	const struct got_error *err = NULL;
> +	struct got_object_id packhash;
> +	struct imsgbuf ibuf;
> +	struct imsg imsg;
> +	int packfd, idxfd;
> +
> +	objcache = got_object_idset_alloc();
> +	imsg_init(&ibuf, GOT_IMSG_FD_CHILD);
> +	if((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != 0) {
> +		if (err->code == GOT_ERR_PRIVSEP_PIPE)
> +			err = NULL;
> +		goto done;
> +	}
> +	if (imsg.hdr.type == GOT_IMSG_STOP)
> +		goto done;
> +	if (imsg.hdr.type != GOT_IMSG_IDXPACK_REQUEST) {
> +		err = got_error(GOT_ERR_PRIVSEP_MSG);
> +		goto done;
> +	}
> +	if (imsg.hdr.len - IMSG_HEADER_SIZE != SHA1_DIGEST_LENGTH) {
> +		err = got_error(GOT_ERR_PRIVSEP_LEN);
> +		goto done;
> +	}
> +	packfd = imsg.fd;
> +	memcpy(packhash.sha1, imsg.data, SHA1_DIGEST_LENGTH);
> +	got_privsep_send_ack(&ibuf);
> +
> +	if((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != 0) {
> +		if (err->code == GOT_ERR_PRIVSEP_PIPE)
> +			err = NULL;
> +		goto done;
> +	}
> +	if (imsg.hdr.type == GOT_IMSG_STOP)
> +		goto done;
> +	if (imsg.hdr.type != GOT_IMSG_TMPFD) {
> +		err = got_error(GOT_ERR_PRIVSEP_MSG);
> +		goto done;
> +	}
> +	if (imsg.hdr.len - IMSG_HEADER_SIZE != 0) {
> +		err = got_error(GOT_ERR_PRIVSEP_LEN);
> +		goto done;
> +	}
> +	idxfd = imsg.fd;
> +
> +	/* do not report success to the parent when indexing failed */
> +	if (indexpack(packfd, idxfd, packhash) == -1)
> +		err = got_error(GOT_ERR_BAD_PACKFILE);
> +done:
> +	if(err == NULL)
> +		err = got_privsep_send_index_pack_done(&ibuf);
> +	if(err != NULL) {
> +		fprintf(stderr, "%s: %s\n", getprogname(), err->msg);
> +		got_privsep_send_error(&ibuf, err);
> +	}
> +
> +	exit(0);
> +}
> 
> -- 
>     Ori Bernstein
> 
>