Download raw body.
got clone: start of effort.
This diff implements the fetch protocol support for git+ssh, and almost implements it for unencrypted git:// (haven't written the code to open the socket yet). It's a quick and dirty port/airdrop/privsepification of the code I wrote for git9, so it doesn't match the style or the data structures used in the rest of got. We'll need to do a few passes to clean it up and make it fit before I'd be happy getting it in, but I'd like to show where things are. There's no unveil or pledge. The code in git-fetch-pack should be usable for incremental fetches, too, once we do the plumbing to figure out which objects are in the tree. I'll be doing another diff to implement git push, doing the same kind of airdrop. diff --git a/got/Makefile b/got/Makefile index 709948a..27b8780 100644 --- a/got/Makefile +++ b/got/Makefile @@ -8,7 +8,7 @@ SRCS= got.c blame.c commit_graph.c delta.c diff.c \ object_idset.c object_parse.c opentemp.c path.c pack.c \ privsep.c reference.c repository.c sha1.c worktree.c \ inflate.c buf.c rcsutil.c diff3.c lockfile.c \ - deflate.c object_create.c delta_cache.c + deflate.c object_create.c delta_cache.c fetch.c MAN = ${PROG}.1 got-worktree.5 git-repository.5 CPPFLAGS = -I${.CURDIR}/../include -I${.CURDIR}/../lib diff --git a/got/got.c b/got/got.c index 9f40340..22bd4d7 100644 --- a/got/got.c +++ b/got/got.c @@ -45,6 +45,7 @@ #include "got_worktree.h" #include "got_diff.h" #include "got_commit_graph.h" +#include "got_lib_fetch.h" #include "got_blame.h" #include "got_privsep.h" #include "got_opentemp.h" @@ -80,6 +81,7 @@ __dead static void usage(int); __dead static void usage_init(void); __dead static void usage_import(void); __dead static void usage_checkout(void); +__dead static void usage_clone(void); __dead static void usage_update(void); __dead static void usage_log(void); __dead static void usage_diff(void); @@ -104,6 +106,7 @@ __dead static void usage_cat(void); static const struct got_error* cmd_init(int, char *[]); static const struct got_error* 
cmd_import(int, char *[]); +static const struct got_error* cmd_clone(int, char *[]); static const struct got_error* cmd_checkout(int, char *[]); static const struct got_error* cmd_update(int, char *[]); static const struct got_error* cmd_log(int, char *[]); @@ -131,6 +134,7 @@ static struct got_cmd got_commands[] = { { "init", cmd_init, usage_init, "in" }, { "import", cmd_import, usage_import, "im" }, { "checkout", cmd_checkout, usage_checkout, "co" }, + { "clone", cmd_clone, usage_clone, "cl" }, { "update", cmd_update, usage_update, "up" }, { "log", cmd_log, usage_log, "" }, { "diff", cmd_diff, usage_diff, "di" }, @@ -794,6 +798,13 @@ done: return error; } +__dead static void +usage_clone(void) +{ + fprintf(stderr, "usage: %s clone repo-path\n", getprogname()); + exit(1); +} + __dead static void usage_checkout(void) { @@ -969,6 +980,34 @@ resolve_commit_arg(struct got_object_id **commit_id, return err; } +static const struct got_error * +cmd_clone(int argc, char *argv[]) +{ + char *uri, *branch_filter, *dirname; + int ch; + + while ((ch = getopt(argc, argv, "b:c:p:")) != -1) { + switch (ch) { + case 'b': + branch_filter = optarg; + break; + default: + usage_clone(); + break; + } + } + argc -= optind; + argv += optind; + uri = argv[0]; + if(argc == 1) + dirname = NULL; + else if(argc == 2) + dirname = argv[1]; + else + usage_clone(); + return got_clone(argv[0], branch_filter, dirname); +} + static const struct got_error * cmd_checkout(int argc, char *argv[]) { diff --git a/include/got_error.h b/include/got_error.h index d0df125..50c6e2d 100644 --- a/include/got_error.h +++ b/include/got_error.h @@ -128,6 +128,9 @@ #define GOT_ERR_REGEX 112 #define GOT_ERR_REF_NAME_MINUS 113 #define GOT_ERR_GITCONFIG_SYNTAX 114 +#define GOT_ERR_FETCH_FAILED 115 +#define GOT_ERR_PARSE_URI 116 +#define GOT_ERR_BAD_PROTO 117 static const struct got_error { int code; @@ -262,6 +265,9 @@ static const struct got_error { { GOT_ERR_REGEX, "regular expression error" }, { 
GOT_ERR_REF_NAME_MINUS, "reference name may not start with '-'" }, { GOT_ERR_GITCONFIG_SYNTAX, "gitconfig syntax error" }, + { GOT_ERR_FETCH_FAILED, "fetch failed" }, + { GOT_ERR_PARSE_URI, "failed to parse uri" }, + { GOT_ERR_BAD_PROTO, "unknown protocol" }, }; /* diff --git a/lib/fetch.c b/lib/fetch.c new file mode 100644 index 0000000..dfba5c3 --- /dev/null +++ b/lib/fetch.c @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2018, 2019 Stefan Sperling <stsp@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/queue.h> +#include <sys/uio.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <sys/syslimits.h> +#include <sys/resource.h> + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sha1.h> +#include <zlib.h> +#include <ctype.h> +#include <limits.h> +#include <imsg.h> +#include <time.h> +#include <uuid.h> + +#include "got_error.h" +#include "got_reference.h" +#include "got_repository.h" +#include "got_path.h" +#include "got_cancel.h" +#include "got_worktree.h" +#include "got_object.h" + +#include "got_lib_delta.h" +#include "got_lib_inflate.h" +#include "got_lib_object.h" +#include "got_lib_object_parse.h" +#include "got_lib_object_create.h" +#include "got_lib_pack.h" +#include "got_lib_sha1.h" +#include "got_lib_privsep.h" +#include "got_lib_object_cache.h" +#include "got_lib_repository.h" + +#define GOT_PROTOMAX 64 +#define GOT_HOSTMAX 256 +#define GOT_PATHMAX 512 +#define GOT_REPOMAX 256 +#define GOT_PORTMAX 16 +#define GOT_URIMAX 1024 + +static int +mkpath(char *path) +{ + char *p, namebuf[PATH_MAX]; + struct stat sb; + int done; + + while (*path == '/') + path++; + if(strlcpy(namebuf, path, sizeof(namebuf)) >= sizeof(namebuf)) { + errno = ENAMETOOLONG; + return -1; + } + + p = namebuf; + for (;;) { + p += strspn(p, "/"); + p += strcspn(p, "/"); + done = (*p == '\0'); + *p = '\0'; + + if (mkdir(namebuf, 0755) != 0) { + int mkdir_errno = errno; + if (stat(path, &sb) == -1) { + /* Not there; use mkdir()s errno */ + errno = mkdir_errno; + return -1; + } + if (!S_ISDIR(sb.st_mode)) { + /* Is there, but isn't a directory */ + errno = ENOTDIR; + return -1; + } + } + + if (done) + break; + *p = '/'; + } + + return 0; +} + +static int +hassuffix(char *base, char *suf) +{ + int nb, ns; + + nb = strlen(base); + ns = strlen(suf); + if(ns <= nb && strcmp(base + (nb - ns), suf) == 0) + return 1; + return 0; +} + +static 
int +grab(char *dst, int n, char *p, char *e) +{ + int l; + + l = e - p; + if(l >= n) { + errno = ENAMETOOLONG; + return -1; + } + return strlcpy(dst, p, l + 1); +} + +static int +got_dial_ssh(char *host, char *port, char *path, char *direction) +{ + int pid, pfd[2]; + char cmd[64]; + + if(pipe(pfd) == -1) + return -1; + pid = fork(); + if(pid == -1) + return -1; + if(pid == 0){ + close(pfd[1]); + dup2(pfd[0], 0); + dup2(pfd[0], 1); + snprintf(cmd, sizeof(cmd), "git-%s-pack", direction); + execlp("ssh", "ssh", host, cmd, path, NULL); + abort(); + }else{ + close(pfd[0]); + return pfd[1]; + } +} + +#if 0 +int +got_dial_git(char *host, char *port, char *path, char *direction) +{ + struct sockaddr_storage sk; + char *ds, cmd[128]; + int fd, l; + + ds = netmkaddr(host, "tcp", port); + fd = dial(ds, NULL, NULL, NULL); + if(fd == -1) + return -1; + if(chattygit) + fprint(2, "dial %s %s git-%s-pack %s\n", host, port, direction, path); + l = snprint(cmd, sizeof(cmd), "git-%s-pack %s\n", direction, path); + if(writepkt(fd, cmd, l + 1) == -1){ + print("failed to write message\n"); + close(fd); + return -1; + } + return fd; +} +#endif + +int +got_parse_uri(char *uri, char *proto, char *host, char *port, char *path, char *repo) +{ + char *s, *p, *q; + int n, hasport; + + p = strstr(uri, "://"); + if(!p){ + //werrstr("missing protocol"); + return -1; + } + if (grab(proto, GOT_PROTOMAX, uri, p) == -1) + return -1; + hasport = (strcmp(proto, "git") == 0 || strstr(proto, "http") == proto); + s = p + 3; + p = NULL; + if(!hasport){ + p = strstr(s, ":"); + if(p != NULL) + p++; + } + if(p == NULL) + p = strstr(s, "/"); + if(p == NULL || strlen(p) == 1){ + //werrstr("missing path"); + return -1; + } + + q = memchr(s, ':', p - s); + if(q){ + grab(host, GOT_HOSTMAX, s, q); + grab(port, GOT_PORTMAX, q + 1, p); + }else{ + grab(host, GOT_HOSTMAX, s, p); + snprintf(port, GOT_PORTMAX, "9418"); + } + + snprintf(path, GOT_PATHMAX, "%s", p); + p = strrchr(p, '/') + 1; + if(!p || strlen(p) == 0){ 
+ //werrstr("missing repository in uri"); + return -1; + } + n = strlen(p); + if(hassuffix(p, ".git")) + n -= 4; + grab(repo, GOT_REPOMAX, p, p + n); + return 0; +} + +const struct got_error* +got_clone(char *uri, char *branch_filter, char *dirname) +{ + char proto[GOT_PROTOMAX], host[GOT_HOSTMAX], port[GOT_PORTMAX]; + char repo[GOT_REPOMAX], path[GOT_PATHMAX]; + int imsg_fetchfds[2], imsg_idxfds[2], fetchfd; + int packfd, npackfd, idxfd, nidxfd, status; + struct got_object_id packhash; + const struct got_error *err; + struct imsgbuf ibuf; + pid_t pid; + + fetchfd = -1; + if (got_parse_uri(uri, proto, host, port, path, repo) == -1) + return got_error(GOT_ERR_PARSE_URI); + if (dirname == NULL) + dirname = repo; + err = got_repo_init(dirname); + if (err != NULL) + return err; + if (chdir(dirname)) + return got_error_from_errno("enter new repo"); + if (mkpath(".git/objects/pack") == -1) + return got_error_from_errno("mkpath"); + packfd = open(".git/objects/pack/fetching.pack", O_CREAT|O_RDWR, 0644); + if (packfd == -1) + return got_error_from_errno("open pack"); + npackfd = dup(packfd); + if (npackfd == -1) + return got_error_from_errno("dup"); + idxfd = open(".git/objects/pack/fetching.idx", O_CREAT|O_RDWR, 0644); + if (idxfd == -1) + return got_error_from_errno("open pack"); + nidxfd = dup(idxfd); + if (nidxfd == -1) + return got_error_from_errno("dup"); + + if(strcmp(proto, "ssh") == 0 || strcmp(proto, "git+ssh") == 0) + fetchfd = got_dial_ssh(host, port, path, "upload"); + //else if(strcmp(proto, "git") == 0) + // fetchfd = got_dial_git(host, port, path, "upload"); + else if(strcmp(proto, "http") == 0 || strcmp(proto, "git+http") == 0) + err = got_error(GOT_ERR_BAD_PROTO); + else + err = got_error(GOT_ERR_BAD_PROTO); + + if (fetchfd == -1) + err = got_error_from_errno("dial uri"); + if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_fetchfds) == -1) + return got_error_from_errno("socketpair"); + + pid = fork(); + if (pid == -1) + return 
got_error_from_errno("fork"); + else if (pid == 0) + got_privsep_exec_child(imsg_fetchfds, GOT_PATH_PROG_FETCH_PACK, "."); + + if (close(imsg_fetchfds[1]) != 0) + return got_error_from_errno("close"); + imsg_init(&ibuf, imsg_fetchfds[0]); + err = got_privsep_send_fetch_req(&ibuf, fetchfd); + if (err != NULL) + return err; + err = got_privsep_wait_ack(&ibuf); + if (err != NULL) + return err; + err = got_privsep_send_tmpfd(&ibuf, npackfd); + if (err != NULL) + return err; + npackfd = dup(packfd); + if (npackfd == -1) + return got_error_from_errno("dup"); + err = got_privsep_wait_fetch_done(&ibuf, &packhash); + if (err != NULL) + return err; + if (waitpid(pid, &status, 0) == -1) + return got_error_from_errno("child exit"); + + if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_idxfds) == -1) + return got_error_from_errno("socketpair"); + pid = fork(); + if (pid == -1) + return got_error_from_errno("fork"); + else if (pid == 0) + got_privsep_exec_child(imsg_idxfds, GOT_PATH_PROG_INDEX_PACK, "."); + if (close(imsg_idxfds[1]) != 0) + return got_error_from_errno("close"); + imsg_init(&ibuf, imsg_idxfds[0]); + + err = got_privsep_send_index_pack_req(&ibuf, npackfd, packhash); + if (err != NULL) + return err; + err = got_privsep_wait_ack(&ibuf); + if (err != NULL) + return err; + err = got_privsep_send_tmpfd(&ibuf, nidxfd); + if (err != NULL) + return err; + err = got_privsep_wait_index_pack_done(&ibuf); + if (err != NULL) + return err; + imsg_clear(&ibuf); + if (close(imsg_idxfds[0]) == -1) + return got_error_from_errno("close child"); + if (waitpid(pid, &status, 0) == -1) + return got_error_from_errno("child exit"); + + + return NULL; + +} diff --git a/lib/got_lib_object_idset.h b/lib/got_lib_object_idset.h index 6ae68d9..6dbd75c 100644 --- a/lib/got_lib_object_idset.h +++ b/lib/got_lib_object_idset.h @@ -26,6 +26,8 @@ const struct got_error *got_object_idset_remove(void **, struct got_object_idset *, struct got_object_id *); int got_object_idset_contains(struct 
got_object_idset *, struct got_object_id *); +void *got_object_idset_lookup_data(struct got_object_idset *, + struct got_object_id *); const struct got_error *got_object_idset_for_each(struct got_object_idset *, const struct got_error *(*cb)(struct got_object_id *, void *, void *), void *); diff --git a/lib/got_lib_privsep.h b/lib/got_lib_privsep.h index b27325d..6789c93 100644 --- a/lib/got_lib_privsep.h +++ b/lib/got_lib_privsep.h @@ -42,6 +42,8 @@ #define GOT_PROG_READ_TAG got-read-tag #define GOT_PROG_READ_PACK got-read-pack #define GOT_PROG_READ_GITCONFIG got-read-gitconfig +#define GOT_PROG_FETCH_PACK got-fetch-pack +#define GOT_PROG_SEND_PACK got-send-pack #define GOT_STRINGIFY(x) #x #define GOT_STRINGVAL(x) GOT_STRINGIFY(x) @@ -61,6 +63,10 @@ GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_READ_PACK) #define GOT_PATH_PROG_READ_GITCONFIG \ GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_READ_GITCONFIG) +#define GOT_PATH_PROG_FETCH_PACK \ + GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_FETCH_PACK) +#define GOT_PATH_PROG_SEND_PACK \ + GOT_STRINGVAL(GOT_LIBEXECDIR) "/" GOT_STRINGVAL(GOT_PROG_SEND_PACK) struct got_privsep_child { int imsg_fd; @@ -98,6 +104,12 @@ enum got_imsg_type { GOT_IMSG_TAG, GOT_IMSG_TAG_TAGMSG, + /* Messages related to networking. */ + GOT_IMSG_FETCH_REQUEST, + GOT_IMSG_FETCH_DONE, + GOT_IMSG_IDXPACK_REQUEST, + GOT_IMSG_IDXPACK_DONE, + /* Messages related to pack files. */ GOT_IMSG_PACKIDX, GOT_IMSG_PACK, @@ -106,6 +118,7 @@ enum got_imsg_type { /* Message sending file descriptor to a temporary file. */ GOT_IMSG_TMPFD, + /* Messages related to gitconfig files. 
*/ GOT_IMSG_GITCONFIG_PARSE_REQUEST, GOT_IMSG_GITCONFIG_REPOSITORY_FORMAT_VERSION_REQUEST, @@ -272,6 +285,12 @@ const struct got_error *got_privsep_send_blob_outfd(struct imsgbuf *, int); const struct got_error *got_privsep_send_tmpfd(struct imsgbuf *, int); const struct got_error *got_privsep_send_obj(struct imsgbuf *, struct got_object *); +const struct got_error *got_privsep_send_index_pack_req(struct imsgbuf *, int); +const struct got_error *got_privsep_send_index_pack_done(struct imsgbuf *, + struct got_object_id*); +const struct got_error *got_privsep_send_fetch_req(struct imsgbuf *, int); +const struct got_error *got_privsep_send_fetch_done(struct imsgbuf *); +const struct got_error *got_privsep_wait_fetch_done(struct imsgbuf *); const struct got_error *got_privsep_get_imsg_obj(struct got_object **, struct imsg *, struct imsgbuf *); const struct got_error *got_privsep_recv_obj(struct got_object **, diff --git a/lib/inflate.c b/lib/inflate.c index 3986b17..ebcdf12 100644 --- a/lib/inflate.c +++ b/lib/inflate.c @@ -86,12 +86,16 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp) size_t last_total_out = zb->z.total_out; z_stream *z = &zb->z; int ret = Z_ERRNO; + off_t off, consumed; z->next_out = zb->outbuf; z->avail_out = zb->outlen; *outlenp = 0; + off = ftello(f); + consumed = 0; do { + size_t last_total_in = zb->z.total_in; if (z->avail_in == 0) { size_t n = fread(zb->inbuf, 1, zb->inlen, f); if (n == 0) { @@ -105,6 +109,7 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp) z->avail_in = n; } ret = inflate(z, Z_SYNC_FLUSH); + consumed += z->total_in - last_total_in; } while (ret == Z_OK && z->avail_out > 0); if (ret == Z_OK) { @@ -116,6 +121,7 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp) } *outlenp = z->total_out - last_total_out; + fseek(f, off + consumed, SEEK_SET); return NULL; } diff --git a/lib/object.c b/lib/object.c index 4aaeb8a..2044c22 100644 --- a/lib/object.c +++ 
b/lib/object.c @@ -51,6 +51,7 @@ #include "got_lib_object_cache.h" #include "got_lib_object_parse.h" #include "got_lib_pack.h" +#include "got_lib_fetch.h" #include "got_lib_repository.h" #ifndef MIN diff --git a/lib/object_idset.c b/lib/object_idset.c index 527383c..510b59e 100644 --- a/lib/object_idset.c +++ b/lib/object_idset.c @@ -168,6 +168,14 @@ got_object_idset_contains(struct got_object_idset *set, return entry ? 1 : 0; } +void * +got_object_idset_lookup_data(struct got_object_idset *set, + struct got_object_id *id) +{ + struct got_object_idset_element *entry = find_element(set, id); + return entry ? entry->data : NULL; +} + const struct got_error * got_object_idset_for_each(struct got_object_idset *set, const struct got_error *(*cb)(struct got_object_id *, void *, void *), diff --git a/lib/privsep.c b/lib/privsep.c index f31cafb..599d5eb 100644 --- a/lib/privsep.c +++ b/lib/privsep.c @@ -401,6 +401,44 @@ got_privsep_send_obj(struct imsgbuf *ibuf, struct got_object *obj) return flush_imsg(ibuf); } +const struct got_error * +got_privsep_send_fetch_req(struct imsgbuf *ibuf, int fd) +{ + const struct got_error *err = NULL; + + if (imsg_compose(ibuf, GOT_IMSG_FETCH_REQUEST, 0, 0, fd, + NULL, 0) == -1) { + err = got_error_from_errno("imsg_compose FETCH_REQUEST"); + close(fd); + return err; + } + return flush_imsg(ibuf); +} + +const struct got_error * +got_privsep_send_fetch_done(struct imsgbuf *ibuf) +{ + if (imsg_compose(ibuf, GOT_IMSG_FETCH_DONE, 0, 0, -1, NULL, 0) == -1) + return got_error_from_errno("imsg_compose FETCH"); + return flush_imsg(ibuf); +} + +const struct got_error * +got_privsep_wait_fetch_done(struct imsgbuf *ibuf) +{ + const struct got_error *err = NULL; + struct imsg imsg; + + err = got_privsep_recv_imsg(&imsg, ibuf, 0); + if (err) + return err; + if (imsg.hdr.type == GOT_IMSG_FETCH_DONE) + return NULL; + else + return got_error(GOT_ERR_PRIVSEP_MSG); + imsg_free(&imsg); +} + const struct got_error * got_privsep_get_imsg_obj(struct got_object 
**obj, struct imsg *imsg, struct imsgbuf *ibuf) diff --git a/libexec/Makefile b/libexec/Makefile index a4c900b..ddc207c 100644 --- a/libexec/Makefile +++ b/libexec/Makefile @@ -1,4 +1,4 @@ SUBDIR = got-read-blob got-read-commit got-read-object got-read-tree \ - got-read-tag got-read-pack got-read-gitconfig + got-read-tag got-fetch-pack got-read-pack got-read-gitconfig .include <bsd.subdir.mk> diff --git a/libexec/got-index-pack/got-index-pack.c b/libexec/got-index-pack/got-index-pack.c new file mode 100644 index 0000000..3bf2502 --- /dev/null +++ b/libexec/got-index-pack/got-index-pack.c @@ -0,0 +1,1254 @@ +#include <sys/queue.h> +#include <sys/stat.h> +#include <sys/syslimits.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/uio.h> + +#include <stdint.h> +#include <errno.h> +#include <imsg.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <sha1.h> +#include <fcntl.h> +#include <zlib.h> +#include <err.h> +#include <assert.h> +#include <dirent.h> + +#include "got_error.h" +#include "got_object.h" + +#include "got_lib_sha1.h" +#include "got_lib_delta.h" +#include "got_lib_inflate.h" +#include "got_lib_object.h" +#include "got_lib_object_parse.h" +#include "got_lib_object_idset.h" +#include "got_lib_privsep.h" + +typedef struct Cinfo Cinfo; +typedef struct Tinfo Tinfo; +typedef struct Object Object; +typedef struct Pack Pack; +typedef struct Buf Buf; +typedef struct Dirent Dirent; +typedef struct Idxent Idxent; +typedef struct Ols Ols; + +enum { + /* 5k objects should be enough */ + Cachemax = 5*1024, + Pathmax = 512, + Hashsz = 20, + Pktmax = 65536, + + Nproto = 16, + Nport = 16, + Nhost = 256, + Npath = 128, + Nrepo = 64, + Nbranch = 32, +}; + +typedef enum Type { + GNone = 0, + GCommit = 1, + GTree = 2, + GBlob = 3, + GTag = 4, + GOdelta = 6, + GRdelta = 7, +} Type; + +enum { + Cloaded = 1 << 0, + Cidx = 1 << 1, + Ccache = 1 << 2, + Cexist = 1 << 3, + Cparsed = 1 
<< 5, +}; + +struct Dirent { + char *name; + int modref; + int mode; + struct got_object_id h; +}; + +struct Object { + /* Git data */ + struct got_object_id hash; + Type type; + + /* Cache */ + int id; + int flag; + int refs; + Object *next; + Object *prev; + + /* For indexing */ + off_t off; + off_t len; + uint32_t crc; + + /* Everything below here gets cleared */ + char *all; + char *data; + /* size excludes header */ + off_t size; + + union { + Cinfo *commit; + Tinfo *tree; + }; +}; + +struct Tinfo { + /* Tree */ + Dirent *ent; + int nent; +}; + +struct Cinfo { + /* Commit */ + struct got_object_id *parent; + int nparent; + struct got_object_id tree; + char *author; + char *committer; + char *msg; + int nmsg; + off_t ctime; + off_t mtime; +}; + +typedef struct Buf Buf; + +struct Buf { + int len; + int sz; + char *data; +}; + +static int readpacked(FILE *, Object *, int); +static Object *readidxobject(FILE *, struct got_object_id, int); + +struct got_object_idset *objcache; +int next_object_id; +Object *lruhead; +Object *lrutail; +int ncache; + +#define GETBE16(b)\ + ((((b)[0] & 0xFFul) << 8) | \ + (((b)[1] & 0xFFul) << 0)) + +#define GETBE32(b)\ + ((((b)[0] & 0xFFul) << 24) | \ + (((b)[1] & 0xFFul) << 16) | \ + (((b)[2] & 0xFFul) << 8) | \ + (((b)[3] & 0xFFul) << 0)) +#define GETBE64(b)\ + ((((b)[0] & 0xFFull) << 56) | \ + (((b)[1] & 0xFFull) << 48) | \ + (((b)[2] & 0xFFull) << 40) | \ + (((b)[3] & 0xFFull) << 32) | \ + (((b)[4] & 0xFFull) << 24) | \ + (((b)[5] & 0xFFull) << 16) | \ + (((b)[6] & 0xFFull) << 8) | \ + (((b)[7] & 0xFFull) << 0)) + +#define PUTBE16(b, n)\ + do{ \ + (b)[0] = (n) >> 8; \ + (b)[1] = (n) >> 0; \ + } while(0) + +#define PUTBE32(b, n)\ + do{ \ + (b)[0] = (n) >> 24; \ + (b)[1] = (n) >> 16; \ + (b)[2] = (n) >> 8; \ + (b)[3] = (n) >> 0; \ + } while(0) + +#define PUTBE64(b, n)\ + do{ \ + (b)[0] = (n) >> 56; \ + (b)[1] = (n) >> 48; \ + (b)[2] = (n) >> 40; \ + (b)[3] = (n) >> 32; \ + (b)[4] = (n) >> 24; \ + (b)[5] = (n) >> 16; \ + (b)[6] = (n) 
>> 8; \ + (b)[7] = (n) >> 0; \ + } while(0) + +static int +charval(int c, int *err) +{ + if(c >= '0' && c <= '9') + return c - '0'; + if(c >= 'a' && c <= 'f') + return c - 'a' + 10; + if(c >= 'A' && c <= 'F') + return c - 'A' + 10; + *err = 1; + return -1; +} + +static int +hparse(struct got_object_id *h, char *b) +{ + int i, err; + + err = 0; + for(i = 0; i < sizeof(h->sha1); i++){ + err = 0; + h->sha1[i] = 0; + h->sha1[i] |= ((charval(b[2*i], &err) & 0xf) << 4); + h->sha1[i] |= ((charval(b[2*i+1], &err)& 0xf) << 0); + if(err) + return -1; + } + return 0; +} + +static void * +emalloc(size_t n) +{ + void *v; + + v = calloc(n, 1); + if(v == NULL) + err(1, "malloc:"); + return v; +} + +static void * +erealloc(void *p, ulong n) +{ + void *v; + + v = realloc(p, n); + if(v == NULL) + err(1, "realloc:"); + memset(v, 0, n); + return v; +} + +static int +hasheq(struct got_object_id *a, struct got_object_id *b) +{ + return memcmp(a->sha1, b->sha1, sizeof(a->sha1)) == 0; +} + +static char * +typestr(int t) +{ + char *types[] = { + "???", + "commit", + "tree", + "blob", + "tag", + "odelta", + "rdelta", + }; + if (t < 0 || t >= sizeof(types)/sizeof(types[0])) + abort(); + return types[t]; +} + +static char * +hashfmt(char *out, size_t nout, struct got_object_id *h) +{ + int i, n, c0, c1; + char *p; + + if (nout < 2*sizeof(h->sha1) + 1) + return NULL; + p = out; + for(i = 0; i < sizeof(h->sha1); i++){ + n = (h->sha1[i] >> 4) & 0xf; + c0 = (n >= 10) ? n-10 + 'a' : n + '0'; + n = h->sha1[i] & 0xf; + c1 = (n >= 10) ? 
n-10 + 'a' : n + '0'; + *p++ = c0; + *p++ = c1; + } + *p++ = 0; + return out; +} + +static void +clear(Object *o) +{ + if(!o) + return; + + assert(o->refs == 0); + assert((o->flag & Ccache) == 0); + assert(o->flag & Cloaded); + switch(o->type){ + case GCommit: + if(!o->commit) + break; + free(o->commit->parent); + free(o->commit->author); + free(o->commit->committer); + free(o->commit); + o->commit = NULL; + break; + case GTree: + if(!o->tree) + break; + free(o->tree->ent); + free(o->tree); + o->tree = NULL; + break; + default: + break; + } + + free(o->all); + o->all = NULL; + o->data = NULL; + o->flag &= ~Cloaded; +} + +static void +unref(Object *o) +{ + if(!o) + return; + o->refs--; + if(!o->refs) + clear(o); +} + +static Object* +ref(Object *o) +{ + o->refs++; + return o; +} + +static void +cache(Object *o) +{ + char buf[41]; + Object *p; + + hashfmt(buf, sizeof(buf), &o->hash); + if(o == lruhead) + return; + if(o == lrutail) + lrutail = lrutail->prev; + if(!(o->flag & Cexist)){ + got_object_idset_add(objcache, &o->hash, o); + o->id = next_object_id++; + o->flag |= Cexist; + } + if(o->prev) + o->prev->next = o->next; + if(o->next) + o->next->prev = o->prev; + if(lrutail == o){ + lrutail = o->prev; + lrutail->next = NULL; + }else if(!lrutail) + lrutail = o; + if(lruhead) + lruhead->prev = o; + o->next = lruhead; + o->prev = NULL; + lruhead = o; + + if(!(o->flag & Ccache)){ + o->flag |= Ccache; + ref(o); + ncache++; + } + while(ncache > Cachemax){ + p = lrutail; + lrutail = p->prev; + lrutail->next = NULL; + p->flag &= ~Ccache; + p->prev = NULL; + p->next = NULL; + unref(p); + ncache--; + } +} + +static int +preadbe32(FILE *b, int *v, off_t off) +{ + char buf[4]; + + if(fseek(b, off, 0) == -1) + return -1; + if(fread(buf, 1, sizeof(buf), b) == -1) + return -1; + *v = GETBE32(buf); + + return 0; +} +static int +preadbe64(FILE *b, off_t *v, off_t off) +{ + char buf[8]; + + if(fseek(b, off, 0) == -1) + return -1; + if(fread(buf, 1, sizeof(buf), b) == -1) + return -1; 
+ *v = GETBE64(buf); + return 0; +} + +static int +readvint(char *p, char **pp) +{ + int i, n, c; + + i = 0; + n = 0; + do { + c = *p++; + n |= (c & 0x7f) << i; + i += 7; + } while (c & 0x80); + *pp = p; + + return n; +} + +static int +applydelta(Object *dst, Object *base, char *d, int nd) +{ + char *r, *b, *ed, *er; + int n, nr, c; + off_t o, l; + + ed = d + nd; + b = base->data; + n = readvint(d, &d); + if(n != base->size){ + fprintf(stderr, "mismatched source size"); + return -1; + } + + nr = readvint(d, &d); + r = emalloc(nr + 64); + n = snprintf(r, 64, "%s %d", typestr(base->type), nr) + 1; + dst->all = r; + dst->type = base->type; + dst->data = r + n; + dst->size = nr; + er = dst->data + nr; + r = dst->data; + + while(1){ + if(d == ed) + break; + c = *d++; + if(!c){ + fprintf(stderr, "bad delta encoding"); + return -1; + } + /* copy from base */ + if(c & 0x80){ + o = 0; + l = 0; + /* Offset in base */ + if(c & 0x01 && d != ed) o |= (*d++ << 0) & 0x000000ff; + if(c & 0x02 && d != ed) o |= (*d++ << 8) & 0x0000ff00; + if(c & 0x04 && d != ed) o |= (*d++ << 16) & 0x00ff0000; + if(c & 0x08 && d != ed) o |= (*d++ << 24) & 0xff000000; + + /* Length to copy */ + if(c & 0x10 && d != ed) l |= (*d++ << 0) & 0x0000ff; + if(c & 0x20 && d != ed) l |= (*d++ << 8) & 0x00ff00; + if(c & 0x40 && d != ed) l |= (*d++ << 16) & 0xff0000; + if(l == 0) l = 0x10000; + + assert(o + l <= base->size); + memmove(r, b + o, l); + r += l; + /* inline data */ + }else{ + memmove(r, d, c); + d += c; + r += c; + } + + } + if(r != er){ + fprintf(stderr, "truncated delta (%zd)", er - r); + return -1; + } + + return nr; +} + +static int +readrdelta(FILE *f, Object *o, int nd, int flag) +{ + const struct got_error *e; + struct got_object_id h; + Object *b; + uint8_t *d; + size_t n; + + d = NULL; + if(fread(h.sha1, 1, sizeof(h.sha1), f) != sizeof(h.sha1)) + goto error; + if(hasheq(&o->hash, &h)) + goto error; + if ((e = got_inflate_to_mem(&d, &n, f)) != NULL) + goto error; + o->len = ftello(f) - 
o->off; + if(d == NULL || n != nd) + goto error; + if((b = readidxobject(f, h, flag)) == NULL) + goto error; + if(applydelta(o, b, d, n) == -1) + goto error; + free(d); + return 0; +error: + free(d); + return -1; +} + +static int +readodelta(FILE *f, Object *o, off_t nd, off_t p, int flag) +{ + Object b; + uint8_t *d; + off_t r; + size_t n; + int c; + + r = 0; + d = NULL; + while(1){ + if((c = fgetc(f)) == -1) + goto error; + r |= c & 0x7f; + if (!(c & 0x80)) + break; + r++; + r <<= 7; + }while(c & 0x80); + + if(r > p){ + fprintf(stderr, "junk offset -%lld (from %lld)", r, p); + goto error; + } + + if (got_inflate_to_mem(&d, &n, f) == NULL) + goto error; + o->len = ftello(f) - o->off; + if(d == NULL || n != nd) + goto error; + if(fseek(f, p - r, 0) == -1) + goto error; + if(readpacked(f, &b, flag) == -1) + goto error; + if(applydelta(o, &b, d, nd) == -1) + goto error; + free(d); + return 0; +error: + free(d); + return -1; +} + +static int +readpacked(FILE *f, Object *o, int flag) +{ + const struct got_error *e; + int c, s, n; + off_t l, p; + size_t ndata; + uint8_t *data; + Type t; + Buf b; + + p = ftello(f); + c = fgetc(f); + if(c == -1) + return -1; + l = c & 0xf; + s = 4; + t = (c >> 4) & 0x7; + if(!t){ + fprintf(stderr, "unknown type for byte %x", c); + return -1; + } + while(c & 0x80){ + if((c = fgetc(f)) == -1) + return -1; + l |= (c & 0x7f) << s; + s += 7; + } + + switch(t){ + default: + fprintf(stderr, "invalid object at %lld", ftello(f)); + return -1; + case GCommit: + case GTree: + case GTag: + case GBlob: + b.sz = 64 + l; + + b.data = emalloc(b.sz); + n = snprintf(b.data, 64, "%s %lld", typestr(t), l) + 1; + b.len = n; + e = got_inflate_to_mem(&data, &ndata, f); + if (e != NULL || n + ndata >= b.sz) { + free(b.data); + return -1; + } + memcpy(b.data + n, data, ndata); + o->len = ftello(f) - o->off; + o->type = t; + o->all = b.data; + o->data = b.data + n; + o->size = ndata; + free(data); + break; + case GOdelta: + if(readodelta(f, o, l, p, flag) == -1) + 
return -1; + break; + case GRdelta: + if(readrdelta(f, o, l, flag) == -1) + return -1; + break; + } + o->flag |= Cloaded|flag; + return 0; +} + +static int +readloose(FILE *f, Object *o, int flag) +{ + struct { char *tag; int type; } *p, types[] = { + {"blob", GBlob}, + {"tree", GTree}, + {"commit", GCommit}, + {"tag", GTag}, + {NULL}, + }; + char *s, *e; + uint8_t *d; + off_t sz; + size_t n; + int l; + + if (got_inflate_to_mem(&d, &n, f) != NULL) + return -1; + + s = (char *)d; + o->type = GNone; + for(p = types; p->tag; p++){ + l = strlen(p->tag); + if(strncmp(s, p->tag, l) == 0){ + s += l; + o->type = p->type; + while(!isspace(*s)) + s++; + break; + } + } + if(o->type == GNone){ + free(o->data); + return -1; + } + sz = strtol(s, &e, 0); + if(e == s || *e++ != 0){ + fprintf(stderr, "malformed object header"); + goto error; + } + if(sz != n - (e - (char *)d)){ + fprintf(stderr, "mismatched sizes"); + goto error; + } + o->size = sz; + o->data = e; + o->all = d; + o->flag |= Cloaded|flag; + return 0; + +error: + free(d); + return -1; +} + +static off_t +searchindex(FILE *f, struct got_object_id h) +{ + int lo, hi, idx, i, nent; + off_t o, oo; + struct got_object_id hh; + + o = 8; + /* + * Read the fanout table. The fanout table + * contains 256 entries, corresponsding to + * the first byte of the hash. Each entry + * is a 4 byte big endian integer, containing + * the total number of entries with a leading + * byte <= the table index, allowing us to + * rapidly do a binary search on them. + */ + if (h.sha1[0] == 0){ + lo = 0; + if(preadbe32(f, &hi, o) == -1) + goto err; + } else { + o += h.sha1[0]*4 - 4; + if(preadbe32(f, &lo, o + 0) == -1) + goto err; + if(preadbe32(f, &hi, o + 4) == -1) + goto err; + } + if(hi == lo) + goto notfound; + if(preadbe32(f, &nent, 8 + 255*4) == -1) + goto err; + + /* + * Now that we know the range of hashes that the + * entry may exist in, read them in so we can do + * a bsearch. 
+ */ + idx = -1; + fseek(f, Hashsz*lo + 8 + 256*4, 0); + for(i = 0; i < hi - lo; i++){ + if(fread(hh.sha1, 1, sizeof(hh.sha1), f) == -1) + goto err; + if(hasheq(&hh, &h)) + idx = lo + i; + } + if(idx == -1) + goto notfound; + + + /* + * We found the entry. If it's 32 bits, then we + * can just return the oset, otherwise the 32 + * bit entry contains the oset to the 64 bit + * entry. + */ + oo = 8; /* Header */ + oo += 256*4; /* Fanout table */ + oo += Hashsz*nent; /* Hashes */ + oo += 4*nent; /* Checksums */ + oo += 4*idx; /* Offset offset */ + if(preadbe32(f, &i, oo) == -1) + goto err; + o = i & 0xffffffff; + if(o & (1ull << 31)){ + o &= 0x7fffffff; + if(preadbe64(f, &o, o) == -1) + goto err; + } + return o; + +err: + fprintf(stderr, "unable to read packfile\n"); + return -1; +notfound: + { + char hstr[41]; + hashfmt(hstr, sizeof(hstr), &h); + fprintf(stdout, "could not find object %s\n", hstr); + } + return -1; +} + +/* + * Scans for non-empty word, copying it into buf. + * Strips off word, leading, and trailing space + * from input. + * + * Returns -1 on empty string or error, leaving + * input unmodified. 
+ */ +static int +scanword(char **str, int *nstr, char *buf, int nbuf) +{ + char *p; + int n, r; + + r = -1; + p = *str; + n = *nstr; + while(n && isblank(*p)){ + n--; + p++; + } + + for(; n && *p && !isspace(*p); p++, n--){ + r = 0; + *buf++ = *p; + nbuf--; + if(nbuf == 0) + return -1; + } + while(n && isblank(*p)){ + n--; + p++; + } + *buf = 0; + *str = p; + *nstr = n; + return r; +} + +static void +nextline(char **str, int *nstr) +{ + char *s; + + if((s = strchr(*str, '\n')) != NULL){ + *nstr -= s - *str + 1; + *str = s + 1; + } +} + +static int +parseauthor(char **str, int *nstr, char **name, off_t *time) +{ + return 0; +} + +static void +parsecommit(Object *o) +{ + char *p, *t, buf[128]; + int np; + + p = o->data; + np = o->size; + o->commit = emalloc(sizeof(Cinfo)); + while(1){ + if(scanword(&p, &np, buf, sizeof(buf)) == -1) + break; + if(strcmp(buf, "tree") == 0){ + if(scanword(&p, &np, buf, sizeof(buf)) == -1) + errx(1, "invalid commit: tree missing"); + if(hparse(&o->commit->tree, buf) == -1) + errx(1, "invalid commit: garbled tree"); + }else if(strcmp(buf, "parent") == 0){ + if(scanword(&p, &np, buf, sizeof(buf)) == -1) + errx(1, "invalid commit: missing parent"); + o->commit->parent = realloc(o->commit->parent, ++o->commit->nparent * sizeof(struct got_object_id)); + if(!o->commit->parent) + err(1, "unable to malloc: "); + if(hparse(&o->commit->parent[o->commit->nparent - 1], buf) == -1) + errx(1, "invalid commit: garbled parent"); + }else if(strcmp(buf, "author") == 0){ + parseauthor(&p, &np, &o->commit->author, &o->commit->mtime); + }else if(strcmp(buf, "committer") == 0){ + parseauthor(&p, &np, &o->commit->committer, &o->commit->ctime); + }else if(strcmp(buf, "gpgsig") == 0){ + /* just drop it */ + if((t = strstr(p, "-----END PGP SIGNATURE-----")) == NULL) + errx(1, "malformed gpg signature"); + np -= t - p; + p = t; + } + nextline(&p, &np); + } + while (np && isspace(*p)) { + p++; + np--; + } + o->commit->msg = p; + o->commit->nmsg = np; +} + +static 
void +parsetree(Object *o) +{ + char *p, buf[256]; + int np, nn, m; + Dirent *t; + + p = o->data; + np = o->size; + o->tree = emalloc(sizeof(Tinfo)); + while(np > 0){ + if(scanword(&p, &np, buf, sizeof(buf)) == -1) + break; + o->tree->ent = erealloc(o->tree->ent, ++o->tree->nent * sizeof(Dirent)); + t = &o->tree->ent[o->tree->nent - 1]; + memset(t, 0, sizeof(Dirent)); + m = strtol(buf, NULL, 8); + /* FIXME: symlinks and other BS */ + if(m == 0160000){ + t->mode |= S_IFDIR; + t->modref = 1; + } + t->mode = m & 0777; + if(m & 0040000) + t->mode |= S_IFDIR; + t->name = p; + nn = strlen(p) + 1; + p += nn; + np -= nn; + if(np < sizeof(t->h.sha1)) + errx(1, "malformed tree, remaining %d (%s)", np, p); + memcpy(t->h.sha1, p, sizeof(t->h.sha1)); + p += sizeof(t->h.sha1); + np -= sizeof(t->h.sha1); + } +} + +void +parseobject(Object *o) +{ + if(o->flag & Cparsed) + return; + switch(o->type){ + case GTree: parsetree(o); break; + case GCommit: parsecommit(o); break; + //case GTag: parsetag(o); break; + default: break; + } + o->flag |= Cparsed; +} + +static Object* +readidxobject(FILE *idx, struct got_object_id h, int flag) +{ + char path[Pathmax]; + char hbuf[41]; + FILE *f; + Object *obj; + int l, n; + off_t o; + struct dirent *ent; + DIR *d; + + + if ((obj = got_object_idset_lookup_data(objcache, &h))) { + if(obj->flag & Cloaded) + return obj; + if(obj->flag & Cidx){ + assert(idx != NULL); + o = ftello(idx); + if(fseek(idx, obj->off, 0) == -1) + errx(1, "could not seek to object offset"); + if(readpacked(idx, obj, flag) == -1) + errx(1, "could not reload object"); + if(fseek(idx, o, 0) == -1) + errx(1, "could not restore offset"); + cache(obj); + return obj; + } + } + + d = NULL; + /* We're not putting it in the cache yet... 
*/ + obj = emalloc(sizeof(Object)); + obj->id = next_object_id + 1; + obj->hash = h; + + hashfmt(hbuf, sizeof(hbuf), &h); + snprintf(path, sizeof(path), ".git/objects/%c%c/%s", hbuf[0], hbuf[1], hbuf + 2); + if((f = fopen(path, "r")) != NULL){ + if(readloose(f, obj, flag) == -1) + goto error; + fclose(f); + parseobject(obj); + hashfmt(hbuf, sizeof(hbuf), &obj->hash); + fprintf(stderr, "object %s cached", hbuf); + cache(obj); + return obj; + } + + o = -1; + if ((d = opendir(".git/objects/pack")) == NULL) + err(1, "open pack dir"); + while ((ent = readdir(d)) != NULL) { + l = strlen(ent->d_name); + if(l > 4 && strcmp(ent->d_name + l - 4, ".idx") != 0) + continue; + snprintf(path, sizeof(path), ".git/objects/pack/%s", ent->d_name); + if((f = fopen(path, "r")) == NULL) + continue; + o = searchindex(f, h); + fclose(f); + if(o == -1) + continue; + break; + } + closedir(d); + + if (o == -1) + goto error; + + if((n = snprintf(path, sizeof(path), "%s", path)) >= sizeof(path) - 4) + goto error; + memcpy(path + n - 4, ".pack", 6); + if((f = fopen(path, "r")) == NULL) + goto error; + if(fseek(f, o, 0) == -1) + goto error; + if(readpacked(f, obj, flag) == -1) + goto error; + fclose(f); + parseobject(obj); + cache(obj); + return obj; +error: + free(obj); + return NULL; +} + +Object* +readobject(struct got_object_id h) +{ + Object *o; + + o = readidxobject(NULL, h, 0); + if(o) + ref(o); + return o; +} + +int +objcmp(const void *pa, const void *pb) +{ + Object *a, *b; + + a = *(Object**)pa; + b = *(Object**)pb; + return memcmp(a->hash.sha1, b->hash.sha1, sizeof(a->hash.sha1)); +} + +static int +hwrite(FILE *b, void *buf, int len, SHA1_CTX *ctx) +{ + SHA1Update(ctx, buf, len); + return fwrite(buf, 1, len, b); +} + +static uint32_t +objectcrc(FILE *f, Object *o) +{ + char buf[8096]; + int n, r; + + o->crc = 0; + fseek(f, o->off, 0); + for(n = o->len; n > 0; n -= r){ + r = fread(buf, 1, n > sizeof(buf) ? 
sizeof(buf) : n, f); + if(r == -1) + return -1; + if(r == 0) + return 0; + o->crc = crc32(o->crc, buf, r); + } + return 0; +} + +int +indexpack(int packfd, int idxfd, struct got_object_id packhash) +{ + char hdr[4*3], buf[8]; + int nobj, nvalid, nbig, n, i, step; + Object *o, **objects; + char *valid; + SHA1_CTX ctx, objctx; + FILE *f; + struct got_object_id h; + int c; + + if ((f = fdopen(packfd, "r")) == NULL) + return -1; + if (fseek(f, 0, SEEK_SET) == -1) + return -1; + if (fread(hdr, 1, sizeof(hdr), f) != sizeof(hdr)) { + fprintf(stderr, "short read on header"); + return -1; + } + if (memcmp(hdr, "PACK\0\0\0\2", 8) != 0) { + fprintf(stderr, "invalid header"); + return -1; + } + + nvalid = 0; + nobj = GETBE32(hdr + 8); + objects = calloc(nobj, sizeof(Object*)); + valid = calloc(nobj, sizeof(char)); + step = nobj/100; + if(!step) + step++; + while (nvalid != nobj) { + fprintf(stderr, "indexing (%d/%d):", nvalid, nobj); + n = 0; + for (i = 0; i < nobj; i++) { + if (valid[i]) { + n++; + continue; + } + if (i % step == 0) + fprintf(stderr, "."); + if (!objects[i]) { + o = emalloc(sizeof(Object)); + o->off = ftello(f); + objects[i] = o; + } + o = objects[i]; + fseek(f, o->off, 0); + if (readpacked(f, o, Cidx) == 0){ + SHA1Init(&objctx); + SHA1Update(&objctx, (uint8_t*)o->all, o->size + strlen(o->all) + 1); + SHA1Final(o->hash.sha1, &objctx); + cache(o); + valid[i] = 1; + n++; + } + if(objectcrc(f, o) == -1) + return -1; + } + fprintf(stderr, "\n"); + if (n == nvalid) { + errx(1, "fix point reached too early: %d/%d", nvalid, nobj); + goto error; + } + nvalid = n; + } + fclose(f); + + SHA1Init(&ctx); + qsort(objects, nobj, sizeof(Object*), objcmp); + if((f = fdopen(idxfd, "w")) == NULL) + return -1; + if(hwrite(f, "\xfftOc\x00\x00\x00\x02", 8, &ctx) != 8) + goto error; + /* fanout table */ + c = 0; + for(i = 0; i < 256; i++){ + while(c < nobj && (objects[c]->hash.sha1[0] & 0xff) <= i) + c++; + PUTBE32(buf, c); + hwrite(f, buf, 4, &ctx); + } + for(i = 0; i < nobj; 
i++){ + o = objects[i]; + hwrite(f, o->hash.sha1, sizeof(o->hash.sha1), &ctx); + } + + /* pointless, nothing uses this */ + for(i = 0; i < nobj; i++){ + PUTBE32(buf, objects[i]->crc); + hwrite(f, buf, 4, &ctx); + } + + nbig = 0; + for(i = 0; i < nobj; i++){ + if(objects[i]->off <= (1ull<<31)) + PUTBE32(buf, objects[i]->off); + else + PUTBE32(buf, (1ull << 31) | nbig++); + hwrite(f, buf, 4, &ctx); + } + for(i = 0; i < nobj; i++){ + if(objects[i]->off > (1ull<<31)){ + PUTBE64(buf, objects[i]->off); + hwrite(f, buf, 8, &ctx); + } + } + hwrite(f, packhash.sha1, sizeof(packhash.sha1), &ctx); + SHA1Final(h.sha1, &ctx); + fwrite(h.sha1, 1, sizeof(h.sha1), f); + + free(objects); + free(valid); + fclose(f); + return 0; + +error: + free(objects); + free(valid); + fclose(f); + return -1; +} + +int +main(int argc, char **argv) +{ + const struct got_error *err = NULL; + struct got_object_id packhash; + struct imsgbuf ibuf; + struct imsg imsg; + int packfd, idxfd; + + objcache = got_object_idset_alloc(); + imsg_init(&ibuf, GOT_IMSG_FD_CHILD); + if((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != 0) { + if (err->code == GOT_ERR_PRIVSEP_PIPE) + err = NULL; + goto done; + } + if (imsg.hdr.type == GOT_IMSG_STOP) + goto done; + if (imsg.hdr.type != GOT_IMSG_IDXPACK_REQUEST) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + goto done; + } + if (imsg.hdr.len - IMSG_HEADER_SIZE != SHA1_DIGEST_LENGTH) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + goto done; + } + packfd = imsg.fd; + memcpy(packhash.sha1, imsg.data, SHA1_DIGEST_LENGTH); + got_privsep_send_ack(&ibuf); + + if((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != 0) { + if (err->code == GOT_ERR_PRIVSEP_PIPE) + err = NULL; + goto done; + } + if (imsg.hdr.type == GOT_IMSG_STOP) + goto done; + if (imsg.hdr.type != GOT_IMSG_TMPFD) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + goto done; + } + if (imsg.hdr.len - IMSG_HEADER_SIZE != 0) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + goto done; + } + idxfd = imsg.fd; + + indexpack(packfd, idxfd, 
packhash); +done: + if(err != NULL) + got_privsep_send_error(&ibuf, err); + else + err = got_privsep_send_index_pack_done(&ibuf); + if(err != NULL) { + fprintf(stderr, "%s: %s\n", getprogname(), err->msg); + got_privsep_send_error(&ibuf, err); + } + + exit(0); +} -- Ori Bernstein
got clone: start of effort.