Download raw body.
got: allow clone without symrefs
On Sat, 2025-06-14 at 17:19 +0200, Stefan Sperling wrote:
> On Sat, Jun 14, 2025 at 01:17:19AM +0200, Martijn van Duren wrote:
> > Hello all,
> >
> > Some time ago I got pointed to this repo[0] which doesn't want to clone
> > without specifying the specific branch. I have no particular usecase
> > for this repo, but the fact that it couldn't be checked out by default
> > kept irking me.
> >
> > After some digging I found out that we rely on symrefs to determine
> > the default branch. According to the protocol spec[1]:
> > Clients MAY use the parameters from this capability to select the
> > proper initial branch when cloning a repository.
> > So it's definitely not a bad thing to do. However, this particular host
> > doesn't send a symrefs and we're left hanging until you use -l to see
> > what's available, and then -b to manually specify the branch to check
> > out.
> >
> > The diff below adds a workaround for this situation by using the first
> > refs/heads/ whose id_str matches that of HEAD. It's definitely not
> > fool proof, but it at least covers this case.
>
> I am fine with this approach. This server does not advertise any symref
> capability, so what you are proposing is the best we can do.
>
> I see one wrinkle: your patch is blindly assuming that the ref advertised
> on the first line received from the server points at the "HEAD".
>
> The Git docs imply that HEAD doesn't necessarily need to exist (why that
> is I don't know -- probably their usual disregard of conventions in favour
> of extreme flexibility). Quoting gitprotocol-pack(5):
>
> If HEAD is a valid ref, HEAD MUST appear as the first advertised ref. If
> HEAD is not a valid ref, HEAD MUST NOT appear in the advertisement list
> at all, but other refs may still appear.
>
> To be honest, I expect that every useful repository out there will
> contain HEAD in practice, so your code will probably just work as it is.
> But in theory your patch makes us fetch some arbitrary branch by default,
> whichever the server announces first. If some server implementation were
> to advertise all refs in alphabetical order and omit HEAD, and people were
> adding new branches all day long, we could end up cloning a different
> default branch every time.
>
> I would suggest to check whether the variable refname equals "HEAD" in
> the code quoted below, and leave default_branch set to NULL if the refname
> is not "HEAD". This would keep the current behaviour intact in the edge
> case where no HEAD is advertised at all. We do want people to use the
> -b option in that case, rather than fetching something arbitrary.
The omission of the "HEAD" check was semi intentional as an additional
last ditch effort to retrieve something, but I definitely see where
you're coming from.
This diff works for me, and should make the default branch logic a
little more robust. But I'm not sure if the added complexity is worth
it:
- If we have a symref keep the logic as is.
- If we have a "HEAD", try to find a "master" or "main" branch of
the same id_str as "HEAD".
- If we have a "HEAD", but no id_str matching "master" or "main" branch,
default to the first "refs/heads/" branch that matches "HEAD"'s
id_str.
- If there is no "HEAD", try to find "master" or "main" branch of any
id_str.
- Give up and let the user use -b.
I guess that "master" and "main" are common enough as main branch to
keep those as fallback, but let's see what sticks :-)
Only lightly tested.
diff /home/martijn/src/got
path + /home/martijn/src/got
commit - e339aba5a8f332c98d0fa01f1d9a5096e89c6045
blob - 736c8e342f847a5c286727292fe961ed4fcac388
file + libexec/got-fetch-pack/got-fetch-pack.c
--- libexec/got-fetch-pack/got-fetch-pack.c
+++ libexec/got-fetch-pack/got-fetch-pack.c
@@ -333,6 +333,15 @@ done:
return err;
}
+static int
+ref_is_common_initial(const char *refname)
+{
+ if (strcmp(refname, "refs/heads/main") == 0 ||
+ strcmp(refname, "refs/heads/master") == 0)
+ return 1;
+ return 0;
+}
+
static const struct got_error *
fetch_pack(int fd, int packfd, uint8_t *pack_sha1,
struct got_pathlist_head *have_refs, int fetch_all_branches,
@@ -347,10 +356,12 @@ fetch_pack(int fd, int packfd, uint8_t *pack_sha1,
struct got_object_id *have, *want;
int is_firstpkt = 1, nref = 0, refsz = 16;
int i, n, nwant = 0, nhave = 0, acked = 0, eof = 0;
+ int gotsymref = 0, gothead = 0;
off_t packsz = 0, last_reported_packsz = 0;
char *id_str = NULL, *default_id_str = NULL, *refname = NULL;
char *server_capabilities = NULL, *my_capabilities = NULL;
const char *default_branch = NULL;
+ char *free_default_branch = NULL;
struct got_pathlist_head symrefs;
struct got_pathlist_entry *pe;
int sent_my_capabilites = 0, have_sidebands = 0;
@@ -433,8 +444,19 @@ fetch_pack(int fd, int packfd, uint8_t *pack_sha1,
if (strcmp(name, GOT_REF_HEAD) != 0)
continue;
default_branch = symref_target;
+ gotsymref = 1;
break;
}
+ if (!gotsymref &&
+ strcmp(refname, GOT_REF_HEAD) == 0) {
+ default_id_str = strdup(id_str);
+ if (default_id_str == NULL) {
+ err = got_error_from_errno(
+ "strdup");
+ goto done;
+ }
+ gothead = 1;
+ }
}
if (default_branch)
continue;
@@ -446,13 +468,54 @@ fetch_pack(int fd, int packfd, uint8_t *pack_sha1,
}
continue;
}
- if (default_branch && default_id_str == NULL &&
- strcmp(refname, default_branch) == 0) {
- default_id_str = strdup(id_str);
- if (default_id_str == NULL) {
- err = got_error_from_errno("strdup");
- goto done;
+ if (!gotsymref) {
+ if (gothead) {
+ if (default_branch == NULL &&
+ strncmp(refname, "refs/heads/", 11) == 0 &&
+ strcmp(id_str, default_id_str) == 0) {
+ free_default_branch = strdup(refname);
+ if (free_default_branch == NULL) {
+ err = got_error_from_errno(
+ "strdup");
+ goto done;
+ }
+ default_branch = free_default_branch;
+ } else if (free_default_branch &&
+ strcmp(id_str, default_id_str) == 0 &&
+ ref_is_common_initial(refname)) {
+ free(free_default_branch);
+ free_default_branch = strdup(refname);
+ if (free_default_branch == NULL) {
+ err = got_error_from_errno(
+ "strdup");
+ goto done;
+ }
+ default_branch = free_default_branch;
+ }
+ } else {
+ if (default_branch == NULL &&
+ default_id_str == NULL &&
+ ref_is_common_initial(refname)) {
+ free_default_branch = strdup(refname);
+ default_id_str = strdup(id_str);
+ if (free_default_branch == NULL ||
+ default_id_str == NULL) {
+ err = got_error_from_errno(
+ "strdup");
+ goto done;
+ }
+ default_branch = free_default_branch;
+ }
}
+ } else {
+ if (default_branch && default_id_str == NULL &&
+ strcmp(refname, default_branch) == 0) {
+ default_id_str = strdup(id_str);
+ if (default_id_str == NULL) {
+ err = got_error_from_errno("strdup");
+ goto done;
+ }
+ }
}
if (list_refs_only || strncmp(refname, "refs/tags/", 10) == 0) {
@@ -846,6 +909,7 @@ done:
free(have);
free(want);
free(id_str);
+ free(free_default_branch);
free(default_id_str);
free(refname);
free(server_capabilities);
got: allow clone without symrefs