"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
Re: got {diff,log,update} -c KEYWORD (cf. svn revision keywords)
To:
Mark Jamsek <mark@jamsek.com>
Cc:
Stefan Sperling <stsp@stsp.name>, gameoftrees@openbsd.org
Date:
Thu, 13 Jul 2023 18:30:57 +0200

Download raw body.

Thread
On 2023/07/14 00:59:15 +1000, Mark Jamsek <mark@jamsek.com> wrote:
> The below diff makes the discussed changes to the keyword syntax, and
> adds the ":(+|-)[N]" modifier to reference names. This enables:
> 
> got up -cmain:-2	# 2nd gen ancestor of main
> got up -cfoo:+N		# Nth gen descendant of foo
> got up -c:base:+	# 1st gen descendant of work tree base commit
> got up -c:head:-4	# 4th gen ancestor of HEAD
> got up -cbase:-		# 1st gen descendant of ref "base"
> 
> Tests and docs have been tweaked to cover the changes, and I have also
> added a little cover for the newly added ability of reference modifiers
> (e.g., -cmain:+). However, I will expand regress to add some contrived
> and non-trivial cases before adding support for temporal expressions and
> further improvements. I wanted to get this out to confirm that we indeed
> do like the syntax and are happy with this direction.

briefily played with it but it's very nice to have, thank you!

I guess that we should eventually use this syntax everywhere we accept
a ref (more or less), so it could make sense to define the parsing
code somewhere in lib/ for easier reuse between got, tog, cvg and
gotadmin.

I can imagine myself using, say, `got patch -c ':last friday' <diff'
or `gotadmin dump -x main:-3 main >incremental.bundle'

some quick comments on the diff.

> --- got/got.c
> +++ got/got.c
> @@ -3465,7 +3466,244 @@ static const struct got_error *
>  	return err;
>  }
>  
> +/*
> + * Commit keywords to specify references in the repository
> + * (cf. svn keywords, fossil special tags, hg revsets).
> + */
> +#define GOT_KEYWORD_BASE	":base"	/* work tree base commit */
> +#define GOT_KEYWORD_HEAD	":head"	/* worktree/repo HEAD commit */
> +#define GOT_KEYWORD_BASE_LEN	(sizeof(GOT_KEYWORD_BASE) - 1)
> +#define GOT_KEYWORD_HEAD_LEN	(sizeof(GOT_KEYWORD_HEAD) - 1)
> +
> +struct keyword_mod {
> +	uint64_t	n;
> +	uint8_t		sym;
> +	uint8_t		iskeyword;
> +};
> +
> +#define GOT_KEYWORD_DESCENDANT	'+'
> +#define GOT_KEYWORD_ANCESTOR	'-'
> +
>  static const struct got_error *
> +keyword_modifier(struct keyword_mod *kwm, const char *keyword, size_t kwlen)
> +{
> +	const char	*p0, *p = keyword + kwlen -1;

we should guard from a kwlen of 0

> +	uint8_t		 symbol = 0;
> +
> +	/* check if keyword or modifier indicator is present */
> +	p0 = strchr(keyword, ':');
> +	if (p0 == NULL)
> +		return NULL;
> +
> +	kwm->iskeyword = 1;
> +
> +	/* find ancestor/descendant symbol at start of modifier: :(+|-)[N] */
> +	while (p > keyword && *p >= '0' && *p <= '9')
> +		--p;
> +	if (*p == GOT_KEYWORD_DESCENDANT || *p == GOT_KEYWORD_ANCESTOR)
> +		symbol = *p;
> +
> +	if (symbol) {
> +		unsigned long long	max;
> +		unsigned long long	n = 0;
> +		size_t			len = p - keyword;
> +
> +		max = ((unsigned long long)LLONG_MIN) / 10 + 1;
> +
> +		/*
> +		 * Don't overflow. Max valid request is whichever is greatest
> +		 * between the commit corresponding to keyword and either HEAD
> +		 * or ROOT, but just make sure not to overflow. If n extends
> +		 * beyond HEAD/ROOT, use HEAD/ROOT instead of a hard error.
> +		 */
> +		while (++p < keyword + kwlen) {
> +			if (n > max)
> +				return got_error_fmt(GOT_ERR_NO_SPACE,
> +				    "keyword modifier too large: %s", keyword);
> +			n = n * 10 + (*p - '0');
> +		}
> +
> +		if (n == 0 && len == kwlen - 1)
> +			n = 1;	/* keyword:(+/-) with no N == keyword:(+/-)1 */
> +
> +		kwm->n = n;
> +		kwm->sym = symbol;

this seems correct but it'd be easier to just strdup the string in the
parent and modify it in place here so that we can use strtonum() here.

> +	}
> +
> +	return NULL;
> +}
> +
> +/*
> + * Check if keyword matches "base" or "head" with optional trailing modifier:
> + *    keyword:(+/-)N	Nth generation descendant/antecedent of keyword
> + *    keyword:(+/-)	1st generation descendant/antecedent of keyword
> + *    keyword		commit pointed to by keyword
> + * Or if keyword is a valid reference _with_ a trailing modifier:
> + *    ref:(+/-)[N]	Nth generation descendant/antecedent of ref
> + * If a match is found, return the corresponding commit id string in *ret,
> + * of which the caller takes ownership and must free.
> + * Otherwise *ret will contain NULL to indicate a match was not found.
> + * If the modifier is greater than the number of ancestors/descendants,
> + * return the id string of the oldest/most recent commit (i.e., ROOT/HEAD).
> + */
> +static const struct got_error *
> +keyword_to_idstr(char **ret, const char *keyword, struct got_repository *repo,
> +    struct got_worktree *wt)
> +{
> +	const struct got_error		*err = NULL;
> +	struct got_commit_graph		*graph = NULL;
> +	struct got_object_id		*head_id = NULL, *kwid = NULL;
> +	struct got_object_id		 iter_id;
> +	struct got_reflist_head		 refs;
> +	struct got_object_id_queue	 commits;
> +	struct got_object_qid		*qid;
> +	struct keyword_mod		 kwm;
> +	char				*kwid_str = NULL;
> +	uint64_t			 n = 0;
> +
> +	*ret = NULL;
> +	TAILQ_INIT(&refs);
> +	STAILQ_INIT(&commits);
> +	memset(&kwm, 0, sizeof(kwm));
> +
> +	err = keyword_modifier(&kwm, keyword, strlen(keyword));
> +	if (err != NULL || !kwm.iskeyword)
> +		return err;
> +
> +	if (strncmp(keyword, GOT_KEYWORD_BASE, GOT_KEYWORD_BASE_LEN) == 0) {

there's a small issue: it accepts ":basefoo" as ":base" too.

I think it should be easier to strdup() the keyword at the start of
this function, then tokenize it and parse the various (NUL-terminated)
parts...

> +		if (wt == NULL)
> +			return got_error_msg(GOT_ERR_NOT_WORKTREE,
> +			    "'-c "GOT_KEYWORD_BASE"' requires work tree");
> +
> +		err = got_object_id_str(&kwid_str,
> +		    got_worktree_get_base_commit_id(wt));
> +		if (err != NULL)
> +			return err;
> +	} else if (strncmp(keyword,
> +	    GOT_KEYWORD_HEAD, GOT_KEYWORD_HEAD_LEN) == 0) {
> +		struct got_reference *head_ref;
> +
> +		err = got_ref_open(&head_ref, repo, wt != NULL ?
> +		    got_worktree_get_head_ref_name(wt) : GOT_REF_HEAD, 0);
> +		if (err != NULL)
> +			return err;
> +
> +		kwid_str = got_ref_to_str(head_ref);
> +		got_ref_close(head_ref);
> +		if (kwid_str == NULL)
> +			return got_error_from_errno("got_ref_to_str");
> +	} else if (*keyword == ':') {
> +		/* unknown keyword: caller will report "reference not found" */
> +		return NULL;
> +	} else {
> +		/* must be "reference:[(+|-)[N]]" */
> +		kwid_str = strdup(keyword);
> +		if (kwid_str == NULL)
> +			return got_error_from_errno("strdup");
> +		kwid_str[strcspn(kwid_str, ":")] = '\0';

...not unlike you're doing here.

> +	}
> +
> +	if (kwm.n == 0)
> +		goto done;	/* unmodified keyword */
> +
> +	err = got_ref_list(&refs, repo, NULL, got_ref_cmp_by_name, NULL);
> +	if (err)
> +		goto done;
> +
> +	err = got_repo_match_object_id(&kwid, NULL, kwid_str,
> +	    GOT_OBJ_TYPE_COMMIT, &refs, repo);
> +	if (err != NULL)
> +		goto done;
> +
> +	/*
> +	 * If looking for a descendant, we need to iterate from
> +	 * HEAD so grab its id now if it's not already in kwid.
> +	 */
> +	if (kwm.sym == GOT_KEYWORD_DESCENDANT &&
> +	    strncmp(keyword, GOT_KEYWORD_HEAD, GOT_KEYWORD_HEAD_LEN) != 0) {
> +		struct got_reference *head_ref;
> +
> +		err = got_ref_open(&head_ref, repo, wt != NULL ?
> +		    got_worktree_get_head_ref_name(wt) : GOT_REF_HEAD, 0);
> +		if (err != NULL)
> +			goto done;
> +		err = got_ref_resolve(&head_id, repo, head_ref);
> +		got_ref_close(head_ref);
> +		if (err != NULL)
> +			goto done;
> +	}
> +
> +	err = got_commit_graph_open(&graph, "/", 1);
> +	if (err)
> +		goto done;
> +
> +	err = got_commit_graph_iter_start(graph,
> +	    head_id != NULL ? head_id : kwid, repo, check_cancelled, NULL);
> +	if (err)
> +		goto done;
> +
> +	while (n <= kwm.n) {
> +		err = got_commit_graph_iter_next(&iter_id, graph, repo,
> +		    check_cancelled, NULL);
> +		if (err) {
> +			if (err->code == GOT_ERR_ITER_COMPLETED)
> +				err = NULL;
> +			break;
> +		}
> +
> +		if (kwm.sym == GOT_KEYWORD_DESCENDANT) {
> +			/*
> +			 * We want the Nth generation descendant of KEYWORD,
> +			 * so queue all commits from HEAD to KEYWORD then we
> +			 * can walk from KEYWORD to its Nth gen descendent.
> +			 */
> +			err = got_object_qid_alloc(&qid, &iter_id);
> +			if (err)
> +				goto done;
> +			STAILQ_INSERT_HEAD(&commits, qid, entry);
> +
> +			if (got_object_id_cmp(&iter_id, kwid) == 0)
> +				break;
> +			continue;
> +		}
> +		++n;
> +	}
> +
> +	if (kwm.sym == GOT_KEYWORD_DESCENDANT) {
> +		n = 0;
> +
> +		STAILQ_FOREACH(qid, &commits, entry) {
> +			if (qid == STAILQ_LAST(&commits, got_object_qid, entry)
> +			    || n == kwm.n)
> +				break;
> +			++n;
> +		}
> +
> +		memcpy(&iter_id, &qid->id, sizeof(iter_id));
> +
> +	}
> +
> +	free(kwid_str);
> +	err = got_object_id_str(&kwid_str, &iter_id);
> +
> +done:
> +	free(kwid);
> +	free(head_id);
> +	got_ref_list_free(&refs);
> +	got_object_id_queue_free(&commits);
> +	if (graph != NULL)
> +		got_commit_graph_close(graph);
> +
> +	if (err != NULL) {
> +		free(kwid_str);
> +		return err;
> +	}
> +
> +	*ret = kwid_str;
> +	return NULL;
> +}