"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
fix email parsing
To:
gameoftrees@openbsd.org
Date:
Tue, 19 Jul 2022 00:31:23 +0200

Download raw body.

Thread
I was reading the documentation of git-fast-import when i stumbled
across this passage (in the description of the `commit' command in
git-fast-import(1).)

> Here <name> is the person's display name (for example "Com M Itter") and
> <email> is the person's email address ("cm@example.com").  LT and GT are
> the literal less-than (\x3c) and greater-than (\x3e) symbols. These are
> required to delimit the email address from the other fields in the line.
> Note that <name> and <email> are free-form and may contain any sequence
> of bytes, except LT, GT and LF. <name> is typically UTF-8 encoded.

this reminded me that our valid_author is both too loose and too strict:

 - line feeds are illegal in both the name and the email address (of
   course!) but we're allowing them
 - the '@' inside the email address is not mandatory (and at least
   fossil export --git doesn't include it -- in my test it exports
   commits done by "op <op>")
 - the mail can't contain <
 - doesn't ensure that there isn't gibberish after the email

diff below addresses these three points.  it backports some changes to
the error name/description and to the return type from my `got ci -A'
patch and conflicts with it.  At this poirt the email address check
shouldn't be "dumb" anymore as it's in par with the git one (from the
documentation at least, didn't even tried to look at what git actually
does under the hood.)

ok?

diff /home/op/w/got
commit - d2587c5f95c6edb51ccc8d4abfac838b58f3a463
path + /home/op/w/got
blob - b7c305dd8fba01e7b8b4d08eae548d3f7c53246c
file + got/got.c
--- got/got.c
+++ got/got.c
@@ -548,24 +548,23 @@ import_progress(void *arg, const char *path)
 	return NULL;
 }
 
-static int
+static const struct got_error *
 valid_author(const char *author)
 {
-	/*
-	 * Really dumb email address check; we're only doing this to
-	 * avoid git's object parser breaking on commits we create.
-	 */
-	while (*author && *author != '<')
+	const char *email = author;
+
+	while (*author && *author != '\n' && *author != '<')
 		author++;
-	if (*author != '<')
-		return 0;
-	while (*author && *author != '@')
+	if (*author++ != '<')
+		goto err;
+	while (*author && *author != '\n' && *author != '<' && *author != '>')
 		author++;
-	if (*author != '@')
-		return 0;
-	while (*author && *author != '>')
-		author++;
-	return *author == '>';
+	if (strcmp(author, ">") != 0)
+		goto err;
+	return NULL;
+
+err:
+	return got_error_fmt(GOT_ERR_BAD_EMAIL, email);
 }
 
 static const struct got_error *
@@ -625,8 +624,8 @@ get_author(char **author, struct got_repository *repo,
 	if (*author == NULL)
 		return got_error_from_errno("strdup");
 
-	if (!valid_author(*author)) {
-		err = got_error_fmt(GOT_ERR_COMMIT_NO_EMAIL, "%s", *author);
+	err = valid_author(*author);
+	if (err != NULL) {
 		free(*author);
 		*author = NULL;
 	}
blob - 9dcd5b85b181ee63d76a75bf16c9ae0dc46aaa26
file + include/got_error.h
--- include/got_error.h
+++ include/got_error.h
@@ -122,7 +122,7 @@
 #define GOT_ERR_FILE_NOT_STAGED 105
 #define GOT_ERR_STAGED_PATHS	106
 #define GOT_ERR_PATCH_CHOICE	107
-#define GOT_ERR_COMMIT_NO_EMAIL	108
+#define GOT_ERR_BAD_EMAIL	108
 #define GOT_ERR_TAG_EXISTS	109
 #define GOT_ERR_GIT_REPO_FORMAT	110
 #define GOT_ERR_REBASE_REQUIRED	111
blob - bde78fabea00a9c132a5ec2ea937fc0665428986
file + lib/error.c
--- lib/error.c
+++ lib/error.c
@@ -159,8 +159,7 @@ static const struct got_error got_errors[] = {
 	{ GOT_ERR_STAGED_PATHS, "work tree contains files with staged "
 	    "changes; these changes must be committed or unstaged first" },
 	{ GOT_ERR_PATCH_CHOICE, "invalid patch choice" },
-	{ GOT_ERR_COMMIT_NO_EMAIL, "commit author's email address is required "
-	    "for compatibility with Git" },
+	{ GOT_ERR_BAD_EMAIL, "invalid email address" },
 	{ GOT_ERR_TAG_EXISTS,"specified tag already exists" },
 	{ GOT_ERR_GIT_REPO_FORMAT,"unknown git repository format version" },
 	{ GOT_ERR_REBASE_REQUIRED,"specified branch must be rebased first" },
blob - 4fe2c68a57b89e39bec69f8c2dbcf24f0f265384
file + regress/cmdline/commit.sh
--- regress/cmdline/commit.sh
+++ regress/cmdline/commit.sh
@@ -653,10 +653,8 @@ test_commit_no_email() {
 		got commit -m 'test no email' > $testroot/stdout \
 		2> $testroot/stderr)
 
-	echo -n "got: :flan_hacker:: commit author's email address " \
+	echo "got: :flan_hacker:: invalid email address" \
 		> $testroot/stderr.expected
-	echo "is required for compatibility with Git" \
-		>> $testroot/stderr.expected
 	cmp -s $testroot/stderr.expected $testroot/stderr
 	ret=$?
 	if [ $ret -ne 0 ]; then
@@ -670,8 +668,58 @@ test_commit_no_email() {
 	ret=$?
 	if [ $ret -ne 0 ]; then
 		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" $ret
+		return 1
 	fi
-	test_done "$testroot" "$ret"
+
+	# try again with a newline inside the email
+	(cd $testroot/wt \
+		&& FS=' ' env GOT_AUTHOR="$(printf "Flan <hack\ner>")" \
+		got commit -m 'test invalid email' > $testroot/stdout \
+		2> $testroot/stderr)
+
+	printf "got: Flan <hack\ner>: invalid email address\n" \
+		> $testroot/stderr.expected
+	cmp -s $testroot/stderr.expected $testroot/stderr
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stderr.expected $testroot/stderr
+		test_done "$testroot" $ret
+		return 1
+	fi
+
+	echo -n > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" $ret
+		return 1
+	fi
+
+	# try again with a < inside the email
+	(cd $testroot/wt \
+		&& FS=' ' env GOT_AUTHOR="$(printf "Flan <ha<ker>")" \
+		got commit -m 'test invalid email' > $testroot/stdout \
+		2> $testroot/stderr)
+
+	printf "got: Flan <ha<ker>: invalid email address\n" \
+		> $testroot/stderr.expected
+	cmp -s $testroot/stderr.expected $testroot/stderr
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stderr.expected $testroot/stderr
+		test_done "$testroot" $ret
+		return 1
+	fi
+
+	echo -n > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+	fi
+	test_done "$testroot" $ret
 }
 
 test_commit_tree_entry_sorting() {