"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
Re: fix email parsing
To:
Stefan Sperling <stsp@stsp.name>
Cc:
gameoftrees@openbsd.org
Date:
Tue, 19 Jul 2022 10:55:17 +0200

Download raw body.

Thread
Stefan Sperling <stsp@stsp.name> wrote:
> On Tue, Jul 19, 2022 at 12:31:23AM +0200, Omar Polo wrote:
> > @@ -548,24 +548,23 @@ import_progress(void *arg, const char *path)
> >  	return NULL;
> >  }
> >  
> > -static int
> > +static const struct got_error *
> >  valid_author(const char *author)
> >  {
> > -	/*
> > -	 * Really dumb email address check; we're only doing this to
> > -	 * avoid git's object parser breaking on commits we create.
> > -	 */
> 
> Please leave the part of this comment intact which says that these
> rules were made up by Git, not by us. Our own parser doesn't care
> how the author/committer name is formatted.
> 
> > blob - bde78fabea00a9c132a5ec2ea937fc0665428986
> > file + lib/error.c
> > --- lib/error.c
> > +++ lib/error.c
> > @@ -159,8 +159,7 @@ static const struct got_error got_errors[] = {
> >  	{ GOT_ERR_STAGED_PATHS, "work tree contains files with staged "
> >  	    "changes; these changes must be committed or unstaged first" },
> >  	{ GOT_ERR_PATCH_CHOICE, "invalid patch choice" },
> > -	{ GOT_ERR_COMMIT_NO_EMAIL, "commit author's email address is required "
> > -	    "for compatibility with Git" },
> > +	{ GOT_ERR_BAD_EMAIL, "invalid email address" },
> 
> For the same reason, I would prefer to keep this error as it was.

sure, here's an updated diff.

P.S.: there was also a gigantic hole in how i called got_error_fmt: i
was passing an untrusted string to a printf-like function...

diff refs/heads/main refs/heads/fixmail
commit - d2587c5f95c6edb51ccc8d4abfac838b58f3a463
commit + f7bfc9a9c7bbbfb7f742c32e1519c2453793e38b
blob - b7c305dd8fba01e7b8b4d08eae548d3f7c53246c
blob + 720031fb1fb989aeb5dc7c6d84ce3ef76af573e7
--- got/got.c
+++ got/got.c
@@ -548,24 +548,28 @@ import_progress(void *arg, const char *path)
 	return NULL;
 }
 
-static int
+static const struct got_error *
 valid_author(const char *author)
 {
+	const char *email = author;
+
 	/*
 	 * Really dumb email address check; we're only doing this to
 	 * avoid git's object parser breaking on commits we create.
 	 */
-	while (*author && *author != '<')
+
+	while (*author && *author != '\n' && *author != '<')
 		author++;
-	if (*author != '<')
-		return 0;
-	while (*author && *author != '@')
+	if (*author++ != '<')
+		goto err;
+	while (*author && *author != '\n' && *author != '<' && *author != '>')
 		author++;
-	if (*author != '@')
-		return 0;
-	while (*author && *author != '>')
-		author++;
-	return *author == '>';
+	if (strcmp(author, ">") != 0)
+		goto err;
+	return NULL;
+
+err:
+	return got_error_fmt(GOT_ERR_COMMIT_NO_EMAIL, "%s", email);
 }
 
 static const struct got_error *
@@ -625,8 +629,8 @@ get_author(char **author, struct got_repository *repo,
 	if (*author == NULL)
 		return got_error_from_errno("strdup");
 
-	if (!valid_author(*author)) {
-		err = got_error_fmt(GOT_ERR_COMMIT_NO_EMAIL, "%s", *author);
+	err = valid_author(*author);
+	if (err) {
 		free(*author);
 		*author = NULL;
 	}
blob - 4fe2c68a57b89e39bec69f8c2dbcf24f0f265384
blob + 74b1300abdc8bad6089c22251a9efd9642490d49
--- regress/cmdline/commit.sh
+++ regress/cmdline/commit.sh
@@ -640,7 +640,11 @@ test_commit_outside_refs_heads() {
 
 test_commit_no_email() {
 	local testroot=`test_init commit_no_email`
+	local errmsg=""
 
+	errmsg="commit author's email address is required for"
+	errmsg="$errmsg compatibility with Git"
+
 	got checkout $testroot/repo $testroot/wt > /dev/null
 	ret=$?
 	if [ $ret -ne 0 ]; then
@@ -653,10 +657,7 @@ test_commit_no_email() {
 		got commit -m 'test no email' > $testroot/stdout \
 		2> $testroot/stderr)
 
-	echo -n "got: :flan_hacker:: commit author's email address " \
-		> $testroot/stderr.expected
-	echo "is required for compatibility with Git" \
-		>> $testroot/stderr.expected
+	printf "got: :flan_hacker:: %s\n" "$errmsg" > $testroot/stderr.expected
 	cmp -s $testroot/stderr.expected $testroot/stderr
 	ret=$?
 	if [ $ret -ne 0 ]; then
@@ -670,8 +671,56 @@ test_commit_no_email() {
 	ret=$?
 	if [ $ret -ne 0 ]; then
 		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" $ret
+		return 1
 	fi
-	test_done "$testroot" "$ret"
+
+	# try again with a newline inside the email
+	(cd $testroot/wt \
+		&& FS=' ' env GOT_AUTHOR="$(printf "Flan <hack\ner>")" \
+		got commit -m 'test invalid email' > $testroot/stdout \
+		2> $testroot/stderr)
+
+	printf "got: Flan <hack\ner>: %s\n" "$errmsg" \
+		> $testroot/stderr.expected
+	cmp -s $testroot/stderr.expected $testroot/stderr
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stderr.expected $testroot/stderr
+		test_done "$testroot" $ret
+		return 1
+	fi
+
+	echo -n > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" $ret
+		return 1
+	fi
+
+	# try again with a < inside the email
+	(cd $testroot/wt && env GOT_AUTHOR="$(printf "Flan <ha<ker>")" \
+		got commit -m 'test invalid email' > $testroot/stdout \
+		2> $testroot/stderr)
+
+	printf "got: Flan <ha<ker>: %s\n" "$errmsg" > $testroot/stderr.expected
+	cmp -s $testroot/stderr.expected $testroot/stderr
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stderr.expected $testroot/stderr
+		test_done "$testroot" $ret
+		return 1
+	fi
+
+	echo -n > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+	fi
+	test_done "$testroot" $ret
 }
 
 test_commit_tree_entry_sorting() {