"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
Re: gotwebd: urldecode querystring
To:
Stefan Sperling <stsp@stsp.name>
Cc:
gameoftrees@openbsd.org
Date:
Sat, 03 Sep 2022 12:27:17 +0200

Download raw body.

Thread
On 2022/09/03 12:01:06 +0200, Stefan Sperling <stsp@stsp.name> wrote:
> On Sat, Sep 03, 2022 at 11:27:57AM +0200, Omar Polo wrote:
> > As a safety guard I'm rejecting the %00 sequence which, while it's
> > perfectly valid way of encoding '\0', shouldn't be present on any
> > gotwebd query parameter.
> 
> Could we be even stricter than this?
> For example, is there any reason for non-ASCII characters to be present
> in the decoded query string, unless the data is a path?

There are only two things that gotwebd puts into the querystring:
branch names and file paths.  branch names have a few restrictions
over file names, but both are pretty flexible in the bytes range
allowed.

paths anyway allow every byte except NUL.

> > +/* percent-decode `val' in place. */
> >  static const struct got_error *
> > +gotweb_urldecode(char *val)
> > +{
> > +	for (; *val; ++val) {
> > +		if (*val != '%')
> > +			continue;
> > +
> > +		if (!isxdigit((unsigned char)val[1]) ||
> > +		    !isxdigit((unsigned char)val[2]) ||
> > +		    (val[1] == '0' && val[2] == '0'))
> > +			return got_error(GOT_ERR_BAD_QUERYSTRING);
> > +
> > +		if (sscanf(val + 1, "%2hhx", val) != 1)
> > +			return got_error(GOT_ERR_BAD_QUERYSTRING);
> > +		memmove(val + 1, val + 3, strlen(val + 3) + 1); /* NUL */
> 
> Could we manually parse and translate the hex value instead of using scanf()?
> See url_decode() in usr.sbin/httpd/httpd.c for example.

thanks for the pointer!  I was a bit too lazy here.  I like httpd'
url_decode, here's a new version of the diff using it.

There's one thing that I don't get in the httpd' url_decode function though:

  569	/* Encoding character is followed by two hex chars */
  570	if (!(isxdigit((unsigned char)p[1]) &&
  571	    isxdigit((unsigned char)p[2])))
  572		return (NULL);
  573

shouldn't be like in diff below?


diff b94206d0acc1c55bad1233c35f959fa7c4af297b 5f4d240733c5d83c2ad746232c22c04f26f04d79
commit - b94206d0acc1c55bad1233c35f959fa7c4af297b
commit + 5f4d240733c5d83c2ad746232c22c04f26f04d79
blob - 0160fc30c590ae25406be33eb3401352925def09
blob + 401b70e6dff3d9c32b2e251335998950b2a3ee8b
--- gotwebd/gotweb.c
+++ gotwebd/gotweb.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2016, 2019, 2020-2022 Tracey Emery <tracey@traceyemery.net>
  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
+ * Copyright (c) 2014 Reyk Floeter <reyk@openbsd.org>
  * Copyright (c) 2013 David Gwynne <dlg@openbsd.org>
  * Copyright (c) 2013 Florian Obser <florian@openbsd.org>
  *
@@ -23,6 +24,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
+#include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
 #include <event.h>
@@ -421,13 +423,62 @@ err:
 	return error;
 }
 
+/*
+ * Adapted from usr.sbin/httpd/httpd.c url_decode.
+ */
 static const struct got_error *
+gotweb_urldecode(char *url)
+{
+	char		*p, *q;
+	char		 hex[3];
+	unsigned long	 x;
+
+	hex[2] = '\0';
+	p = q = url;
+
+	while (*p != '\0') {
+		switch (*p) {
+		case '%':
+			/* Encoding character is followed by two hex chars */
+			if (!isxdigit((unsigned char)p[1]) ||
+			    !isxdigit((unsigned char)p[2]) ||
+			    (p[1] == '0' && p[2] == '0'))
+				return got_error(GOT_ERR_BAD_QUERYSTRING);
+
+			hex[0] = p[1];
+			hex[1] = p[2];
+
+			/*
+			 * We don't have to validate "hex" because it is
+			 * guaranteed to include two hex chars followed by nul.
+			 */
+			x = strtoul(hex, NULL, 16);
+			*q = (char)x;
+			p += 2;
+			break;
+		default:
+			*q = *p;
+			break;
+		}
+		p++;
+		q++;
+	}
+	*q = '\0';
+
+	return NULL;
+}
+
+static const struct got_error *
 gotweb_assign_querystring(struct querystring **qs, char *key, char *value)
 {
 	const struct got_error *error = NULL;
 	const char *errstr;
 	int a_cnt, el_cnt;
 
+	error = gotweb_urldecode(value);
+	if (error)
+		return error;
+
 	for (el_cnt = 0; el_cnt < QSELEM__MAX; el_cnt++) {
 		if (strcmp(key, querystring_keys[el_cnt].name) != 0)
 			continue;
blob - b9ff4ec842e0441442db7c0cbce9ffc843bc4b47
blob + 81dce95fb636e4a9cb9a660616f6048f8403eb14
--- include/got_error.h
+++ include/got_error.h
@@ -173,6 +173,7 @@
 #define GOT_ERR_VERIFY_TAG_SIGNATURE 155
 #define GOT_ERR_SIGNING_TAG	156
 #define GOT_ERR_COMMIT_REDUNDANT_AUTHOR 157
+#define GOT_ERR_BAD_QUERYSTRING	158
 
 struct got_error {
         int code;
blob - f208e82e92fa8d5f11f714140e353215e8477e04
blob + 711554564cdd65bb927ad7bc3bebbbc253efd6ac
--- lib/error.c
+++ lib/error.c
@@ -222,6 +222,7 @@ static const struct got_error got_errors[] = {
 	{ GOT_ERR_SIGNING_TAG, "unable to sign tag" },
 	{ GOT_ERR_COMMIT_REDUNDANT_AUTHOR, "specified author is equal to the "
 	    "default one"},
+	{ GOT_ERR_BAD_QUERYSTRING, "invalid query string" },
 };
 
 static struct got_custom_error {