"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
gotwebd: urldecode querystring
To:
gameoftrees@openbsd.org
Date:
Sat, 03 Sep 2022 11:27:57 +0200

Download raw body.

Thread
This makes gotwebd properly url-decode (or percent-decode) the query
string.  What does this mean in layman's terms?  Easy, say goodbye to
logs entry like

gotweb_process_request: %2Fcompat: no such entry found in tree

:)

One nice thing about urldecoding is that it can be safely done
in-place.  URLs encode some otherwise invalid bytes using the %XY
notation where 'XY' is the offending byte encoded as hexadecimal
string of two characters (e.g. the space is %20, '%' itself is %25,
etc...)

Of course this being the web there are a bazillion of different ways
of doing things.  Sometimes the space is encoded as the '+' character.
I decided to not respect this; a follow-up diff to properly generate
urls in gotwebd will start encoding all the spaces as '%20' so it
shouldn't be an issue.  If we find that this is a real issue, we can
easily tweak gotweb_urldecode to support that notation too.

As a safety guard I'm rejecting the %00 sequence which, while it's
perfectly valid way of encoding '\0', shouldn't be present on any
gotwebd query parameter.

diff -s /home/op/w/got
commit - e5e662e42c45f0d30f5f97fb0e2ad5f3c4f8b488
path + /home/op/w/got (staged changes)
blob - 0160fc30c590ae25406be33eb3401352925def09
blob + 00c0cad7a1dfa41e7edf76bc95925af1573138c8
--- gotwebd/gotweb.c
+++ gotwebd/gotweb.c
@@ -23,6 +23,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
+#include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
 #include <event.h>
@@ -421,13 +422,38 @@ err:
 	return error;
 }
 
+/* percent-decode `val' in place. */
 static const struct got_error *
+gotweb_urldecode(char *val)
+{
+	for (; *val; ++val) {
+		if (*val != '%')
+			continue;
+
+		if (!isxdigit((unsigned char)val[1]) ||
+		    !isxdigit((unsigned char)val[2]) ||
+		    (val[1] == '0' && val[2] == '0'))
+			return got_error(GOT_ERR_BAD_QUERYSTRING);
+
+		if (sscanf(val + 1, "%2hhx", val) != 1)
+			return got_error(GOT_ERR_BAD_QUERYSTRING);
+		memmove(val + 1, val + 3, strlen(val + 3) + 1); /* NUL */
+	}
+
+	return NULL;
+}
+
+static const struct got_error *
 gotweb_assign_querystring(struct querystring **qs, char *key, char *value)
 {
 	const struct got_error *error = NULL;
 	const char *errstr;
 	int a_cnt, el_cnt;
 
+	error = gotweb_urldecode(value);
+	if (error)
+		return error;
+
 	for (el_cnt = 0; el_cnt < QSELEM__MAX; el_cnt++) {
 		if (strcmp(key, querystring_keys[el_cnt].name) != 0)
 			continue;
blob - b9ff4ec842e0441442db7c0cbce9ffc843bc4b47
blob + 81dce95fb636e4a9cb9a660616f6048f8403eb14
--- include/got_error.h
+++ include/got_error.h
@@ -173,6 +173,7 @@
 #define GOT_ERR_VERIFY_TAG_SIGNATURE 155
 #define GOT_ERR_SIGNING_TAG	156
 #define GOT_ERR_COMMIT_REDUNDANT_AUTHOR 157
+#define GOT_ERR_BAD_QUERYSTRING	158
 
 struct got_error {
         int code;
blob - f208e82e92fa8d5f11f714140e353215e8477e04
blob + 711554564cdd65bb927ad7bc3bebbbc253efd6ac
--- lib/error.c
+++ lib/error.c
@@ -222,6 +222,7 @@ static const struct got_error got_errors[] = {
 	{ GOT_ERR_SIGNING_TAG, "unable to sign tag" },
 	{ GOT_ERR_COMMIT_REDUNDANT_AUTHOR, "specified author is equal to the "
 	    "default one"},
+	{ GOT_ERR_BAD_QUERYSTRING, "invalid query string" },
 };
 
 static struct got_custom_error {