From: Omar Polo Subject: gotwebd: urldecode querystring To: gameoftrees@openbsd.org Date: Sat, 03 Sep 2022 11:27:57 +0200 This makes gotwebd properly url-decode (or percent-decode) the query string. What does this mean in layman's terms? Easy, say goodbye to logs entry like gotweb_process_request: %2Fcompat: no such entry found in tree :) One nice thing about urldecoding is that it can be safely done in-place. URLs encode some otherwise invalid bytes using the %XY notation where 'XY' is the offending byte encoded as hexadecimal string of two characters (e.g. the space is %20, '%' itself is %25, etc...) Of course this being the web there are a bazillion of different ways of doing things. Sometimes the space is encoded as the '+' character. I decided to not respect this; a follow-up diff to properly generate urls in gotwebd will start encoding all the spaces as '%20' so it shouldn't be an issue. If we find that this is a real issue, we can easily tweak gotweb_urldecode to support that notation too. As a safety guard I'm rejecting the %00 sequence which, while it's perfectly valid way of encoding '\0', shouldn't be present on any gotwebd query parameter. diff -s /home/op/w/got commit - e5e662e42c45f0d30f5f97fb0e2ad5f3c4f8b488 path + /home/op/w/got (staged changes) blob - 0160fc30c590ae25406be33eb3401352925def09 blob + 00c0cad7a1dfa41e7edf76bc95925af1573138c8 --- gotwebd/gotweb.c +++ gotwebd/gotweb.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -421,13 +422,38 @@ err: return error; } +/* percent-decode `val' in place. */ static const struct got_error * +gotweb_urldecode(char *val) +{ + for (; *val; ++val) { + if (*val != '%') + continue; + + if (!isxdigit((unsigned char)val[1]) || + !isxdigit((unsigned char)val[2]) || + (val[1] == '0' && val[2] == '0')) + return got_error(GOT_ERR_BAD_QUERYSTRING); + + if (sscanf(val + 1, "%2hhx", val) != 1) + return got_error(GOT_ERR_BAD_QUERYSTRING); + memmove(val + 1, val + 3, strlen(val + 3) + 1); /* NUL */ + } + + return NULL; +} + +static const struct got_error * gotweb_assign_querystring(struct querystring **qs, char *key, char *value) { const struct got_error *error = NULL; const char *errstr; int a_cnt, el_cnt; + error = gotweb_urldecode(value); + if (error) + return error; + for (el_cnt = 0; el_cnt < QSELEM__MAX; el_cnt++) { if (strcmp(key, querystring_keys[el_cnt].name) != 0) continue; blob - b9ff4ec842e0441442db7c0cbce9ffc843bc4b47 blob + 81dce95fb636e4a9cb9a660616f6048f8403eb14 --- include/got_error.h +++ include/got_error.h @@ -173,6 +173,7 @@ #define GOT_ERR_VERIFY_TAG_SIGNATURE 155 #define GOT_ERR_SIGNING_TAG 156 #define GOT_ERR_COMMIT_REDUNDANT_AUTHOR 157 +#define GOT_ERR_BAD_QUERYSTRING 158 struct got_error { int code; blob - f208e82e92fa8d5f11f714140e353215e8477e04 blob + 711554564cdd65bb927ad7bc3bebbbc253efd6ac --- lib/error.c +++ lib/error.c @@ -222,6 +222,7 @@ static const struct got_error got_errors[] = { { GOT_ERR_SIGNING_TAG, "unable to sign tag" }, { GOT_ERR_COMMIT_REDUNDANT_AUTHOR, "specified author is equal to the " "default one"}, + { GOT_ERR_BAD_QUERYSTRING, "invalid query string" }, }; static struct got_custom_error {