Download raw body.
gotwebd: urldecode querystring
On 2022/09/03 12:01:06 +0200, Stefan Sperling <stsp@stsp.name> wrote: > On Sat, Sep 03, 2022 at 11:27:57AM +0200, Omar Polo wrote: > > As a safety guard I'm rejecting the %00 sequence which, while it's > > perfectly valid way of encoding '\0', shouldn't be present on any > > gotwebd query parameter. > > Could we be even stricter than this? > For example, is there any reason for non-ASCII characters to be present > in the decoded query string, unless the data is a path? There are only two things that gotwebd puts into the querystring: branch names and file paths. branch names have a few restrictions over file names, but both are pretty flexible in the bytes range allowed. paths anyway allow every byte except NUL. > > +/* percent-decode `val' in place. */ > > static const struct got_error * > > +gotweb_urldecode(char *val) > > +{ > > + for (; *val; ++val) { > > + if (*val != '%') > > + continue; > > + > > + if (!isxdigit((unsigned char)val[1]) || > > + !isxdigit((unsigned char)val[2]) || > > + (val[1] == '0' && val[2] == '0')) > > + return got_error(GOT_ERR_BAD_QUERYSTRING); > > + > > + if (sscanf(val + 1, "%2hhx", val) != 1) > > + return got_error(GOT_ERR_BAD_QUERYSTRING); > > + memmove(val + 1, val + 3, strlen(val + 3) + 1); /* NUL */ > > Could we manually parse and translate the hex value instead of using scanf()? > See url_decode() in usr.sbin/httpd/httpd.c for example. thanks for the pointer! I was a bit too lazy here. I like httpd' url_decode, here's a new version of the diff using it. There's one thing that I don't get in the httpd' url_decode function though: 569 /* Encoding character is followed by two hex chars */ 570 if (!(isxdigit((unsigned char)p[1]) && 571 isxdigit((unsigned char)p[2]))) 572 return (NULL); 573 shouldn't be like in diff below? diff b94206d0acc1c55bad1233c35f959fa7c4af297b 5f4d240733c5d83c2ad746232c22c04f26f04d79 commit - b94206d0acc1c55bad1233c35f959fa7c4af297b commit + 5f4d240733c5d83c2ad746232c22c04f26f04d79 blob - 0160fc30c590ae25406be33eb3401352925def09 blob + 401b70e6dff3d9c32b2e251335998950b2a3ee8b --- gotwebd/gotweb.c +++ gotwebd/gotweb.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2016, 2019, 2020-2022 Tracey Emery <tracey@traceyemery.net> * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> + * Copyright (c) 2014 Reyk Floeter <reyk@openbsd.org> * Copyright (c) 2013 David Gwynne <dlg@openbsd.org> * Copyright (c) 2013 Florian Obser <florian@openbsd.org> * @@ -23,6 +24,7 @@ #include <sys/stat.h> #include <sys/types.h> +#include <ctype.h> #include <dirent.h> #include <errno.h> #include <event.h> @@ -421,13 +423,62 @@ err: return error; } +/* + * Adapted from usr.sbin/httpd/httpd.c url_decode. + */ static const struct got_error * +gotweb_urldecode(char *url) +{ + char *p, *q; + char hex[3]; + unsigned long x; + + hex[2] = '\0'; + p = q = url; + + while (*p != '\0') { + switch (*p) { + case '%': + /* Encoding character is followed by two hex chars */ + if (!isxdigit((unsigned char)p[1]) || + !isxdigit((unsigned char)p[2]) || + (p[1] == '0' && p[2] == '0')) + return got_error(GOT_ERR_BAD_QUERYSTRING); + + hex[0] = p[1]; + hex[1] = p[2]; + + /* + * We don't have to validate "hex" because it is + * guaranteed to include two hex chars followed by nul. + */ + x = strtoul(hex, NULL, 16); + *q = (char)x; + p += 2; + break; + default: + *q = *p; + break; + } + p++; + q++; + } + *q = '\0'; + + return NULL; +} + +static const struct got_error * gotweb_assign_querystring(struct querystring **qs, char *key, char *value) { const struct got_error *error = NULL; const char *errstr; int a_cnt, el_cnt; + error = gotweb_urldecode(value); + if (error) + return error; + for (el_cnt = 0; el_cnt < QSELEM__MAX; el_cnt++) { if (strcmp(key, querystring_keys[el_cnt].name) != 0) continue; blob - b9ff4ec842e0441442db7c0cbce9ffc843bc4b47 blob + 81dce95fb636e4a9cb9a660616f6048f8403eb14 --- include/got_error.h +++ include/got_error.h @@ -173,6 +173,7 @@ #define GOT_ERR_VERIFY_TAG_SIGNATURE 155 #define GOT_ERR_SIGNING_TAG 156 #define GOT_ERR_COMMIT_REDUNDANT_AUTHOR 157 +#define GOT_ERR_BAD_QUERYSTRING 158 struct got_error { int code; blob - f208e82e92fa8d5f11f714140e353215e8477e04 blob + 711554564cdd65bb927ad7bc3bebbbc253efd6ac --- lib/error.c +++ lib/error.c @@ -222,6 +222,7 @@ static const struct got_error got_errors[] = { { GOT_ERR_SIGNING_TAG, "unable to sign tag" }, { GOT_ERR_COMMIT_REDUNDANT_AUTHOR, "specified author is equal to the " "default one"}, + { GOT_ERR_BAD_QUERYSTRING, "invalid query string" }, }; static struct got_custom_error {
gotwebd: urldecode querystring