Download raw body.
gotwebd: urldecode querystring
On 2022/09/03 12:01:06 +0200, Stefan Sperling <stsp@stsp.name> wrote:
> On Sat, Sep 03, 2022 at 11:27:57AM +0200, Omar Polo wrote:
> > As a safety guard I'm rejecting the %00 sequence which, while it's
> > perfectly valid way of encoding '\0', shouldn't be present on any
> > gotwebd query parameter.
>
> Could we be even stricter than this?
> For example, is there any reason for non-ASCII characters to be present
> in the decoded query string, unless the data is a path?
There are only two things that gotwebd puts into the querystring:
branch names and file paths. branch names have a few restrictions
over file names, but both are pretty flexible in the bytes range
allowed.
paths anyway allow every byte except NUL.
> > +/* percent-decode `val' in place. */
> > static const struct got_error *
> > +gotweb_urldecode(char *val)
> > +{
> > + for (; *val; ++val) {
> > + if (*val != '%')
> > + continue;
> > +
> > + if (!isxdigit((unsigned char)val[1]) ||
> > + !isxdigit((unsigned char)val[2]) ||
> > + (val[1] == '0' && val[2] == '0'))
> > + return got_error(GOT_ERR_BAD_QUERYSTRING);
> > +
> > + if (sscanf(val + 1, "%2hhx", val) != 1)
> > + return got_error(GOT_ERR_BAD_QUERYSTRING);
> > + memmove(val + 1, val + 3, strlen(val + 3) + 1); /* NUL */
>
> Could we manually parse and translate the hex value instead of using scanf()?
> See url_decode() in usr.sbin/httpd/httpd.c for example.
thanks for the pointer! I was a bit too lazy here. I like httpd'
url_decode, here's a new version of the diff using it.
There's one thing that I don't get in the httpd' url_decode function though:
569 /* Encoding character is followed by two hex chars */
570 if (!(isxdigit((unsigned char)p[1]) &&
571 isxdigit((unsigned char)p[2])))
572 return (NULL);
573
shouldn't be like in diff below?
diff b94206d0acc1c55bad1233c35f959fa7c4af297b 5f4d240733c5d83c2ad746232c22c04f26f04d79
commit - b94206d0acc1c55bad1233c35f959fa7c4af297b
commit + 5f4d240733c5d83c2ad746232c22c04f26f04d79
blob - 0160fc30c590ae25406be33eb3401352925def09
blob + 401b70e6dff3d9c32b2e251335998950b2a3ee8b
--- gotwebd/gotweb.c
+++ gotwebd/gotweb.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2016, 2019, 2020-2022 Tracey Emery <tracey@traceyemery.net>
* Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
+ * Copyright (c) 2014 Reyk Floeter <reyk@openbsd.org>
* Copyright (c) 2013 David Gwynne <dlg@openbsd.org>
* Copyright (c) 2013 Florian Obser <florian@openbsd.org>
*
@@ -23,6 +24,7 @@
#include <sys/stat.h>
#include <sys/types.h>
+#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <event.h>
@@ -421,13 +423,62 @@ err:
return error;
}
+/*
+ * Adapted from usr.sbin/httpd/httpd.c url_decode.
+ */
static const struct got_error *
+gotweb_urldecode(char *url)
+{
+ char *p, *q;
+ char hex[3];
+ unsigned long x;
+
+ hex[2] = '\0';
+ p = q = url;
+
+ while (*p != '\0') {
+ switch (*p) {
+ case '%':
+ /* Encoding character is followed by two hex chars */
+ if (!isxdigit((unsigned char)p[1]) ||
+ !isxdigit((unsigned char)p[2]) ||
+ (p[1] == '0' && p[2] == '0'))
+ return got_error(GOT_ERR_BAD_QUERYSTRING);
+
+ hex[0] = p[1];
+ hex[1] = p[2];
+
+ /*
+ * We don't have to validate "hex" because it is
+ * guaranteed to include two hex chars followed by nul.
+ */
+ x = strtoul(hex, NULL, 16);
+ *q = (char)x;
+ p += 2;
+ break;
+ default:
+ *q = *p;
+ break;
+ }
+ p++;
+ q++;
+ }
+ *q = '\0';
+
+ return NULL;
+}
+
+static const struct got_error *
gotweb_assign_querystring(struct querystring **qs, char *key, char *value)
{
const struct got_error *error = NULL;
const char *errstr;
int a_cnt, el_cnt;
+ error = gotweb_urldecode(value);
+ if (error)
+ return error;
+
for (el_cnt = 0; el_cnt < QSELEM__MAX; el_cnt++) {
if (strcmp(key, querystring_keys[el_cnt].name) != 0)
continue;
blob - b9ff4ec842e0441442db7c0cbce9ffc843bc4b47
blob + 81dce95fb636e4a9cb9a660616f6048f8403eb14
--- include/got_error.h
+++ include/got_error.h
@@ -173,6 +173,7 @@
#define GOT_ERR_VERIFY_TAG_SIGNATURE 155
#define GOT_ERR_SIGNING_TAG 156
#define GOT_ERR_COMMIT_REDUNDANT_AUTHOR 157
+#define GOT_ERR_BAD_QUERYSTRING 158
struct got_error {
int code;
blob - f208e82e92fa8d5f11f714140e353215e8477e04
blob + 711554564cdd65bb927ad7bc3bebbbc253efd6ac
--- lib/error.c
+++ lib/error.c
@@ -222,6 +222,7 @@ static const struct got_error got_errors[] = {
{ GOT_ERR_SIGNING_TAG, "unable to sign tag" },
{ GOT_ERR_COMMIT_REDUNDANT_AUTHOR, "specified author is equal to the "
"default one"},
+ { GOT_ERR_BAD_QUERYSTRING, "invalid query string" },
};
static struct got_custom_error {
gotwebd: urldecode querystring