From: Omar Polo Subject: WIP: rework gotwebd URL style To: gameoftrees@openbsd.org Date: Tue, 28 May 2024 17:35:45 +0200 First of all, this is work in progress and not intended to be committed as-is. The idea is to change the structure of gotwebd URLs since the current one is both overly verbose and IMHO a bit ugly. My proposal is to use the following general format repository-name/action/(commit or ref)/path which can be of course abbreviated. for example "/amused.git/" as an implicit action of SUMMARY, and "/" the implicit action of INDEX. The special case "/N" with N being a number is to paginate the INDEX. There's no ambiguity with refs that have a '/' in them since I'm percent-encoding that field, for e.g. telescope.git/tree/proposal%2Fbreak-line/contrib/ ("proposal%2Fbreak-line" is "proposal/break-line", and the path is "/contrib/") There's however an added ambiguity between commits and refs, like in got(1) command line. got_operations.c is probably not ready to handle this, and it's something that I still have to look into. This also change gotwebd to optionally serve the assets by itself. This is not strictly needed, but it prevents the httpd configuration to grow. (we'd need 6 location blocks instead of one otherwise :/) of course httpd is trying to be smart and failing badly at it. I'm using this hammer at the moment, will look for a proper solution. : --- usr.sbin/httpd/httpd.c : +++ usr.sbin/httpd/httpd.c : @@ -654,6 +654,9 @@ path_info(char *path) : struct stat st; : int ret; : : + if (!strncmp(path, "/htdocs/gotwebd/", 16)) : + return (16); : + : start = path; : end = start + strlen(path); To avoid breaking existing links, I'm still keeping the code to parse the querystring, so that we can safely rollover to the new URI scheme without breakage. (something i strongly care about.) Finally, there are probably regressions. I'm sending this to gauge the interest and start a discussion before turning this into something committable. thoughts? diff /home/op/w/got commit - c2f5b3e63df878c687928a616ef24e87131a795e path + /home/op/w/got blob - 0b8297b06fb4197ffb6bbcabde06045db0db95d5 file + gotwebd/fcgi.c --- gotwebd/fcgi.c +++ gotwebd/fcgi.c @@ -246,6 +246,13 @@ fcgi_parse_params(uint8_t *buf, uint16_t n, struct req c->document_uri[val_len] = '\0'; } + if (val_len < MAX_DOCUMENT_ROOT && + name_len == 13 && + strncmp(buf, "DOCUMENT_ROOT", 13) == 0) { + memcpy(c->document_root, val, val_len); + c->document_root[val_len] = '\0'; + } + if (val_len < MAX_SERVER_NAME && name_len == 11 && strncmp(buf, "SERVER_NAME", 11) == 0) { @@ -253,6 +260,20 @@ fcgi_parse_params(uint8_t *buf, uint16_t n, struct req c->server_name[val_len] = '\0'; } + if (val_len < MAX_SCRIPT_NAME && + name_len == 11 && + strncmp(buf, "SCRIPT_NAME", 11) == 0) { + memcpy(c->script_name, val, val_len); + c->script_name[val_len] = '\0'; + } + + if (val_len < MAX_PATH_INFO && + name_len == 9 && + strncmp(buf, "PATH_INFO", 9) == 0) { + memcpy(c->path_info, val, val_len); + c->path_info[val_len] = '\0'; + } + if (name_len == 5 && strncmp(buf, "HTTPS", 5) == 0) c->https = 1; blob - 8cc103e1ce5dd08b81df1be04fee1d2d01c8f66c file + gotwebd/gotweb.c --- gotwebd/gotweb.c +++ gotwebd/gotweb.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -80,9 +81,12 @@ static const struct action_keys action_keys[] = { { "rss", RSS }, }; +static const struct got_error *gotweb_urldecode(char *); static const struct got_error *gotweb_init_querystring(struct querystring **); static const struct got_error *gotweb_parse_querystring(struct querystring *, char *); +static const struct got_error *gotweb_parse_url(struct querystring *, + struct request *); static const struct got_error *gotweb_assign_querystring(struct querystring *, char *, char *); static int gotweb_render_index(struct template *); @@ -138,6 +142,78 @@ gotweb_reply_file(struct request *c, const char *ctype return gotweb_reply(c, 200, ctype, NULL); } +static int +do_static_file(struct request *c) +{ + struct stat sb; + FILE *fp; + char path[PATH_MAX]; + char buf[BUFSIZ]; + const char *ext; + const char *mime = NULL; + int r; + size_t i, len; + const struct { + const char *ext; + const char *mime; + } pairs[] = { + { "css", "text/css" }, + { "ico", "image/x-icon" }, + { "png", "image/png" }, + { "webmanifest", "application/manifest+json" }, + { "xml", "text/xml" }, + }; + + ext = strrchr(c->path_info, '.'); + if (ext == NULL) + return (0); + ext++; + + for (i = 0; i < nitems(pairs); ++i) { + if (!strcmp(pairs[i].ext, ext)) { + mime = pairs[i].mime; + break; + } + } + if (mime == NULL) + return (0); + + r = snprintf(path, sizeof(path), "%s/%s", c->document_root, c->path_info); + if (r < 0 || (size_t)r >= sizeof(path)) + return (0); + + fp = fopen(path, "r"); + if (fp == NULL) { + log_debug("%s: fopen %s", __func__, path); + return (0); + } + + if (fstat(fileno(fp), &sb) == -1) { + log_warn("fstat"); + fclose(fp); + gotweb_reply(c, 500, "text/plain", NULL); + return (1); + } + + if (tp_writef(c->tp, "Content-type: %s\r\n" + "Content-Length: %lld\n\n", mime, (long long)sb.st_size) == -1) + goto done; + if (template_flush(c->tp) == -1) + goto done; + + for (;;) { + len = fread(buf, 1, sizeof(buf), fp); + if (len == 0) + break; + if (fcgi_write(c, buf, len) == -1) + break; + } + + done: + fclose(fp); + return (1); +} + void gotweb_process_request(struct request *c) { @@ -150,6 +226,15 @@ gotweb_process_request(struct request *c) size_t len; int r, binary = 0; + got_path_strip_trailing_slashes(c->document_root); + got_path_strip_trailing_slashes(c->script_name); + + log_warnx("---"); + log_warnx("doc root is %s", c->document_root); + log_warnx("script name is %s", c->script_name); + log_warnx("path info is %s", c->path_info); + log_warnx("querystring is %s", c->querystring); + /* init the transport */ error = gotweb_init_transport(&c->t); if (error) { @@ -169,8 +254,17 @@ gotweb_process_request(struct request *c) log_warnx("%s: %s", __func__, error->msg); goto err; } + + if (do_static_file(c)) + return; + c->t->qs = qs; - error = gotweb_parse_querystring(qs, c->querystring); + + if (c->path_info[0] != '\0') + error = gotweb_parse_url(qs, c); + else + error = gotweb_parse_querystring(qs, c->querystring); + if (error) { log_warnx("%s: %s", __func__, error->msg); goto err; @@ -465,6 +559,92 @@ gotweb_init_querystring(struct querystring **qs) } static const struct got_error * +gotweb_parse_url(struct querystring *qs, struct request *c) +{ + static const struct got_error *err; + const char *errstr; + char ch, *last, *path = c->path_info; + size_t seg, i; + + while (*path == '/') + path++; + + if (*path == '\0') + return NULL; + + /* if path is just a numer, then it's the index page number */ + qs->index_page = strtonum(path, 0, INT64_MAX, &errstr); + if (errstr == NULL) + return NULL; + qs->index_page = -1; + + seg = strcspn(path, "/"); + if ((qs->path = strndup(path, seg)) == NULL) + return got_error_from_errno("strdup"); + + path += seg + 1; + while (*path == '/') + ++path; + if (*path == '\0') { + qs->action = SUMMARY; + return 0; + } + + seg = strcspn(path, "/"); + ch = path[seg]; + path[seg] = '\0'; + qs->action = ERR; + for (i = 0; i < nitems(action_keys); i++) { + if (strcmp(path, action_keys[i].name) != 0) + continue; + qs->action = action_keys[i].action; + break; + } + path[seg] = ch; + if (qs->action == ERR) + return got_error(GOT_ERR_BAD_QUERYSTRING); + + if (path[seg] == '\0') + return 0; + path += seg + 1; + + seg = strcspn(path, "/"); + qs->headref = strndup(path, seg); + if (qs->headref == NULL) + return got_error_from_errno("strndup"); + err = gotweb_urldecode(qs->headref); + if (err != NULL) + return err; + qs->commit = strdup(qs->headref); + if (qs->commit == NULL) + return got_error_from_errno("strdup"); + + if (path[seg] == '\0') + return 0; + path += seg + 1; + + last = strrchr(path, '/'); + if (last == NULL) + last = path; + + qs->folder = strndup(path, last - path); + if (qs->folder == NULL) + return got_error_from_errno("strndup"); + + path = last; + while (*path == '/') + path++; + if (*path == '\0') + return NULL; + + qs->file = strdup(path); + if (qs->file == NULL) + return got_error_from_errno("strdup"); + + return NULL; +} + +static const struct got_error * gotweb_parse_querystring(struct querystring *qs, char *qst) { const struct got_error *error = NULL; @@ -960,74 +1140,45 @@ gotweb_action_name(int action) int gotweb_render_url(struct request *c, struct gotweb_url *url) { - const char *sep = "?", *action; - char *tmp; + char *enc; int r; - action = gotweb_action_name(url->action); - if (action != NULL) { - if (tp_writef(c->tp, "?action=%s", action) == -1) - return -1; - sep = "&"; - } + if (tp_writef(c->tp, "%s%s", c->script_name, + *c->script_name == '\0' ? "" : "/") == -1) + return -1; - if (url->commit) { - if (tp_writef(c->tp, "%scommit=%s", sep, url->commit) == -1) - return -1; - sep = "&"; + if (url->action == INDEX) { + if (url->index_page != -1) + return tp_writef(c->tp, "%d", url->index_page); + return 0; } - if (url->file) { - tmp = gotweb_urlencode(url->file); - if (tmp == NULL) - return -1; - r = tp_writef(c->tp, "%sfile=%s", sep, tmp); - free(tmp); - if (r == -1) - return -1; - sep = "&"; - } + assert(url->path != NULL); + if (tp_writef(c->tp, "%s/", url->path) == -1) + return -1; - if (url->folder) { - tmp = gotweb_urlencode(url->folder); - if (tmp == NULL) - return -1; - r = tp_writef(c->tp, "%sfolder=%s", sep, tmp); - free(tmp); - if (r == -1) - return -1; - sep = "&"; - } + if (url->action == SUMMARY) + return 0; - if (url->headref) { - tmp = gotweb_urlencode(url->headref); - if (tmp == NULL) - return -1; - r = tp_writef(c->tp, "%sheadref=%s", sep, url->headref); - free(tmp); - if (r == -1) - return -1; - sep = "&"; - } + if (tp_writef(c->tp, "%s/", gotweb_action_name(url->action)) == -1) + return -1; - if (url->index_page != -1) { - if (tp_writef(c->tp, "%sindex_page=%d", sep, - url->index_page) == -1) - return -1; - sep = "&"; - } + /* must have a commit id or headref */ + if (url->commit == NULL && url->headref == NULL) + return -1; + enc = gotweb_urlencode(url->commit ? url->commit : url->headref); + if (enc == NULL) + return -1; + r = tp_writef(c->tp, "%s/", enc); + free(enc); + if (r == -1) + return -1; - if (url->path) { - tmp = gotweb_urlencode(url->path); - if (tmp == NULL) - return -1; - r = tp_writef(c->tp, "%spath=%s", sep, tmp); - free(tmp); - if (r == -1) - return -1; - sep = "&"; - } - + if (url->folder && url->folder[0] != '\0' && + tp_writef(c->tp, "%s/", url->folder) == -1) + return -1; + if (url->file && tp_writes(c->tp, url->file) == -1) + return -1; return 0; } blob - c0047da16df32ddb81665645f2025c12d9cf8250 file + gotwebd/gotwebd.h --- gotwebd/gotwebd.h +++ gotwebd/gotwebd.h @@ -50,7 +50,10 @@ /* GOTWEB DEFAULTS */ #define MAX_QUERYSTRING 2048 #define MAX_DOCUMENT_URI 255 +#define MAX_DOCUMENT_ROOT 255 #define MAX_SERVER_NAME 255 +#define MAX_SCRIPT_NAME 255 +#define MAX_PATH_INFO 2048 #define GOTWEB_GIT_DIR ".git" @@ -249,7 +252,10 @@ struct request { char querystring[MAX_QUERYSTRING]; char document_uri[MAX_DOCUMENT_URI]; + char document_root[MAX_DOCUMENT_ROOT]; char server_name[MAX_SERVER_NAME]; + char script_name[MAX_SCRIPT_NAME]; + char path_info[MAX_PATH_INFO]; int https; uint8_t request_started; blob - 22583fa65fa1ec7208c4cc6061197077ced00de8 file + gotwebd/pages.tmpl --- gotwebd/pages.tmpl +++ gotwebd/pages.tmpl @@ -167,7 +167,7 @@ nextsep(char *s, char **t) struct server *srv = c->srv; struct querystring *qs = c->t->qs; struct gotweb_url u_path; - const char *prfx = c->document_uri; + const char *prfx = c->script_name; const char *css = srv->custom_css; memset(&u_path, 0, sizeof(u_path)); @@ -182,18 +182,18 @@ nextsep(char *s, char **t) - - - - - - + + + + + + @@ -755,7 +755,8 @@ nextsep(char *s, char **t) folder = qs->folder ? qs->folder : ""; if (S_ISDIR(mode)) { - if (asprintf(&dir, "%s/%s", folder, name) == -1) + if (asprintf(&dir, "%s%s%s", folder, + *folder ? "/" : "", name) == -1) return (-1); url.action = TREE; blob - e4e422a7568c7bafebdb57d3377c40636ff1945b file + gotwebd/sockets.c --- gotwebd/sockets.c +++ gotwebd/sockets.c @@ -221,6 +221,9 @@ sockets_launch(void) if (error) fatal("%s", error->msg); + if (unveil("/", "r") == -1) + fatal("unveil"); + if (unveil(NULL, NULL) == -1) fatal("unveil"); }