"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
Re: WIP: rework gotwebd URL style
To:
gameoftrees@openbsd.org
Date:
Mon, 08 Jul 2024 20:35:12 +0200

Download raw body.

Thread
Omar Polo <op@omarpolo.com> wrote:
> First of all, this is work in progress and not intended to be committed
> as-is.

Here's a slightly better version if this.  At least it seems to work.

> The idea is to change the structure of gotwebd URLs since the current
> one is both overly verbose and IMHO a bit ugly.
> 
> My proposal is to use the following general format
> 
> 	repository-name/action/(commit or ref)/path
> 
> which can be of course abbreviated.  for example "/amused.git/" as an
> implicit action of SUMMARY, and "/" the implicit action of INDEX.  The
> special case "/N" with N being a number is to paginate the INDEX.
> 
> There's no ambiguity with refs that have a '/' in them since I'm
> percent-encoding that field, for e.g.
> 
> 	telescope.git/tree/proposal%2Fbreak-line/contrib/
> 
> ("proposal%2Fbreak-line" is "proposal/break-line", and the path is
> "/contrib/")

This is the beat I'm less convinced with.  We could avoid %encoding the
thing and just keeping "tree/proposals/break-line/contrib" quite easily
and it will make hand-editing the URL easier.  On the other hand, we
could also keep the headref as a query string, like cgit does IIRC.

> There's however an added ambiguity between commits and refs, like in
> got(1) command line.  got_operations.c is probably not ready to handle
> this, and it's something that I still have to look into.

Actually this is not very important.  There are only two places in
got_operations.c where it matters, and in both we could unify the
codepaths.

> This also change gotwebd to optionally serve the assets by itself.  This
> is not strictly needed, but it prevents the httpd configuration to grow.
> (we'd need 6 location blocks instead of one otherwise :/)
> 
> of course httpd is trying to be smart and failing badly at it.  I'm using
> this hammer at the moment, will look for a proper solution.

I'm still using this hammer because httpd is very unpleasant :/

> : --- usr.sbin/httpd/httpd.c
> : +++ usr.sbin/httpd/httpd.c
> : @@ -654,6 +654,9 @@ path_info(char *path)
> :  	struct stat	 st;
> :  	int		 ret;
> :  
> : +	if (!strncmp(path, "/htdocs/gotwebd/", 16))
> : +		return (16);
> : +
> :  	start = path;
> :  	end = start + strlen(path);

diff /home/op/w/got
commit - 75e58d893ea728a250b35dff13d82123aaabc9f0
path + /home/op/w/got
blob - 0b8297b06fb4197ffb6bbcabde06045db0db95d5
file + gotwebd/fcgi.c
--- gotwebd/fcgi.c
+++ gotwebd/fcgi.c
@@ -246,6 +246,13 @@ fcgi_parse_params(uint8_t *buf, uint16_t n, struct req
 			c->document_uri[val_len] = '\0';
 		}
 
+		if (val_len < MAX_DOCUMENT_ROOT &&
+		    name_len == 13 &&
+		    strncmp(buf, "DOCUMENT_ROOT", 13) == 0) {
+			memcpy(c->document_root, val, val_len);
+			c->document_root[val_len] = '\0';
+		}
+
 		if (val_len < MAX_SERVER_NAME &&
 		    name_len == 11 &&
 		    strncmp(buf, "SERVER_NAME", 11) == 0) {
@@ -253,6 +260,20 @@ fcgi_parse_params(uint8_t *buf, uint16_t n, struct req
 			c->server_name[val_len] = '\0';
 		}
 
+		if (val_len < MAX_SCRIPT_NAME &&
+		    name_len == 11 &&
+		    strncmp(buf, "SCRIPT_NAME", 11) == 0) {
+			memcpy(c->script_name, val, val_len);
+			c->script_name[val_len] = '\0';
+		}
+
+		if (val_len < MAX_PATH_INFO &&
+		    name_len == 9 &&
+		    strncmp(buf, "PATH_INFO", 9) == 0) {
+			memcpy(c->path_info, val, val_len);
+			c->path_info[val_len] = '\0';
+		}
+
 		if (name_len == 5 &&
 		    strncmp(buf, "HTTPS", 5) == 0)
 			c->https = 1;
blob - 8cc103e1ce5dd08b81df1be04fee1d2d01c8f66c
file + gotwebd/gotweb.c
--- gotwebd/gotweb.c
+++ gotwebd/gotweb.c
@@ -24,6 +24,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
+#include <assert.h>
 #include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
@@ -80,9 +81,12 @@ static const struct action_keys action_keys[] = {
 	{ "rss",	RSS },
 };
 
+static const struct got_error *gotweb_urldecode(char *);
 static const struct got_error *gotweb_init_querystring(struct querystring **);
 static const struct got_error *gotweb_parse_querystring(struct querystring *,
     char *);
+static const struct got_error *gotweb_parse_url(struct querystring *,
+    struct request *);
 static const struct got_error *gotweb_assign_querystring(struct querystring *,
     char *, char *);
 static int gotweb_render_index(struct template *);
@@ -138,6 +142,79 @@ gotweb_reply_file(struct request *c, const char *ctype
 	return gotweb_reply(c, 200, ctype, NULL);
 }
 
+static int
+do_static_file(struct request *c)
+{
+	struct stat	 sb;
+	FILE		*fp;
+	char		 path[PATH_MAX];
+	char		 buf[BUFSIZ];
+	const char	*ext;
+	const char	*mime = NULL;
+	int		 r;
+	size_t		 i, len;
+	const struct {
+		const char	*ext;
+		const char	*mime;
+	} pairs[] = {
+		{ "css", "text/css" },
+		{ "ico", "image/x-icon" },
+		{ "png", "image/png" },
+		{ "webmanifest", "application/manifest+json" },
+		{ "xml", "text/xml" },
+	};
+
+	ext = strrchr(c->path_info, '.');
+	if (ext == NULL)
+		return (0);
+	ext++;
+
+	for (i = 0; i < nitems(pairs); ++i) {
+		if (!strcmp(pairs[i].ext, ext)) {
+			mime = pairs[i].mime;
+			break;
+		}
+	}
+	if (mime == NULL)
+		return (0);
+
+	r = snprintf(path, sizeof(path), "%s/%s", c->document_root,
+	    c->path_info);
+	if (r < 0 || (size_t)r >= sizeof(path))
+		return (0);
+
+	fp = fopen(path, "r");
+	if (fp == NULL) {
+		log_debug("%s: fopen %s", __func__, path);
+		return (0);
+	}
+
+	if (fstat(fileno(fp), &sb) == -1) {
+		log_warn("fstat");
+		fclose(fp);
+		gotweb_reply(c, 500, "text/plain", NULL);
+		return (1);
+	}
+
+	if (tp_writef(c->tp, "Content-type: %s\r\n"
+	    "Content-Length: %lld\r\n\r\n", mime, (long long)sb.st_size) == -1)
+		goto done;
+	if (template_flush(c->tp) == -1)
+		goto done;
+
+	for (;;) {
+		len = fread(buf, 1, sizeof(buf), fp);
+		if (len == 0)
+			break;
+		if (fcgi_write(c, buf, len) == -1)
+			break;
+	}
+
+ done:
+	fclose(fp);
+	return (1);
+}
+
 void
 gotweb_process_request(struct request *c)
 {
@@ -150,6 +227,9 @@ gotweb_process_request(struct request *c)
 	size_t len;
 	int r, binary = 0;
 
+	got_path_strip_trailing_slashes(c->document_root);
+	got_path_strip_trailing_slashes(c->script_name);
+
 	/* init the transport */
 	error = gotweb_init_transport(&c->t);
 	if (error) {
@@ -169,8 +249,16 @@ gotweb_process_request(struct request *c)
 		log_warnx("%s: %s", __func__, error->msg);
 		goto err;
 	}
+
+	if (do_static_file(c))
+		return;
+
 	c->t->qs = qs;
-	error = gotweb_parse_querystring(qs, c->querystring);
+
+	if (c->path_info[0] != '\0')
+		error = gotweb_parse_url(qs, c);
+	else
+		error = gotweb_parse_querystring(qs, c->querystring);
 	if (error) {
 		log_warnx("%s: %s", __func__, error->msg);
 		goto err;
@@ -465,6 +553,92 @@ gotweb_init_querystring(struct querystring **qs)
 }
 
 static const struct got_error *
+gotweb_parse_url(struct querystring *qs, struct request *c)
+{
+	static const struct got_error *err;
+	const char *errstr;
+	char ch, *last, *path = c->path_info;
+	size_t seg, i;
+
+	while (*path == '/')
+		path++;
+
+	if (*path == '\0')
+		return NULL;
+
+	/* if path is just a number, then it's the index page number */
+	qs->index_page = strtonum(path, 0, INT64_MAX, &errstr);
+	if (errstr == NULL)
+		return NULL;
+	qs->index_page = -1;
+
+	seg = strcspn(path, "/");
+	if ((qs->path = strndup(path, seg)) == NULL)
+		return got_error_from_errno("strdup");
+
+	path += seg + 1;
+	while (*path == '/')
+		++path;
+	if (*path == '\0') {
+		qs->action = SUMMARY;
+		return 0;
+	}
+
+	seg = strcspn(path, "/");
+	ch = path[seg];
+	path[seg] = '\0';
+	qs->action = ERR;
+	for (i = 0; i < nitems(action_keys); i++) {
+		if (strcmp(path, action_keys[i].name) != 0)
+			continue;
+		qs->action = action_keys[i].action;
+		break;
+	}
+	path[seg] = ch;
+	if (qs->action == ERR)
+		return got_error(GOT_ERR_BAD_QUERYSTRING);
+
+	if (path[seg] == '\0')
+		return 0;
+	path += seg + 1;
+
+	seg = strcspn(path, "/");
+	qs->headref = strndup(path, seg);
+	if (qs->headref == NULL)
+		return got_error_from_errno("strndup");
+	err = gotweb_urldecode(qs->headref);
+	if (err != NULL)
+		return err;
+	qs->commit = strdup(qs->headref);
+	if (qs->commit == NULL)
+		return got_error_from_errno("strdup");
+
+	if (path[seg] == '\0')
+		return 0;
+	path += seg + 1;
+
+	last = strrchr(path, '/');
+	if (last == NULL)
+		last = path;
+
+	qs->folder = strndup(path, last - path);
+	if (qs->folder == NULL)
+		return got_error_from_errno("strndup");
+
+	path = last;
+	while (*path == '/')
+		path++;
+	if (*path == '\0')
+		return NULL;
+
+	qs->file = strdup(path);
+	if (qs->file == NULL)
+		return got_error_from_errno("strdup");
+
+	return NULL;
+}
+
+static const struct got_error *
 gotweb_parse_querystring(struct querystring *qs, char *qst)
 {
 	const struct got_error *error = NULL;
@@ -960,74 +1134,44 @@ gotweb_action_name(int action)
 int
 gotweb_render_url(struct request *c, struct gotweb_url *url)
 {
-	const char *sep = "?", *action;
-	char *tmp;
+	char *enc;
 	int r;
 
-	action = gotweb_action_name(url->action);
-	if (action != NULL) {
-		if (tp_writef(c->tp, "?action=%s", action) == -1)
-			return -1;
-		sep = "&";
-	}
+	if (tp_writef(c->tp, "%s/", c->script_name) == -1)
+		return -1;
 
-	if (url->commit) {
-		if (tp_writef(c->tp, "%scommit=%s", sep, url->commit) == -1)
-			return -1;
-		sep = "&";
+	if (url->action == INDEX) {
+		if (url->index_page != -1)
+			return tp_writef(c->tp, "%d", url->index_page);
+		return 0;
 	}
 
-	if (url->file) {
-		tmp = gotweb_urlencode(url->file);
-		if (tmp == NULL)
-			return -1;
-		r = tp_writef(c->tp, "%sfile=%s", sep, tmp);
-		free(tmp);
-		if (r == -1)
-			return -1;
-		sep = "&";
-	}
+	assert(url->path != NULL);
+	if (tp_writef(c->tp, "%s/", url->path) == -1)
+		return -1;
 
-	if (url->folder) {
-		tmp = gotweb_urlencode(url->folder);
-		if (tmp == NULL)
-			return -1;
-		r = tp_writef(c->tp, "%sfolder=%s", sep, tmp);
-		free(tmp);
-		if (r == -1)
-			return -1;
-		sep = "&";
-	}
+	if (url->action == SUMMARY)
+		return 0;
 
-	if (url->headref) {
-		tmp = gotweb_urlencode(url->headref);
-		if (tmp == NULL)
-			return -1;
-		r = tp_writef(c->tp, "%sheadref=%s", sep, url->headref);
-		free(tmp);
-		if (r == -1)
-			return -1;
-		sep = "&";
-	}
+	if (tp_writef(c->tp, "%s/", gotweb_action_name(url->action)) == -1)
+		return -1;
 
-	if (url->index_page != -1) {
-		if (tp_writef(c->tp, "%sindex_page=%d", sep,
-		    url->index_page) == -1)
-			return -1;
-		sep = "&";
-	}
+	/* must have a commit id or headref */
+	if (url->commit == NULL && url->headref == NULL)
+		return 0;
+	enc = gotweb_urlencode(url->commit ? url->commit : url->headref);
+	if (enc == NULL)
+		return -1;
+	r = tp_writef(c->tp, "%s/", enc);
+	free(enc);
+	if (r == -1)
+		return -1;
 
-	if (url->path) {
-		tmp = gotweb_urlencode(url->path);
-		if (tmp == NULL)
-			return -1;
-		r = tp_writef(c->tp, "%spath=%s", sep, tmp);
-		free(tmp);
-		if (r == -1)
-			return -1;
-		sep = "&";
-	}
-
+	if (url->folder && url->folder[0] != '\0' &&
+	    tp_writef(c->tp, "%s/", url->folder) == -1)
+		return -1;
+	if (url->file && tp_writes(c->tp, url->file) == -1)
+		return -1;
 	return 0;
 }
 
blob - c0047da16df32ddb81665645f2025c12d9cf8250
file + gotwebd/gotwebd.h
--- gotwebd/gotwebd.h
+++ gotwebd/gotwebd.h
@@ -50,7 +50,10 @@
 /* GOTWEB DEFAULTS */
 #define MAX_QUERYSTRING		 2048
 #define MAX_DOCUMENT_URI	 255
+#define MAX_DOCUMENT_ROOT	 255
 #define MAX_SERVER_NAME		 255
+#define MAX_SCRIPT_NAME		 255
+#define MAX_PATH_INFO		 2048
 
 #define GOTWEB_GIT_DIR		 ".git"
 
@@ -249,7 +252,10 @@ struct request {
 
 	char				 querystring[MAX_QUERYSTRING];
 	char				 document_uri[MAX_DOCUMENT_URI];
+	char				 document_root[MAX_DOCUMENT_ROOT];
 	char				 server_name[MAX_SERVER_NAME];
+	char				 script_name[MAX_SCRIPT_NAME];
+	char				 path_info[MAX_PATH_INFO];
 	int				 https;
 
 	uint8_t				 request_started;
blob - 22583fa65fa1ec7208c4cc6061197077ced00de8
file + gotwebd/pages.tmpl
--- gotwebd/pages.tmpl
+++ gotwebd/pages.tmpl
@@ -167,7 +167,7 @@ nextsep(char *s, char **t)
 	struct server		*srv = c->srv;
 	struct querystring	*qs = c->t->qs;
 	struct gotweb_url	 u_path;
-	const char		*prfx = c->document_uri;
+	const char		*prfx = c->script_name;
 	const char		*css = srv->custom_css;
 
 	memset(&u_path, 0, sizeof(u_path));
@@ -182,18 +182,18 @@ nextsep(char *s, char **t)
     <meta name="viewport" content="initial-scale=1.0" />
     <meta name="msapplication-TileColor" content="#da532c" />
     <meta name="theme-color" content="#ffffff"/>
-    <link rel="apple-touch-icon" sizes="180x180" href="{{ prfx }}apple-touch-icon.png" />
-    <link rel="icon" type="image/png" sizes="32x32" href="{{ prfx }}favicon-32x32.png" />
-    <link rel="icon" type="image/png" sizes="16x16" href="{{ prfx }}favicon-16x16.png" />
-    <link rel="manifest" href="{{ prfx }}site.webmanifest"/>
-    <link rel="mask-icon" href="{{ prfx }}safari-pinned-tab.svg" />
-    <link rel="stylesheet" type="text/css" href="{{ prfx }}{{ css }}" />
+    <link rel="apple-touch-icon" sizes="180x180" href="{{ prfx }}/apple-touch-icon.png" />
+    <link rel="icon" type="image/png" sizes="32x32" href="{{ prfx }}/favicon-32x32.png" />
+    <link rel="icon" type="image/png" sizes="16x16" href="{{ prfx }}/favicon-16x16.png" />
+    <link rel="manifest" href="{{ prfx }}/site.webmanifest"/>
+    <link rel="mask-icon" href="{{ prfx }}/safari-pinned-tab.svg" />
+    <link rel="stylesheet" type="text/css" href="{{ prfx }}/{{ css }}" />
   </head>
   <body>
     <header id="header">
       <div id="got_link">
         <a href="{{ srv->logo_url }}" target="_blank">
-          <img src="{{ prfx }}{{ srv->logo }}" />
+          <img src="{{ prfx }}/{{ srv->logo }}" />
         </a>
       </div>
     </header>
@@ -755,7 +755,8 @@ nextsep(char *s, char **t)
 
 	folder = qs->folder ? qs->folder : "";
 	if (S_ISDIR(mode)) {
-		if (asprintf(&dir, "%s/%s", folder, name) == -1)
+		if (asprintf(&dir, "%s%s%s", folder,
+		    *folder ? "/" : "", name) == -1)
 			return (-1);
 
 		url.action = TREE;
blob - e4e422a7568c7bafebdb57d3377c40636ff1945b
file + gotwebd/sockets.c
--- gotwebd/sockets.c
+++ gotwebd/sockets.c
@@ -221,6 +221,9 @@ sockets_launch(void)
 	if (error)
 		fatal("%s", error->msg);
 
+	if (unveil("/", "r") == -1)
+		fatal("unveil");
+
 	if (unveil(NULL, NULL) == -1)
 		fatal("unveil");
 }