"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
WIP: rework gotwebd URL style
To:
gameoftrees@openbsd.org
Date:
Tue, 28 May 2024 17:35:45 +0200

Download raw body.

Thread
First of all, this is work in progress and not intended to be committed
as-is.

The idea is to change the structure of gotwebd URLs since the current
one is both overly verbose and IMHO a bit ugly.

My proposal is to use the following general format

	repository-name/action/(commit or ref)/path

which can be of course abbreviated.  for example "/amused.git/" as an
implicit action of SUMMARY, and "/" the implicit action of INDEX.  The
special case "/N" with N being a number is to paginate the INDEX.

There's no ambiguity with refs that have a '/' in them since I'm
percent-encoding that field, for e.g.

	telescope.git/tree/proposal%2Fbreak-line/contrib/

("proposal%2Fbreak-line" is "proposal/break-line", and the path is
"/contrib/")

There's however an added ambiguity between commits and refs, like in
got(1) command line.  got_operations.c is probably not ready to handle
this, and it's something that I still have to look into.

This also change gotwebd to optionally serve the assets by itself.  This
is not strictly needed, but it prevents the httpd configuration to grow.
(we'd need 6 location blocks instead of one otherwise :/)

of course httpd is trying to be smart and failing badly at it.  I'm using
this hammer at the moment, will look for a proper solution.

: --- usr.sbin/httpd/httpd.c
: +++ usr.sbin/httpd/httpd.c
: @@ -654,6 +654,9 @@ path_info(char *path)
:  	struct stat	 st;
:  	int		 ret;
:  
: +	if (!strncmp(path, "/htdocs/gotwebd/", 16))
: +		return (16);
: +
:  	start = path;
:  	end = start + strlen(path);

To avoid breaking existing links, I'm still keeping the code to parse
the querystring, so that we can safely rollover to the new URI scheme
without breakage.  (something i strongly care about.)

Finally, there are probably regressions.  I'm sending this to gauge the
interest and start a discussion before turning this into something
committable.

thoughts?


diff /home/op/w/got
commit - c2f5b3e63df878c687928a616ef24e87131a795e
path + /home/op/w/got
blob - 0b8297b06fb4197ffb6bbcabde06045db0db95d5
file + gotwebd/fcgi.c
--- gotwebd/fcgi.c
+++ gotwebd/fcgi.c
@@ -246,6 +246,13 @@ fcgi_parse_params(uint8_t *buf, uint16_t n, struct req
 			c->document_uri[val_len] = '\0';
 		}
 
+		if (val_len < MAX_DOCUMENT_ROOT &&
+		    name_len == 13 &&
+		    strncmp(buf, "DOCUMENT_ROOT", 13) == 0) {
+			memcpy(c->document_root, val, val_len);
+			c->document_root[val_len] = '\0';
+		}
+
 		if (val_len < MAX_SERVER_NAME &&
 		    name_len == 11 &&
 		    strncmp(buf, "SERVER_NAME", 11) == 0) {
@@ -253,6 +260,20 @@ fcgi_parse_params(uint8_t *buf, uint16_t n, struct req
 			c->server_name[val_len] = '\0';
 		}
 
+		if (val_len < MAX_SCRIPT_NAME &&
+		    name_len == 11 &&
+		    strncmp(buf, "SCRIPT_NAME", 11) == 0) {
+			memcpy(c->script_name, val, val_len);
+			c->script_name[val_len] = '\0';
+		}
+
+		if (val_len < MAX_PATH_INFO &&
+		    name_len == 9 &&
+		    strncmp(buf, "PATH_INFO", 9) == 0) {
+			memcpy(c->path_info, val, val_len);
+			c->path_info[val_len] = '\0';
+		}
+
 		if (name_len == 5 &&
 		    strncmp(buf, "HTTPS", 5) == 0)
 			c->https = 1;
blob - 8cc103e1ce5dd08b81df1be04fee1d2d01c8f66c
file + gotwebd/gotweb.c
--- gotwebd/gotweb.c
+++ gotwebd/gotweb.c
@@ -24,6 +24,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
+#include <assert.h>
 #include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
@@ -80,9 +81,12 @@ static const struct action_keys action_keys[] = {
 	{ "rss",	RSS },
 };
 
+static const struct got_error *gotweb_urldecode(char *);
 static const struct got_error *gotweb_init_querystring(struct querystring **);
 static const struct got_error *gotweb_parse_querystring(struct querystring *,
     char *);
+static const struct got_error *gotweb_parse_url(struct querystring *,
+    struct request *);
 static const struct got_error *gotweb_assign_querystring(struct querystring *,
     char *, char *);
 static int gotweb_render_index(struct template *);
@@ -138,6 +142,78 @@ gotweb_reply_file(struct request *c, const char *ctype
 	return gotweb_reply(c, 200, ctype, NULL);
 }
 
+static int
+do_static_file(struct request *c)
+{
+	struct stat	 sb;
+	FILE		*fp;
+	char		 path[PATH_MAX];
+	char		 buf[BUFSIZ];
+	const char	*ext;
+	const char	*mime = NULL;
+	int		 r;
+	size_t		 i, len;
+	const struct {
+		const char	*ext;
+		const char	*mime;
+	} pairs[] = {
+		{ "css", "text/css" },
+		{ "ico", "image/x-icon" },
+		{ "png", "image/png" },
+		{ "webmanifest", "application/manifest+json" },
+		{ "xml", "text/xml" },
+	};
+
+	ext = strrchr(c->path_info, '.');
+	if (ext == NULL)
+		return (0);
+	ext++;
+
+	for (i = 0; i < nitems(pairs); ++i) {
+		if (!strcmp(pairs[i].ext, ext)) {
+			mime = pairs[i].mime;
+			break;
+		}
+	}
+	if (mime == NULL)
+		return (0);
+
+	r = snprintf(path, sizeof(path), "%s/%s", c->document_root, c->path_info);
+	if (r < 0 || (size_t)r >= sizeof(path))
+		return (0);
+
+	fp = fopen(path, "r");
+	if (fp == NULL) {
+		log_debug("%s: fopen %s", __func__, path);
+		return (0);
+	}
+
+	if (fstat(fileno(fp), &sb) == -1) {
+		log_warn("fstat");
+		fclose(fp);
+		gotweb_reply(c, 500, "text/plain", NULL);
+		return (1);
+	}
+
+	if (tp_writef(c->tp, "Content-type: %s\r\n"
+	    "Content-Length: %lld\n\n", mime, (long long)sb.st_size) == -1)
+		goto done;
+	if (template_flush(c->tp) == -1)
+		goto done;
+
+	for (;;) {
+		len = fread(buf, 1, sizeof(buf), fp);
+		if (len == 0)
+			break;
+		if (fcgi_write(c, buf, len) == -1)
+			break;
+	}
+
+ done:
+	fclose(fp);
+	return (1);
+}
+
 void
 gotweb_process_request(struct request *c)
 {
@@ -150,6 +226,15 @@ gotweb_process_request(struct request *c)
 	size_t len;
 	int r, binary = 0;
 
+	got_path_strip_trailing_slashes(c->document_root);
+	got_path_strip_trailing_slashes(c->script_name);
+
+	log_warnx("---");
+	log_warnx("doc root    is %s", c->document_root);
+	log_warnx("script name is %s", c->script_name);
+	log_warnx("path info   is %s", c->path_info);
+	log_warnx("querystring is %s", c->querystring);
+
 	/* init the transport */
 	error = gotweb_init_transport(&c->t);
 	if (error) {
@@ -169,8 +254,17 @@ gotweb_process_request(struct request *c)
 		log_warnx("%s: %s", __func__, error->msg);
 		goto err;
 	}
+
+	if (do_static_file(c))
+		return;
+
 	c->t->qs = qs;
-	error = gotweb_parse_querystring(qs, c->querystring);
+
+	if (c->path_info[0] != '\0')
+		error = gotweb_parse_url(qs, c);
+	else
+		error = gotweb_parse_querystring(qs, c->querystring);
+
 	if (error) {
 		log_warnx("%s: %s", __func__, error->msg);
 		goto err;
@@ -465,6 +559,92 @@ gotweb_init_querystring(struct querystring **qs)
 }
 
 static const struct got_error *
+gotweb_parse_url(struct querystring *qs, struct request *c)
+{
+	static const struct got_error *err;
+	const char *errstr;
+	char ch, *last, *path = c->path_info;
+	size_t seg, i;
+
+	while (*path == '/')
+		path++;
+
+	if (*path == '\0')
+		return NULL;
+
+	/* if path is just a numer, then it's the index page number */
+	qs->index_page = strtonum(path, 0, INT64_MAX, &errstr);
+	if (errstr == NULL)
+		return NULL;
+	qs->index_page = -1;
+
+	seg = strcspn(path, "/");
+	if ((qs->path = strndup(path, seg)) == NULL)
+		return got_error_from_errno("strdup");
+
+	path += seg + 1;
+	while (*path == '/')
+		++path;
+	if (*path == '\0') {
+		qs->action = SUMMARY;
+		return 0;
+	}
+
+	seg = strcspn(path, "/");
+	ch = path[seg];
+	path[seg] = '\0';
+	qs->action = ERR;
+	for (i = 0; i < nitems(action_keys); i++) {
+		if (strcmp(path, action_keys[i].name) != 0)
+			continue;
+		qs->action = action_keys[i].action;
+		break;
+	}
+	path[seg] = ch;
+	if (qs->action == ERR)
+		return got_error(GOT_ERR_BAD_QUERYSTRING);
+
+	if (path[seg] == '\0')
+		return 0;
+	path += seg + 1;
+
+	seg = strcspn(path, "/");
+	qs->headref = strndup(path, seg);
+	if (qs->headref == NULL)
+		return got_error_from_errno("strndup");
+	err = gotweb_urldecode(qs->headref);
+	if (err != NULL)
+		return err;
+	qs->commit = strdup(qs->headref);
+	if (qs->commit == NULL)
+		return got_error_from_errno("strdup");
+
+	if (path[seg] == '\0')
+		return 0;
+	path += seg + 1;
+
+	last = strrchr(path, '/');
+	if (last == NULL)
+		last = path;
+
+	qs->folder = strndup(path, last - path);
+	if (qs->folder == NULL)
+		return got_error_from_errno("strndup");
+
+	path = last;
+	while (*path == '/')
+		path++;
+	if (*path == '\0')
+		return NULL;
+
+	qs->file = strdup(path);
+	if (qs->file == NULL)
+		return got_error_from_errno("strdup");
+
+	return NULL;
+}
+
+static const struct got_error *
 gotweb_parse_querystring(struct querystring *qs, char *qst)
 {
 	const struct got_error *error = NULL;
@@ -960,74 +1140,45 @@ gotweb_action_name(int action)
 int
 gotweb_render_url(struct request *c, struct gotweb_url *url)
 {
-	const char *sep = "?", *action;
-	char *tmp;
+	char *enc;
 	int r;
 
-	action = gotweb_action_name(url->action);
-	if (action != NULL) {
-		if (tp_writef(c->tp, "?action=%s", action) == -1)
-			return -1;
-		sep = "&";
-	}
+	if (tp_writef(c->tp, "%s%s", c->script_name,
+	    *c->script_name == '\0' ? "" : "/") == -1)
+		return -1;
 
-	if (url->commit) {
-		if (tp_writef(c->tp, "%scommit=%s", sep, url->commit) == -1)
-			return -1;
-		sep = "&";
+	if (url->action == INDEX) {
+		if (url->index_page != -1)
+			return tp_writef(c->tp, "%d", url->index_page);
+		return 0;
 	}
 
-	if (url->file) {
-		tmp = gotweb_urlencode(url->file);
-		if (tmp == NULL)
-			return -1;
-		r = tp_writef(c->tp, "%sfile=%s", sep, tmp);
-		free(tmp);
-		if (r == -1)
-			return -1;
-		sep = "&";
-	}
+	assert(url->path != NULL);
+	if (tp_writef(c->tp, "%s/", url->path) == -1)
+		return -1;
 
-	if (url->folder) {
-		tmp = gotweb_urlencode(url->folder);
-		if (tmp == NULL)
-			return -1;
-		r = tp_writef(c->tp, "%sfolder=%s", sep, tmp);
-		free(tmp);
-		if (r == -1)
-			return -1;
-		sep = "&";
-	}
+	if (url->action == SUMMARY)
+		return 0;
 
-	if (url->headref) {
-		tmp = gotweb_urlencode(url->headref);
-		if (tmp == NULL)
-			return -1;
-		r = tp_writef(c->tp, "%sheadref=%s", sep, url->headref);
-		free(tmp);
-		if (r == -1)
-			return -1;
-		sep = "&";
-	}
+	if (tp_writef(c->tp, "%s/", gotweb_action_name(url->action)) == -1)
+		return -1;
 
-	if (url->index_page != -1) {
-		if (tp_writef(c->tp, "%sindex_page=%d", sep,
-		    url->index_page) == -1)
-			return -1;
-		sep = "&";
-	}
+	/* must have a commit id or headref */
+	if (url->commit == NULL && url->headref == NULL)
+		return -1;
+	enc = gotweb_urlencode(url->commit ? url->commit : url->headref);
+	if (enc == NULL)
+		return -1;
+	r = tp_writef(c->tp, "%s/", enc);
+	free(enc);
+	if (r == -1)
+		return -1;
 
-	if (url->path) {
-		tmp = gotweb_urlencode(url->path);
-		if (tmp == NULL)
-			return -1;
-		r = tp_writef(c->tp, "%spath=%s", sep, tmp);
-		free(tmp);
-		if (r == -1)
-			return -1;
-		sep = "&";
-	}
-
+	if (url->folder && url->folder[0] != '\0' &&
+	    tp_writef(c->tp, "%s/", url->folder) == -1)
+		return -1;
+	if (url->file && tp_writes(c->tp, url->file) == -1)
+		return -1;
 	return 0;
 }
 
blob - c0047da16df32ddb81665645f2025c12d9cf8250
file + gotwebd/gotwebd.h
--- gotwebd/gotwebd.h
+++ gotwebd/gotwebd.h
@@ -50,7 +50,10 @@
 /* GOTWEB DEFAULTS */
 #define MAX_QUERYSTRING		 2048
 #define MAX_DOCUMENT_URI	 255
+#define MAX_DOCUMENT_ROOT	 255
 #define MAX_SERVER_NAME		 255
+#define MAX_SCRIPT_NAME		 255
+#define MAX_PATH_INFO		 2048
 
 #define GOTWEB_GIT_DIR		 ".git"
 
@@ -249,7 +252,10 @@ struct request {
 
 	char				 querystring[MAX_QUERYSTRING];
 	char				 document_uri[MAX_DOCUMENT_URI];
+	char				 document_root[MAX_DOCUMENT_ROOT];
 	char				 server_name[MAX_SERVER_NAME];
+	char				 script_name[MAX_SCRIPT_NAME];
+	char				 path_info[MAX_PATH_INFO];
 	int				 https;
 
 	uint8_t				 request_started;
blob - 22583fa65fa1ec7208c4cc6061197077ced00de8
file + gotwebd/pages.tmpl
--- gotwebd/pages.tmpl
+++ gotwebd/pages.tmpl
@@ -167,7 +167,7 @@ nextsep(char *s, char **t)
 	struct server		*srv = c->srv;
 	struct querystring	*qs = c->t->qs;
 	struct gotweb_url	 u_path;
-	const char		*prfx = c->document_uri;
+	const char		*prfx = c->script_name;
 	const char		*css = srv->custom_css;
 
 	memset(&u_path, 0, sizeof(u_path));
@@ -182,18 +182,18 @@ nextsep(char *s, char **t)
     <meta name="viewport" content="initial-scale=1.0" />
     <meta name="msapplication-TileColor" content="#da532c" />
     <meta name="theme-color" content="#ffffff"/>
-    <link rel="apple-touch-icon" sizes="180x180" href="{{ prfx }}apple-touch-icon.png" />
-    <link rel="icon" type="image/png" sizes="32x32" href="{{ prfx }}favicon-32x32.png" />
-    <link rel="icon" type="image/png" sizes="16x16" href="{{ prfx }}favicon-16x16.png" />
-    <link rel="manifest" href="{{ prfx }}site.webmanifest"/>
-    <link rel="mask-icon" href="{{ prfx }}safari-pinned-tab.svg" />
-    <link rel="stylesheet" type="text/css" href="{{ prfx }}{{ css }}" />
+    <link rel="apple-touch-icon" sizes="180x180" href="{{ prfx }}/apple-touch-icon.png" />
+    <link rel="icon" type="image/png" sizes="32x32" href="{{ prfx }}/favicon-32x32.png" />
+    <link rel="icon" type="image/png" sizes="16x16" href="{{ prfx }}/favicon-16x16.png" />
+    <link rel="manifest" href="{{ prfx }}/site.webmanifest"/>
+    <link rel="mask-icon" href="{{ prfx }}/safari-pinned-tab.svg" />
+    <link rel="stylesheet" type="text/css" href="{{ prfx }}/{{ css }}" />
   </head>
   <body>
     <header id="header">
       <div id="got_link">
         <a href="{{ srv->logo_url }}" target="_blank">
-          <img src="{{ prfx }}{{ srv->logo }}" />
+          <img src="{{ prfx }}/{{ srv->logo }}" />
         </a>
       </div>
     </header>
@@ -755,7 +755,8 @@ nextsep(char *s, char **t)
 
 	folder = qs->folder ? qs->folder : "";
 	if (S_ISDIR(mode)) {
-		if (asprintf(&dir, "%s/%s", folder, name) == -1)
+		if (asprintf(&dir, "%s%s%s", folder,
+		    *folder ? "/" : "", name) == -1)
 			return (-1);
 
 		url.action = TREE;
blob - e4e422a7568c7bafebdb57d3377c40636ff1945b
file + gotwebd/sockets.c
--- gotwebd/sockets.c
+++ gotwebd/sockets.c
@@ -221,6 +221,9 @@ sockets_launch(void)
 	if (error)
 		fatal("%s", error->msg);
 
+	if (unveil("/", "r") == -1)
+		fatal("unveil");
+
 	if (unveil(NULL, NULL) == -1)
 		fatal("unveil");
 }