On Sat, Jan 30, 2021 at 01:11:17PM +0100, g...@suckless.org wrote:
> commit 319ba7083fdde836d6614c6b8b228bf3a9849e95
> Author:     Laslo Hunhold <d...@frign.de>
> AuthorDate: Sat Jan 30 12:53:00 2021 +0100
> Commit:     Laslo Hunhold <d...@frign.de>
> CommitDate: Sat Jan 30 13:10:32 2021 +0100
> 
>     Ignore queries and fragments in URIs
>     
>     Previously, a request for "/index.html" would yield a 200, while a
>     request for "/index.html?foo=bar" would yield a 404, as quark would
>     look for the file "index.html?foo=bar" in the serve directory.
>     
>     To accomodate this behaviour, it's no longer sufficient to just compare
>     realuri and req->uri. Instead, we set a "dirty" flag every time we
>     change the URI in such a way that it requires a redirect.
>     
>     According to RFC 3986 section 3, queries and fragments are there
>     to (further) "identify a resource within the scope of the URI's scheme
>     and naming authority (if any)". However, it's perfectly legitimate to
>     just ignore this further specification when the URI itself is already
>     pointing at a unique resource (i.e. "/index.html").
>     
>     This behaviour is consistent with dynamic web applications which usually
>     ignore parameters they don't care about. Quark is too much Zen to care
>     about any parameters. This has the added bonus that you can now clone
>     repositories (read-only) via the "dumb" HTTP git-protocol, so
>     
>             git clone https://example.org/git/project.git
>     
>     is now possible (provided you run update-server-info during the
>     post-update-hook). This wouldn't work previously because git, when
>     asked to clone via HTTP, would first probe the server with a request for
>     
>             project.git/info/refs?service=git-upload-pack
>     
>     (i.e. asking for the "smart" HTTP git-protocol to confirm). Quark would
>     return a 404, though, while git only gracefully "downgrades" to the
>     "dumb" HTTP git-protocol if the request succeeds but only yields a basic
>     200 response without special git-headers.
>     
>     This way, it is now trivial to also share git-repositories (and other
>     gracefully-downgrading protocols). While the "dumb" HTTP git-protocol
>     only supports read-only-access, I don't think that's much of an overall
>     loss (to the contrary!).
>     
>     HTTP authentication is broken and it makes much more sense to enable
>     ssh-access to contributors and make them push changes via ssh. The key
>     advantage of HTTP-cloning over git://-cloning is the fact that the git
>     protocol can be tampered with, while the HTTP-protocol can be encapsulated
>     into a secure TLS connection.
>     
>     Signed-off-by: Laslo Hunhold <d...@frign.de>
> 

Cool story, bro.

> diff --git a/http.c b/http.c
> index b74c61d..f52da8a 100644
> --- a/http.c
> +++ b/http.c
> @@ -368,12 +368,12 @@ static int
>  normabspath(char *path)
>  {
>       size_t len;
> -     int last = 0;
> +     int dirty = 0, last = 0;
>       char *p, *q;
>  
>       /* require and skip first slash */
>       if (path[0] != '/') {
> -             return 1;
> +             return -1;
>       }
>       p = path + 1;
>  
> @@ -387,7 +387,9 @@ normabspath(char *path)
>                       last = 1;
>               }
>  
> -             if (p == q || (q - p == 1 && p[0] == '.')) {
> +             if (*p == '\0') {
> +                     break;
> +             } else if (p == q || (q - p == 1 && p[0] == '.')) {
>                       /* "/" or "./" */
>                       goto squash;
>               } else if (q - p == 2 && p[0] == '.' && p[1] == '.') {
> @@ -412,9 +414,10 @@ squash:
>                       memmove(p, q + 1, len - ((q + 1) - path) + 2);
>                       len -= (q + 1) - p;
>               }
> +             dirty = 1;
>       }
>  
> -     return 0;
> +     return dirty;
>  }
>  
>  static enum status
> @@ -562,7 +565,7 @@ http_prepare_response(const struct request *req, struct 
> response *res,
>       struct tm tm = { 0 };
>       struct vhost *vhost;
>       size_t len, i;
> -     int hasport, ipv6host;
> +     int dirty = 0, hasport, ipv6host;
>       static char realuri[PATH_MAX], tmpuri[PATH_MAX];
>       char *p, *mime;
>       const char *targethost;
> @@ -570,11 +573,29 @@ http_prepare_response(const struct request *req, struct 
> response *res,
>       /* empty all response fields */
>       memset(res, 0, sizeof(*res));
>  
> -     /* make a working copy of the URI and normalize it */
> +     /*
> +      * make a working copy of the URI, strip queries and fragments
> +      * (ignorable according to RFC 3986 section 3) and normalize it
> +      */
>       memcpy(realuri, req->uri, sizeof(realuri));
> -     if (normabspath(realuri)) {
> +
> +     if ((p = strchr(realuri, '?'))) {
> +             *p = '\0';
> +     } else if ((p = strchr(realuri, '#'))) {
> +             *p = '\0';
> +     }
> +
> +     switch (normabspath(realuri)) {
> +     case -1:
>               s = S_BAD_REQUEST;
>               goto err;
> +     case 0:
> +             /* string is unchanged */
> +             break;
> +     case 1:
> +             /* string was changed */
> +             dirty = 1;
> +             break;
>       }
>  
>       /* match vhost */
> @@ -594,10 +615,12 @@ http_prepare_response(const struct request *req, struct 
> response *res,
>               }
>  
>               /* if we have a vhost prefix, prepend it to the URI */
> -             if (vhost->prefix &&
> -                 prepend(realuri, LEN(realuri), vhost->prefix)) {
> -                     s = S_REQUEST_TOO_LARGE;
> -                     goto err;
> +             if (vhost->prefix) {
> +                     if (prepend(realuri, LEN(realuri), vhost->prefix)) {
> +                             s = S_REQUEST_TOO_LARGE;
> +                             goto err;
> +                     }
> +                     dirty = 1;
>               }
>       }
>  
> @@ -618,14 +641,23 @@ http_prepare_response(const struct request *req, struct 
> response *res,
>                               s = S_REQUEST_TOO_LARGE;
>                               goto err;
>                       }
> +                     dirty = 1;
>                       break;
>               }
>       }
>  
>       /* normalize URI again, in case we introduced dirt */
> -     if (normabspath(realuri)) {
> +     switch (normabspath(realuri)) {
> +     case -1:
>               s = S_BAD_REQUEST;
>               goto err;
> +     case 0:
> +             /* string is unchanged */
> +             break;
> +     case 1:
> +             /* string was changed */
> +             dirty = 1;
> +             break;
>       }
>  
>       /* stat the relative path derived from the URI */
> @@ -644,6 +676,7 @@ http_prepare_response(const struct request *req, struct 
> response *res,
>               if (len > 0 && realuri[len - 1] != '/') {
>                       realuri[len] = '/';
>                       realuri[len + 1] = '\0';
> +                     dirty = 1;
>               }
>       }
>  
> @@ -658,10 +691,10 @@ http_prepare_response(const struct request *req, struct 
> response *res,
>       }
>  
>       /*
> -      * redirect if the original URI and the "real" URI differ or if
> -      * the requested host is non-canonical
> +      * redirect if the URI needs to be redirected or the requested
> +      * host is non-canonical
>        */
> -     if (strcmp(req->uri, realuri) || (srv->vhost && vhost &&
> +     if (dirty || (srv->vhost && vhost &&
>           strcmp(req->field[REQ_HOST], vhost->chost))) {
>               res->status = S_MOVED_PERMANENTLY;
>  
> @@ -716,12 +749,12 @@ http_prepare_response(const struct request *req, struct 
> response *res,
>                * (optionally including the vhost servedir as a prefix)
>                * into the actual response-path
>                */
> -             if (esnprintf(res->uri, sizeof(res->uri), "%s", req->uri)) {
> +             if (esnprintf(res->uri, sizeof(res->uri), "%s", realuri)) {
>                       s = S_REQUEST_TOO_LARGE;
>                       goto err;
>               }
>               if (esnprintf(res->path, sizeof(res->path), "%s%s",
> -                 vhost ? vhost->dir : "", RELPATH(req->uri))) {
> +                 vhost ? vhost->dir : "", RELPATH(realuri))) {
>                       s = S_REQUEST_TOO_LARGE;
>                       goto err;
>               }
> @@ -733,7 +766,7 @@ http_prepare_response(const struct request *req, struct 
> response *res,
>                * the URI
>                */
>               if (esnprintf(tmpuri, sizeof(tmpuri), "%s%s",
> -                           req->uri, srv->docindex)) {
> +                           realuri, srv->docindex)) {
>                       s = S_REQUEST_TOO_LARGE;
>                       goto err;
>               }
> 

-- 
Kind regards,
Hiltjo

Reply via email to