On 7/3/20 9:48 PM, minf...@apache.org wrote:
> Author: minfrin
> Date: Fri Jul  3 19:48:53 2020
> New Revision: 1879488
> 
> URL: http://svn.apache.org/viewvc?rev=1879488&view=rev
> Log:
> Add MMAP support to ETag generation.
> 
> Modified:
>     httpd/httpd/trunk/server/util_etag.c
> 
> Modified: httpd/httpd/trunk/server/util_etag.c
> URL: 
> http://svn.apache.org/viewvc/httpd/httpd/trunk/server/util_etag.c?rev=1879488&r1=1879487&r2=1879488&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/server/util_etag.c (original)
> +++ httpd/httpd/trunk/server/util_etag.c Fri Jul  3 19:48:53 2020

> @@ -84,6 +88,113 @@ static void etag_end(char *next, const c
>  }
>  
>  /*
> + * Construct a strong ETag by creating a SHA1 hash across the file content.
> + */
> +static char *make_digest_etag(request_rec *r, etag_rec *er, char *vlv,
> +             apr_size_t vlv_len, char *weak, apr_size_t weak_len)
> +{
> +    apr_sha1_ctx_t context;
> +    unsigned char buf[4096]; /* keep this a multiple of 64 */
> +    unsigned char digest[APR_SHA1_DIGESTSIZE];
> +    apr_file_t *fd = NULL;
> +    core_dir_config *cfg;
> +    char *etag, *next;
> +
> +    apr_size_t nbytes;
> +    apr_off_t offset = 0, zero = 0;
> +    apr_status_t status;
> +
> +    cfg = (core_dir_config *)ap_get_core_module_config(r->per_dir_config);
> +
> +    if (er->fd) {
> +        fd = er->fd;
> +    }
> +    else if (er->pathname) {
> +        if ((status = apr_file_open(&fd, er->pathname, APR_READ | APR_BINARY,
> +                0, r->pool)) != APR_SUCCESS) {
> +            ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10251)
> +                          "Make etag: could not open %s", er->pathname);
> +            return "";
> +        }
> +    }
> +    if (!fd) {
> +        return "";
> +    }
> +
> +    if ((status = apr_file_seek(fd, APR_CUR, &offset)) != APR_SUCCESS) {
> +        ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10252)
> +                      "Make etag: could not seek");
> +        if (er->pathname) {
> +            apr_file_close(fd);
> +        }
> +        return "";
> +    }
> +
> +    if ((status = apr_file_seek(fd, APR_SET, &zero)) != APR_SUCCESS) {
> +        ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10253)
> +                      "Make etag: could not seek");
> +        if (er->pathname) {
> +            apr_file_close(fd);
> +        }
> +        return "";
> +    }
> +
> +#if APR_HAS_MMAP
> +    if (cfg->enable_mmap != ENABLE_MMAP_OFF && er->fd &&
> +            er->finfo->size >= APR_MMAP_THRESHOLD &&
> +            er->finfo->size < APR_MMAP_LIMIT) {
> +        apr_mmap_t *mm;
> +
> +        if ((status = apr_mmap_create(&mm, er->fd, 0, er->finfo->size,
> +                            APR_MMAP_READ, r->pool) != APR_SUCCESS)) {
> +            ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10253)
> +                          "Make etag: could not mmap");
> +            return "";
> +        }
> +
> +    }
> +#endif
> +
> +    apr_sha1_init(&context);
> +    nbytes = sizeof(buf);
> +    while ((status = apr_file_read(fd, buf, &nbytes)) == APR_SUCCESS) {

Why do we still use apr_file_read when we use MMAP? IMHO we should use
apr_mmap_offset.
However, we would need to consider the amount of memory we map at the same time
for large files in order to avoid exhausting the memory. So I would propose
that we create a brigade with one file bucket and do bucket reads, like the
following code from the default handler (excluding the reading):

        bb = apr_brigade_create(r->pool, c->bucket_alloc);

        e = apr_brigade_insert_file(bb, fd, 0, r->finfo.size, r->pool);

#if APR_HAS_MMAP
            if (d->enable_mmap == ENABLE_MMAP_OFF) {
                (void)apr_bucket_file_enable_mmap(e, 0);
            }
#endif

Of course, that would require getting the finfo in case we open the file
descriptor on our own.

The reading would then be something like (without error handling):

while (!APR_BRIGADE_EMPTY(bb))
{
      e = APR_BUCKET_FIRST(bb)
      apr_bucket_read(e, &str, &len);
      apr_sha1_update_binary(&context, str, len);
      apr_bucket_delete(e);
}


Regards

Rüdiger

Reply via email to