On 7/3/20 9:48 PM, minf...@apache.org wrote:
> Author: minfrin
> Date: Fri Jul 3 19:48:53 2020
> New Revision: 1879488
>
> URL: http://svn.apache.org/viewvc?rev=1879488&view=rev
> Log:
> Add MMAP support to ETag generation.
>
> Modified:
> httpd/httpd/trunk/server/util_etag.c
>
> Modified: httpd/httpd/trunk/server/util_etag.c
> URL:
> http://svn.apache.org/viewvc/httpd/httpd/trunk/server/util_etag.c?rev=1879488&r1=1879487&r2=1879488&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/server/util_etag.c (original)
> +++ httpd/httpd/trunk/server/util_etag.c Fri Jul 3 19:48:53 2020
> @@ -84,6 +88,113 @@ static void etag_end(char *next, const c
> }
>
> /*
> + * Construct a strong ETag by creating a SHA1 hash across the file content.
> + */
> +static char *make_digest_etag(request_rec *r, etag_rec *er, char *vlv,
> + apr_size_t vlv_len, char *weak, apr_size_t weak_len)
> +{
> + apr_sha1_ctx_t context;
> + unsigned char buf[4096]; /* keep this a multiple of 64 */
> + unsigned char digest[APR_SHA1_DIGESTSIZE];
> + apr_file_t *fd = NULL;
> + core_dir_config *cfg;
> + char *etag, *next;
> +
> + apr_size_t nbytes;
> + apr_off_t offset = 0, zero = 0;
> + apr_status_t status;
> +
> + cfg = (core_dir_config *)ap_get_core_module_config(r->per_dir_config);
> +
> + if (er->fd) {
> + fd = er->fd;
> + }
> + else if (er->pathname) {
> + if ((status = apr_file_open(&fd, er->pathname, APR_READ | APR_BINARY,
> + 0, r->pool)) != APR_SUCCESS) {
> + ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10251)
> + "Make etag: could not open %s", er->pathname);
> + return "";
> + }
> + }
> + if (!fd) {
> + return "";
> + }
> +
> + if ((status = apr_file_seek(fd, APR_CUR, &offset)) != APR_SUCCESS) {
> + ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10252)
> + "Make etag: could not seek");
> + if (er->pathname) {
> + apr_file_close(fd);
> + }
> + return "";
> + }
> +
> + if ((status = apr_file_seek(fd, APR_SET, &zero)) != APR_SUCCESS) {
> + ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10253)
> + "Make etag: could not seek");
> + if (er->pathname) {
> + apr_file_close(fd);
> + }
> + return "";
> + }
> +
> +#if APR_HAS_MMAP
> + if (cfg->enable_mmap != ENABLE_MMAP_OFF && er->fd &&
> + er->finfo->size >= APR_MMAP_THRESHOLD &&
> + er->finfo->size < APR_MMAP_LIMIT) {
> + apr_mmap_t *mm;
> +
> + if ((status = apr_mmap_create(&mm, er->fd, 0, er->finfo->size,
> + APR_MMAP_READ, r->pool) != APR_SUCCESS)) {
> + ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10253)
> + "Make etag: could not mmap");
> + return "";
> + }
> +
> + }
> +#endif
> +
> + apr_sha1_init(&context);
> + nbytes = sizeof(buf);
> + while ((status = apr_file_read(fd, buf, &nbytes)) == APR_SUCCESS) {
Why do we still use apr_file_read when we are using MMAP? IMHO we should use
apr_mmap_offset in that case.
But we would need to limit the amount of memory we map at any one time for
large files in order to avoid exhausting memory. So I would propose that we
create a brigade with a single file bucket and do bucket reads, like the
following code from the default handler (excluding the reading):
bb = apr_brigade_create(r->pool, c->bucket_alloc);
e = apr_brigade_insert_file(bb, fd, 0, r->finfo.size, r->pool);
#if APR_HAS_MMAP
if (d->enable_mmap == ENABLE_MMAP_OFF) {
(void)apr_bucket_file_enable_mmap(e, 0);
}
#endif
Of course, that would require us to get the finfo ourselves in the case where
we open the file descriptor on our own.
The reading would then be something like this (without error handling):
while (!APR_BRIGADE_EMPTY(bb))
{
e = APR_BUCKET_FIRST(bb)
apr_bucket_read(e, &str, &len);
apr_sha1_update_binary(&context, str, len);
apr_bucket_delete(e);
}
Regards
Rüdiger