From: Chengyu Zhu <[email protected]>

Add an on-disk cache for OCI blobs and hook it into the
ocierofs_iostream backend. The cache file is created under
/var/run/erofs/cache/oci/ and sized to the blob length.

Reads probe the local sparse file with SEEK_HOLE and only download
missing regions. Downloads start and end on OCIEROFS_IO_CHUNK_SIZE
boundaries so that a partially written filesystem block is never
mistaken for cached data. This avoids redundant downloads and improves
random access performance for remote images.

Signed-off-by: Chengyu Zhu <[email protected]>
---
 lib/liberofs_oci.h |   1 +
 lib/remotes/oci.c  | 235 +++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 228 insertions(+), 8 deletions(-)

diff --git a/lib/liberofs_oci.h b/lib/liberofs_oci.h
index 5298f18..f7b26b6 100644
--- a/lib/liberofs_oci.h
+++ b/lib/liberofs_oci.h
@@ -62,6 +62,7 @@ struct ocierofs_ctx {
 struct ocierofs_iostream {
 	struct ocierofs_ctx *ctx;
 	u64 offset;
+	int cache_fd;	/* local cache file, -1 until the first cached read */
 };
 
 /*
diff --git a/lib/remotes/oci.c b/lib/remotes/oci.c
index ac8d495..8b253a3 100644
--- a/lib/remotes/oci.c
+++ b/lib/remotes/oci.c
@@ -5,6 +5,7 @@
  */
 #define _GNU_SOURCE
 #include "erofs/internal.h"
+#include "erofs/defs.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -12,6 +13,9 @@
 #include <fcntl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#ifdef HAVE_SYS_SENDFILE_H
+#include <sys/sendfile.h>
+#endif
 #include <errno.h>
 #ifdef HAVE_CURL_CURL_H
 #include <curl/curl.h>
@@ -29,6 +33,10 @@
 #include "liberofs_private.h"
 #include "liberofs_gzran.h"
 
+#ifndef SEEK_HOLE
+#define SEEK_HOLE 4
+#endif
+
 #ifdef OCIEROFS_ENABLED
 
 #define DOCKER_REGISTRY "docker.io"
@@ -1425,25 +1433,231 @@ out:
 	return ret;
 }
 
-static ssize_t ocierofs_io_pread(struct erofs_vfile *vf, void *buf, size_t len, u64 offset)
+/*
+ * Make sure blob bytes [offset, offset + needed) are present in the local
+ * cache file, downloading whatever is still missing.
+ *
+ * The cache is a sparse file whose unwritten regions are found with
+ * lseek(SEEK_HOLE).  Returns 0 on success or a negative errno.
+ */
+static int ocierofs_cache(struct ocierofs_iostream *oci_iostream, off_t offset, size_t needed)
 {
-	struct ocierofs_iostream *oci_iostream = *(struct ocierofs_iostream **)vf->payload;
+	struct ocierofs_ctx *ctx = oci_iostream->ctx;
 	void *download_buf = NULL;
 	size_t download_size = 0;
-	ssize_t ret;
+	int ret = 0;
+	off_t hole, align_offset;
+	size_t download_len;
+
+	if (oci_iostream->cache_fd < 0) {
+		char *path;
+		int fd, layer_idx;
+
+		/* best effort: open() below fails if a directory is missing */
+		mkdir("/var/run/erofs", 0777);
+		mkdir("/var/run/erofs/cache", 0777);
+		mkdir("/var/run/erofs/cache/oci", 0777);
+
+		if (asprintf(&path, "/var/run/erofs/cache/oci/%s",
+			     ctx->blob_digest ?: "erofs_oci_unknown") < 0)
+			return -ENOMEM;
+
+		fd = open(path, O_RDWR | O_CREAT, 0666);
+		/* latch errno before free() gets a chance to change it */
+		ret = fd < 0 ? -errno : 0;
+		free(path);
+		if (ret < 0)
+			return ret;
+
+		layer_idx = ocierofs_find_layer_by_digest(ctx, ctx->blob_digest);
+		if (layer_idx >= 0 &&
+		    ftruncate(fd, ctx->layers[layer_idx]->size) < 0) {
+			/* do not keep an unsized cache file around for later reads */
+			ret = -errno;
+			close(fd);
+			return ret;
+		}
+		oci_iostream->cache_fd = fd;
+	}
+
+	hole = lseek(oci_iostream->cache_fd, offset, SEEK_HOLE);
+	if (hole < 0) {
+		/* ENXIO: offset is at or past the end of the cache file */
+		if (errno == ENXIO)
+			return 0;
+		return -errno;
+	}
+	if (hole >= offset + needed)
+		return 0;
 
-	ret = ocierofs_download_blob_range(oci_iostream->ctx, offset, len,
+	/*
+	 * Download whole chunks on both ends.  A write that stops in the middle
+	 * of a filesystem block still allocates the entire block, and SEEK_HOLE
+	 * would then report its zero-filled tail as cached data.  This assumes
+	 * OCIEROFS_IO_CHUNK_SIZE is a multiple of the cache filesystem block size.
+	 */
+	align_offset = round_down(hole, OCIEROFS_IO_CHUNK_SIZE);
+	download_len = round_up(offset + needed, OCIEROFS_IO_CHUNK_SIZE) -
+		       align_offset;
+
+	ret = ocierofs_download_blob_range(ctx, align_offset, download_len,
 					   &download_buf, &download_size);
 	if (ret < 0)
 		return ret;
 
 	if (download_buf && download_size > 0) {
-		memcpy(buf, download_buf, download_size);
-		free(download_buf);
-		return download_size;
+		char *p = download_buf;
+		size_t to_write = download_size;
+		ssize_t written = 0;
+
+		while (to_write > 0) {
+			ssize_t w = pwrite(oci_iostream->cache_fd, p, to_write,
+					   align_offset + written);
+
+			if (w < 0) {
+				if (errno == EINTR)
+					continue;
+				ret = -errno;
+				goto out_free;
+			}
+			written += w;
+			p += w;
+			to_write -= w;
+		}
 	}
 
-	return 0;
+	ret = 0;
+out_free:
+	free(download_buf);
+	return ret;
+}
+
+/*
+ * Copy up to @count bytes of the blob, starting at *@pos (or at the stream
+ * offset when @pos is NULL), from the local cache file to @vout.
+ *
+ * Returns the number of bytes copied, which is short when the end of the cache
+ * file is reached or an error occurs after some progress, or a negative errno
+ * if nothing could be copied.  The position is advanced by the return value.
+ */
+static ssize_t ocierofs_io_sendfile(struct erofs_vfile *vout, struct erofs_vfile *vin,
+				    off_t *pos, size_t count)
+{
+	struct ocierofs_iostream *oci_iostream = *(struct ocierofs_iostream **)vin->payload;
+	off_t offset = pos ? *pos : (off_t)oci_iostream->offset;
+	size_t remaining = count;
+	ssize_t total_written = 0;
+	struct stat st;
+	int ret;
+
+	ret = ocierofs_cache(oci_iostream, offset, count);
+	if (ret < 0)
+		return ret;
+
+	/* the cache file is sized when it is created, so one fstat() is enough */
+	if (fstat(oci_iostream->cache_fd, &st) < 0)
+		return -errno;
+
+	while (remaining > 0 && offset < st.st_size) {
+		size_t chunk = min_t(size_t, remaining, st.st_size - offset);
+		ssize_t sent;
+#if defined(HAVE_SYS_SENDFILE_H) && defined(HAVE_SENDFILE)
+		off_t in_offset = offset;
+
+		sent = sendfile(vout->fd, oci_iostream->cache_fd, &in_offset, chunk);
+		if (sent < 0) {
+			if (errno == EINTR || errno == EAGAIN)
+				continue;
+			ret = -errno;
+			break;
+		}
+#else
+		char buf[32768];
+		char *p = buf;
+		ssize_t left;
+
+		sent = pread(oci_iostream->cache_fd, buf,
+			     min_t(size_t, chunk, sizeof(buf)), offset);
+		if (sent < 0) {
+			if (errno == EINTR)
+				continue;
+			ret = -errno;
+			break;
+		}
+
+		left = sent;
+		while (left > 0) {
+			ssize_t w = write(vout->fd, p, left);
+
+			if (w < 0) {
+				if (errno == EINTR)
+					continue;
+				ret = -errno;
+				break;
+			}
+			p += w;
+			left -= w;
+		}
+		/* count only the bytes that actually reached @vout */
+		sent -= left;
+#endif
+		if (sent == 0)
+			break;
+
+		total_written += sent;
+		remaining -= sent;
+		offset += sent;
+		if (ret < 0)
+			break;
+	}
+
+	if (pos)
+		*pos = offset;
+	else
+		oci_iostream->offset = offset;
+
+	return total_written ? total_written : ret;
+}
+
+/*
+ * Read up to @len bytes of the blob at @offset into @buf, going through the
+ * local cache file.  Returns the number of bytes read (short when the cache
+ * file ends or a read comes back short) or a negative errno if none were read.
+ */
+static ssize_t ocierofs_io_pread(struct erofs_vfile *vf, void *buf, size_t len, u64 offset)
+{
+	struct ocierofs_iostream *oci_iostream = *(struct ocierofs_iostream **)vf->payload;
+	size_t remaining = len;
+	char *p = buf;
+	ssize_t total_read = 0;
+	int ret;
+
+	ret = ocierofs_cache(oci_iostream, offset, len);
+	if (ret < 0)
+		return ret;
+
+	while (remaining > 0) {
+		size_t chunk = min_t(size_t, remaining, OCIEROFS_IO_CHUNK_SIZE);
+		ssize_t n = pread(oci_iostream->cache_fd, p, chunk, offset);
+
+		if (n < 0) {
+			if (errno == EINTR)
+				continue;
+			/* report the bytes already read before the failure */
+			return total_read ? total_read : -errno;
+		}
+		if (n == 0)
+			break;
+
+		p += n;
+		offset += n;
+		remaining -= n;
+		total_read += n;
+		if ((size_t)n < chunk)
+			break;
+	}
+
+	return total_read;
 }
 
 static ssize_t ocierofs_io_read(struct erofs_vfile *vf, void *buf, size_t len)
@@ -1462,6 +1676,9 @@ static void ocierofs_io_close(struct erofs_vfile *vfile)
 {
 	struct ocierofs_iostream *oci_iostream = *(struct ocierofs_iostream **)vfile->payload;
 
+	if (oci_iostream->cache_fd >= 0)
+		close(oci_iostream->cache_fd);
+
 	ocierofs_ctx_cleanup(oci_iostream->ctx);
 	free(oci_iostream->ctx);
 	free(oci_iostream);
@@ -1472,6 +1689,7 @@ static struct erofs_vfops ocierofs_io_vfops = {
 	.pread = ocierofs_io_pread,
 	.read = ocierofs_io_read,
 	.close = ocierofs_io_close,
+	.sendfile = ocierofs_io_sendfile,
 };
 
 int ocierofs_io_open(struct erofs_vfile *vfile, const struct ocierofs_config *cfg)
@@ -1499,6 +1717,7 @@ int ocierofs_io_open(struct erofs_vfile *vfile, const struct ocierofs_config *cf
 
 	oci_iostream->ctx = ctx;
 	oci_iostream->offset = 0;
+	oci_iostream->cache_fd = -1;
 	*vfile = (struct erofs_vfile){.ops = &ocierofs_io_vfops};
 	*(struct ocierofs_iostream **)vfile->payload = oci_iostream;
 	return 0;
-- 
2.47.1
