Add a .pread() interface to support gzip random access with the help of the previous zinfo gzip indices.
Note that (e)stargz formats are also supported.

Signed-off-by: Gao Xiang <[email protected]>
---
 lib/gzran.c          | 223 +++++++++++++++++++++++++++++++++++++++++++
 lib/liberofs_gzran.h |   2 +
 2 files changed, 225 insertions(+)

diff --git a/lib/gzran.c b/lib/gzran.c
index e3b4e89..eba65ce 100644
--- a/lib/gzran.c
+++ b/lib/gzran.c
@@ -201,6 +201,223 @@ int erofs_gzran_builder_final(struct erofs_gzran_builder *gb)
 	free(gb);
 	return 0;
 }
+
+/* Private state of a read-only vfile that inflates a gzip blob on demand */
+struct erofs_gzran_iostream {
+	struct erofs_vfile *vin;		/* compressed input */
+	struct erofs_gzran_cutpoint *cp;	/* cut points, looked up by outpos */
+	u32 entries;				/* number of elements in cp[] */
+	u32 span_size;				/* copied from the zinfo header */
+};
+
+/* Release the cut point table and the vfile itself; vin is left untouched */
+static void erofs_gzran_ios_vfclose(struct erofs_vfile *vf)
+{
+	struct erofs_gzran_iostream *ios =
+		(struct erofs_gzran_iostream *)vf->payload;
+
+	free(ios->cp);
+	free(vf);
+}
+
+/*
+ * Read up to @len uncompressed bytes at uncompressed @offset into @buf.
+ *
+ * Inflation restarts from the last cut point whose outpos is not beyond
+ * @offset; the output produced before @offset is decoded and thrown away.
+ *
+ * Return the number of bytes stored in @buf (short, possibly 0, if the
+ * deflate stream ends first) or a negative errno.
+ */
+static ssize_t erofs_gzran_ios_vfpread(struct erofs_vfile *vf, void *buf,
+				       size_t len, u64 offset)
+{
+	struct erofs_gzran_iostream *ios =
+		(struct erofs_gzran_iostream *)vf->payload;
+	struct erofs_gzran_cutpoint *cp = ios->cp;
+	u8 src[1 << 14], discard[EROFS_GZRAN_WINSIZE];
+	unsigned int bits;
+	bool skip = true;
+	u64 inpos;
+	z_stream strm;
+	ssize_t err;
+	int ret;
+
+	while (cp < ios->cp + ios->entries - 1 && cp[1].outpos <= offset)
+		++cp;
+
+	strm.zalloc = Z_NULL;
+	strm.zfree = Z_NULL;
+	strm.opaque = Z_NULL;
+	strm.avail_in = 0;
+	strm.next_in = Z_NULL;
+	ret = inflateInit2(&strm, -15);		/* raw inflate */
+	if (ret != Z_OK)
+		return -EFAULT;
+
+	/* a cut point in the middle of a byte needs that byte to be re-read */
+	bits = cp->in_bitpos & 7;
+	inpos = (cp->in_bitpos >> 3) - (bits ? 1 : 0);
+	ret = erofs_io_pread(ios->vin, src, sizeof(src), inpos);
+	if (ret <= 0) {
+		/* EOF right at the cut point means a truncated input */
+		err = ret ? ret : -EIO;
+		goto out;
+	}
+
+	if (bits) {
+		inflatePrime(&strm, bits, src[0] >> (8 - bits));
+		strm.next_in = src + 1;
+		strm.avail_in = ret - 1;
+	} else {
+		strm.next_in = src;
+		strm.avail_in = ret;
+	}
+	inpos += ret;
+	(void)inflateSetDictionary(&strm, cp->window, sizeof(cp->window));
+
+	offset -= cp->outpos;
+	do {
+		/* define where to put uncompressed data, and how much */
+		if (!offset && skip) {		/* at offset now */
+			strm.avail_out = len;
+			strm.next_out = buf;
+			skip = false;		/* only do this once */
+		} else if (offset > sizeof(discard)) {	/* skip WINSIZE bytes */
+			strm.avail_out = sizeof(discard);
+			strm.next_out = discard;
+			offset -= sizeof(discard);
+		} else if (offset) {		/* last skip */
+			strm.avail_out = (unsigned int)offset;
+			strm.next_out = discard;
+			offset = 0;
+		}
+
+		/* uncompress until avail_out filled, or end of stream */
+		do {
+			if (!strm.avail_in) {
+				ret = erofs_io_pread(ios->vin, src, sizeof(src),
+						     inpos);
+				if (ret <= 0) {
+					err = ret ? ret : -EIO;
+					goto out;
+				}
+				inpos += ret;
+				strm.avail_in = ret;
+				strm.next_in = src;
+			}
+			ret = inflate(&strm, Z_NO_FLUSH);	/* normal inflate */
+			if (ret == Z_NEED_DICT)
+				ret = Z_DATA_ERROR;
+			if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) {
+				err = -EIO;
+				goto out;
+			}
+			if (ret == Z_STREAM_END)
+				break;
+		} while (strm.avail_out);
+
+		/* if reach end of stream, then don't keep trying to get more */
+		if (ret == Z_STREAM_END)
+			break;
+
+		/* do until offset reached and requested data read, or stream ends */
+	} while (skip);
+
+	/* nothing was stored in buf if the stream ended before @offset */
+	err = skip ? 0 : len - strm.avail_out;
+out:
+	/* free the inflate state on every path once inflateInit2() succeeded */
+	(void)inflateEnd(&strm);
+	return err;
+}
+
+static struct erofs_vfops erofs_gzran_ios_vfops = {
+	.pread = erofs_gzran_ios_vfpread,
+	.close = erofs_gzran_ios_vfclose,
+};
+
+/*
+ * Create a vfile which reads the inflated contents of @vin by using the
+ * zinfo index in @zinfo_buf.
+ *
+ * @len is the size of @zinfo_buf; 0 skips the size checks and treats the
+ * index as version 2.  A version 1 index holds one checkpoint less than
+ * `have`, so the initial cut point is made up for it.
+ *
+ * Return the new vfile or an ERR_PTR(); its close handler leaves @vin open.
+ */
+struct erofs_vfile *erofs_gzran_zinfo_open(struct erofs_vfile *vin,
+					   void *zinfo_buf, unsigned int len)
+{
+	struct aws_soci_zinfo_header *h;
+	struct aws_soci_zinfo_ckpt *c;
+	struct erofs_vfile *vf;
+	struct erofs_gzran_iostream *ios;
+	unsigned int version;
+	u64 v2_size;
+	u32 i;
+	int ret;
+
+	if (len && len < sizeof(*h))
+		return ERR_PTR(-EINVAL);
+
+	vf = malloc(sizeof(*vf) + sizeof(*ios));
+	if (!vf)
+		return ERR_PTR(-ENOMEM);
+
+	ios = (struct erofs_gzran_iostream *)vf->payload;
+	h = zinfo_buf;
+	ios->entries = le32_to_cpu(h->have);
+	ios->span_size = le32_to_cpu(h->span_size);
+	/* .pread() always dereferences cp[0], so an empty index is unusable */
+	if (!ios->entries) {
+		ret = -EINVAL;
+		goto err_ios;
+	}
+
+	/* 64-bit math: a huge `have` must not wrap around and match @len */
+	v2_size = (u64)sizeof(*c) * ios->entries + sizeof(*h);
+	if (!len || v2_size == len) {
+		version = 2;
+	} else if (v2_size - sizeof(*c) == len) {
+		version = 1;
+	} else {
+		ret = -EOPNOTSUPP;
+		goto err_ios;
+	}
+
+	/* calloc() also rejects entries * sizeof(*ios->cp) overflowing size_t */
+	ios->cp = calloc(ios->entries, sizeof(*ios->cp));
+	if (!ios->cp) {
+		ret = -ENOMEM;
+		goto err_ios;
+	}
+
+	i = 0;
+	if (version == 1) {
+		/* 10: presumably a gzip header without optional fields */
+		ios->cp[0] = (struct erofs_gzran_cutpoint) {
+			.in_bitpos = 10 << 3,
+			.outpos = 0,
+		};
+		i = 1;
+	}
+
+	c = (struct aws_soci_zinfo_ckpt *)(h + 1);
+	for (; i < ios->entries; ++i, ++c) {
+		ios->cp[i].in_bitpos = (le64_to_cpu(c->in) << 3) | c->bits;
+		ios->cp[i].outpos = le64_to_cpu(c->out);
+		/* sizeof(*c->window) would only copy the first element */
+		memcpy(ios->cp[i].window, c->window, sizeof(c->window));
+	}
+	ios->vin = vin;
+	vf->ops = &erofs_gzran_ios_vfops;
+	return vf;
+err_ios:
+	free(vf);
+	return ERR_PTR(ret);
+}
 #else
 struct erofs_gzran_builder *erofs_gzran_builder_init(struct erofs_vfile *vf,
 						     u32 span_size)
@@ -220,4 +437,10 @@ int erofs_gzran_builder_final(struct erofs_gzran_builder *gb)
 {
 	return 0;
 }
+
+struct erofs_vfile *erofs_gzran_zinfo_open(struct erofs_vfile *vin,
+					   void *zinfo_buf, unsigned int len)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
 #endif
diff --git a/lib/liberofs_gzran.h b/lib/liberofs_gzran.h
index 4764506..443fe15 100644
--- a/lib/liberofs_gzran.h
+++ b/lib/liberofs_gzran.h
@@ -18,4 +18,6 @@ int erofs_gzran_builder_export_zinfo(struct erofs_gzran_builder *gb,
 				     struct erofs_vfile *zinfo_vf);
 int erofs_gzran_builder_final(struct erofs_gzran_builder *gb);
 
+struct erofs_vfile *erofs_gzran_zinfo_open(struct erofs_vfile *vin,
+					   void *zinfo_buf, unsigned int len);
 #endif
-- 
2.43.5
