On Thu, May 22, 2025 at 07:04:52PM +0100, Tim Woodall wrote:
> Patches redone. Main changes:
> rename inusefile -> lockfile
> check the error code from ext2fs_block_iterate3
> diff -urN e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c
> e2fsprogs-1.47.2~rc1/misc/fuse2fs.c
> --- e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c 2024-11-29 08:02:27.000000000
> +0000
> +++ e2fsprogs-1.47.2~rc1/misc/fuse2fs.c 2024-11-29 08:02:27.000000000
> +0000
> @@ -1238,6 +1238,25 @@
> return update_mtime(fs, dir, NULL);
> }
>
> +static int release_blocks_proc(ext2_filsys fs, blk64_t *blocknr,
> + e2_blkcnt_t blockcnt EXT2FS_ATTR((unused)),
> + blk64_t ref_block EXT2FS_ATTR((unused)),
> + int ref_offset EXT2FS_ATTR((unused)),
> + void *private)
> +{
> + blk64_t block = *blocknr;
> + blk64_t *last_cluster = (blk64_t *)private;
> + blk64_t cluster = EXT2FS_B2C(fs, block);
> +
> + if (cluster == *last_cluster)
> + return 0;
> +
> + *last_cluster = cluster;
> +
> + ext2fs_block_alloc_stats2(fs, block, -1);
> + return 0;
> +}
> +
> static int remove_inode(struct fuse2fs *ff, ext2_ino_t ino)
> {
> ext2_filsys fs = ff->fs;
> @@ -1279,8 +1298,9 @@
> goto write_out;
>
> if (ext2fs_inode_has_valid_blocks2(fs, (struct ext2_inode *)&inode)) {
> - err = ext2fs_punch(fs, ino, (struct ext2_inode *)&inode, NULL,
> - 0, ~0ULL);
> + blk64_t last_cluster = 0;
> + err = ext2fs_block_iterate3(fs, ino, BLOCK_FLAG_READ_ONLY,
> + NULL, release_blocks_proc, &last_cluster);
> if (err) {
> ret = translate_error(fs, ino, err);
> goto write_out;
> diff -urN e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c
> e2fsprogs-1.47.2~rc1/misc/fuse2fs.c
> --- e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c 2024-11-29 08:02:27.000000000
> +0000
> +++ e2fsprogs-1.47.2~rc1/misc/fuse2fs.c 2024-11-29 08:02:27.000000000
> +0000
> @@ -348,6 +348,8 @@
> unsigned long offset;
> FILE *err_fp;
> unsigned int next_generation;
> + int haslockfile;
> + char* lockfile;
> };
>
> #define FUSE2FS_CHECK_MAGIC(fs, ptr, num) do {if ((ptr)->magic != (num)) \
> @@ -3873,6 +3875,8 @@
> FUSE2FS_OPT("no_default_opts", no_default_opts, 1),
> FUSE2FS_OPT("norecovery", norecovery, 1),
> FUSE2FS_OPT("offset=%lu", offset, 0),
> + FUSE2FS_OPT("lockfile", haslockfile, 1),
> + FUSE2FS_OPT("lockfile=%s", lockfile, 0),
>
> FUSE_OPT_KEY("-V", FUSE2FS_VERSION),
> FUSE_OPT_KEY("--version", FUSE2FS_VERSION),
> @@ -3914,6 +3918,8 @@
> " -o offset=<bytes> similar to mount -o offset=<bytes>, mount
> the partition starting at <bytes>\n"
> " -o norecovery don't replay the journal (implies ro)\n"
> " -o fuse2fs_debug enable fuse2fs debugging\n"
> + " -o lockfile use <image>.lck to show that fuse is still
> using the file system image\n"
> + " -o lockfile=<file> file to show that fuse is still using the
> file system image\n"
Yeah, we probably need an explicit lock file when the ext4 image is
actually a regular file and not a blockdev that can be opened with
O_EXCL.
> "\n",
> outargs->argv[0]);
> if (key == FUSE2FS_HELPFULL) {
> @@ -3975,7 +3981,7 @@
> fctx.err_fp = fopen(logfile, "a");
> if (!fctx.err_fp) {
> perror(logfile);
> - goto out;
> + exit(1);
> }
> } else
> fctx.err_fp = stderr;
> @@ -3987,6 +3993,42 @@
> fctx.alloc_all_blocks = 1;
> }
>
> + ret = 1;
> + if (fctx.haslockfile && !fctx.lockfile) {
> + fctx.lockfile = malloc(strlen(fctx.device) + 5);
> + if (!fctx.lockfile) {
> + fprintf(stderr, "Out of memory\n");
> + goto out;
> + }
> + strcpy(fctx.lockfile, fctx.device);
> + strcat(fctx.lockfile, ".lck");
Would be neat to use asprintf here if it's available.
> + }
> + if (fctx.lockfile) {
> + int fd = open(fctx.lockfile, O_CREAT | O_EXCL | O_WRONLY, 0400);
> + if (fd == -1) {
> + if (errno == EEXIST)
> + fprintf(stderr, "Requested lockfile=%s but it
> already exists\n", fctx.lockfile);
> + else
> + fprintf(stderr, "Requested lockfile=%s but
> couldn't create: %s\n", fctx.lockfile, strerror(errno));
> + free(fctx.lockfile);
> + fctx.lockfile = NULL;
> + goto out;
> + }
> + close(fd);
> + char resolved[PATH_MAX];
> + if (!realpath(fctx.lockfile, resolved)) {
> + fprintf(stderr, "Could not resolve realpath for
> lockfile=%s: %s\n", fctx.lockfile, strerror(errno));
> + goto out;
> + }
> + char* resolveddup = strdup(resolved);
> + if (!resolveddup) {
> + fprintf(stderr, "Out of memory\n");
> + goto out;
> + }
> + free(fctx.lockfile);
> + fctx.lockfile = resolveddup;
> + }
> +
> /* Start up the fs (while we still can use stdout) */
> ret = 2;
> if (!fctx.ro)
> @@ -4107,6 +4149,11 @@
> com_err(argv[0], err, "while closing fs");
> global_fs = NULL;
> }
> + if(fctx.lockfile) {
> + err = unlink(fctx.lockfile);
> + if (err)
> + com_err(argv[0], err, "unlink: %s while unlinking
> '%s'", strerror(errno), fctx.lockfile);
You probably ought to free fctx.lockfile here?
> + }
> return ret;
> }
>
> diff -urN e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c
> e2fsprogs-1.47.2~rc1/misc/fuse2fs.c
> --- e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c 2024-11-29 08:02:27.000000000
> +0000
> +++ e2fsprogs-1.47.2~rc1/misc/fuse2fs.c 2024-11-29 08:02:27.000000000
> +0000
> @@ -2040,6 +2040,147 @@
> return ret;
> }
>
> +struct block_context {
> + e2_blkcnt_t next_block;
> + off_t blksize;
> + off_t offset;
> + off_t pos;
> + off_t next_hole;
> + off_t next_data;
> +};
> +
> +static int
> +dumponeblock(ext2_filsys fs, blk64_t *blocknr, e2_blkcnt_t blockcnt,
> + blk64_t ref_block, int ref_offset, void * privdata)
> +{
> + struct block_context *p;
> + e2_blkcnt_t i;
> +
> + p = (struct block_context *)privdata;
> + printf("p->pos = %ld p->offset=%ld blockcnt=%lld\n", p->pos, p->offset,
> blockcnt);
dbg_printf, please
> +
> + // Stepping over a hole
> + e2_blkcnt_t holesize = blockcnt - p->next_block;
I'm confused by this -- what do pos and offset track? Is next_block a
logical block offset, in which case it should be a blk64_t? Or is it
actually a block count, since you perform arithmatic with blockcnt?
> + if (p->pos <= p->offset && p->pos + holesize * p->blksize > p->offset) {
> + // offset is in this hole
> + p->next_hole = p->offset;
> + } else if (p->pos > p->offset && p->pos < p->next_hole) {
> + // First hole after offset
> + p->next_hole = p->pos;
> + }
> + p->pos += p->blksize * holesize;
> +
> + // A data block
> + p->next_block = blockcnt + 1;
> + if (p->pos <= p->offset && p->pos + p->blksize > p->offset) {
> + // offset is in this data block
> + p->next_data = p->offset;
> + } else if (p->pos > p->offset && p->pos < p->next_data) {
> + // first data block after offset
> + p->next_data = p->pos;
> + }
> + p->pos += p->blksize;
> + return 0;
> +}
> +
> +
> +static off_t op_lseek(const char* path, off_t offset, int whence, struct
> fuse_file_info *fp)
> +{
> + struct fuse_context *ctxt = fuse_get_context();
> + struct fuse2fs *ff = (struct fuse2fs *)ctxt->private_data;
> + struct fuse2fs_file_handle *fh =
> + (struct fuse2fs_file_handle *)(uintptr_t)fp->fh;
> + ext2_filsys fs;
> + struct ext2_inode_large inode;
> + blk64_t start, end;
> + __u64 i_size;
> + errcode_t err;
> + int flags;
> +
> + FUSE2FS_CHECK_CONTEXT(ff);
> + fs = ff->fs;
> + FUSE2FS_CHECK_MAGIC(fs, fh, FUSE2FS_FILE_MAGIC);
> +
> + memset(&inode, 0, sizeof(inode));
> + err = ext2fs_read_inode_full(fs, fh->ino, (struct ext2_inode *)&inode,
EXT2_INODE(), not an explicit cast.
Also you need to take ff->bfl to prevent races inside libext2fs
resulting from concurrent operations. This would be much less bad if
there were proper buffer/inode caches with their own locks...
> + sizeof(inode));
> + if (err)
> + return err;
You ought to call translate_error to convert the libext2fs error codes
to errnos to be fed back to the kernel.
> + i_size = EXT2_I_SIZE(&inode);
> +
> + if (offset >= i_size)
> + return -ENXIO;
> +
> + struct block_context bc = {
> + .next_block = 0,
> + .blksize = fs->blocksize,
> + .offset = offset,
> + .pos = 0,
No need to explicitly initialize fields to zero; omitting them will do
that for you.
> + .next_hole = i_size,
> + .next_data = i_size,
> + };
> +
> + if (inode.i_mode & S_IFREG && inode.i_flags & EXT4_EXTENTS_FL) {
> + ext2_extent_handle_t handle = NULL;
> + struct ext2fs_extent extent;
> + int op = EXT2_EXTENT_ROOT;
> +
> + err = ext2fs_extent_open(fs, fh->ino, &handle);
> + if (err) {
> + // Why doesn't op_create do this?
It should, I'll fix it. Actually that whole thing is fubar, it also
needs to drop ff->bfl and free temp_path. Thanks for pointing that out.
--D
> + err = translate_error(fs, fh->ino, err);
> + return err;
> + }
> + while (1) {
> + err = ext2fs_extent_get(handle, op, &extent);
> + if (err == EXT2_ET_EXTENT_NO_NEXT)
> + break;
> + if (err) {
> + err = translate_error(fs, fh->ino, err);
> + ext2fs_extent_free(handle);
> + return err;
> + }
> + op = EXT2_EXTENT_NEXT;
> +
> + if (extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT) {
> + continue;
> + }
> + if (!(extent.e_flags & EXT2_EXTENT_FLAGS_LEAF)) {
> + continue;
> + }
> +
> + blk64_t start = extent.e_pblk;
> + e2_blkcnt_t blockcnt = extent.e_lblk;
> + for(blk64_t blocknr = start; blocknr < start +
> extent.e_len; ++blocknr, ++blockcnt) {
> + // TODO We can be much more efficient here
> + dumponeblock(fs, &blocknr, blockcnt, 0, 0, &bc);
> + }
> + }
> + ext2fs_extent_free(handle);
> + } else if (inode.i_mode & S_IFREG && inode.i_flags &
> EXT4_INLINE_DATA_FL) {
> + if (whence == SEEK_DATA) {
> + return offset;
> + } else {
> + return i_size;
> + }
> + } else {
> + ext2fs_block_iterate3(fs, fh->ino, BLOCK_FLAG_DATA_ONLY, NULL,
> dumponeblock, &bc);
> + }
> +
> + /* deal with holes at the end of the inode */
> + if (i_size > bc.pos) {
> + if (bc.next_hole == i_size)
> + bc.next_hole = bc.pos > bc.offset ? bc.pos : bc.offset;
> + }
> +
> + if (whence == SEEK_DATA) {
> + if (bc.next_data == i_size) return -ENXIO;
> + return bc.next_data;
> + } else {
> + return bc.next_hole;
> + }
> +}
> +
> static int op_truncate(const char *path, off_t len
> #if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0)
> , struct fuse_file_info *fi EXT2FS_ATTR((unused))
> @@ -3695,6 +3836,7 @@
> .fallocate = op_fallocate,
> # endif
> #endif
> + .lseek = op_lseek,
> };
>
> static int get_random_bytes(void *p, size_t sz)