Hello community,

here is the log from the commit of package duperemove for openSUSE:Factory 
checked in at 2014-10-31 18:27:56
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/duperemove (Old)
 and      /work/SRC/openSUSE:Factory/.duperemove.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "duperemove"

Changes:
--------
--- /work/SRC/openSUSE:Factory/duperemove/duperemove.changes    2014-09-30 
19:42:03.000000000 +0200
+++ /work/SRC/openSUSE:Factory/.duperemove.new/duperemove.changes       
2014-10-31 20:03:36.000000000 +0100
@@ -1,0 +2,11 @@
+Fri Oct 31 02:44:53 UTC 2014 - mfas...@suse.com
+
+- Update to duperemove v0.09.beta2
+  - fix memory leak
+  - fix hardlink detection on btrfs
+  - print file number status during csum phase
+  - print a status bar during extent search
+  - several bugfixes and performance improvements to extent search
+- Removed patch: 001-fix-build.patch
+
+-------------------------------------------------------------------

Old:
----
  001-fix-build.patch
  duperemove-v0.09.beta1.tar.gz

New:
----
  duperemove-v0.09.beta2.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ duperemove.spec ++++++
--- /var/tmp/diff_new_pack.49iyG2/_old  2014-10-31 20:03:37.000000000 +0100
+++ /var/tmp/diff_new_pack.49iyG2/_new  2014-10-31 20:03:37.000000000 +0100
@@ -17,13 +17,13 @@
 
 
 %define modname duperemove
-%define tar_version v0.09.beta1
+%define tar_version v0.09.beta2
 
 Name:           duperemove
 BuildRequires:  gcc-c++
 BuildRequires:  glib2-devel
 BuildRequires:  libgcrypt-devel
-Version:        0.09~beta1
+Version:        0.09~beta2
 Release:        0
 Summary:        Software to find duplicate extents in files and remove them
 License:        GPL-2.0
@@ -31,7 +31,6 @@
 Url:            https://github.com/markfasheh/duperemove
 Source:         %{modname}-%{tar_version}.tar.gz
 BuildRoot:      %{_tmppath}/%{name}-%{version}-build
-Patch1:         001-fix-build.patch
 
 %description
 Duperemove finds duplicate extents in files and prints them to the
@@ -48,7 +47,6 @@
 
 %prep
 %setup -q -n %{modname}-%{tar_version}
-%patch1 -p1
 
 %build
 make

++++++ duperemove-v0.09.beta1.tar.gz -> duperemove-v0.09.beta2.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/Makefile 
new/duperemove-v0.09.beta2/Makefile
--- old/duperemove-v0.09.beta1/Makefile 2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/Makefile 2014-10-31 00:27:39.000000000 +0100
@@ -1,11 +1,11 @@
-RELEASE=v0.09.beta1
+RELEASE=v0.09.beta2
 
 CC = gcc
 CFLAGS = -Wall -ggdb
 
 MANPAGES=duperemove.8 btrfs-extent-same.8
 
-DIST_SOURCES=csum-gcrypt.c csum-mhash.c csum.h duperemove.c hash-tree.c 
hash-tree.h results-tree.c results-tree.h kernel.h LICENSE list.h Makefile 
rbtree.c rbtree.h rbtree.txt README TODO dedupe.c dedupe.h btrfs-ioctl.h 
filerec.c filerec.h $(MANPAGES) btrfs-extent-same.c debug.h util.c util.h 
serialize.c serialize.h hashstats.c
+DIST_SOURCES=csum-gcrypt.c csum-mhash.c csum.h duperemove.c hash-tree.c 
hash-tree.h results-tree.c results-tree.h kernel.h LICENSE list.h Makefile 
rbtree.c rbtree.h rbtree.txt README TODO dedupe.c dedupe.h btrfs-ioctl.h 
filerec.c filerec.h btrfs-util.c btrfs-util.h $(MANPAGES) btrfs-extent-same.c 
debug.h util.c util.h serialize.c serialize.h hashstats.c
 DIST=duperemove-$(RELEASE)
 DIST_TARBALL=$(DIST).tar.gz
 TEMP_INSTALL_DIR:=$(shell mktemp -du -p .)
@@ -26,7 +26,7 @@
        $(crypt_CFLAGS) $(glib_CFLAGS)
 LIBRARY_FLAGS += $(crypt_LIBS) $(glib_LIBS)
 
-objects = duperemove.o rbtree.o hash-tree.o results-tree.o dedupe.o filerec.o 
util.o serialize.o $(hash_obj)
+objects = duperemove.o rbtree.o hash-tree.o results-tree.o dedupe.o filerec.o 
util.o serialize.o btrfs-util.o $(hash_obj)
 progs = duperemove
 
 DESTDIR = /
@@ -38,7 +38,7 @@
 all: $(progs) kernel.h list.h btrfs-ioctl.h debug.h
 
 duperemove: $(objects) kernel.h duperemove.c
-       $(CC) $(LIBRARY_FLAGS) $(CFLAGS) $(objects) -o duperemove
+       $(CC) $(CFLAGS) $(objects) -o duperemove $(LIBRARY_FLAGS)
 
 tarball: clean
        mkdir -p $(TEMP_INSTALL_DIR)/$(DIST)
@@ -60,14 +60,14 @@
        done
 
 csum-test: $(hash_obj) csum-test.c
-       $(CC) $(LIBRARY_FLAGS) $(CFLAGS) $(hash_obj) -o csum-test csum-test.c
+       $(CC) $(CFLAGS) $(hash_obj) -o csum-test csum-test.c  $(LIBRARY_FLAGS)
 
 filerec-test: filerec.c filerec.h rbtree.o
-       $(CC) $(LIBRARY_FLAGS) $(CFLAGS) -DFILEREC_TEST filerec.c rbtree.o -o 
filerec-test
+       $(CC) $(CFLAGS) -DFILEREC_TEST filerec.c rbtree.o -o filerec-test 
$(LIBRARY_FLAGS)
 
 hashstats_obj = $(hash_obj) rbtree.o hash-tree.o filerec.o util.o serialize.o 
results-tree.o
 hashstats: $(hashstats_obj) hashstats.c
-       $(CC) $(LIBRARY_FLAGS) $(CFLAGS) $(hashstats_obj) hashstats.c -o 
hashstats
+       $(CC) $(CFLAGS) $(hashstats_obj) hashstats.c -o hashstats 
$(LIBRARY_FLAGS)
 
 clean:
        rm -fr $(objects) $(progs) $(DIST_TARBALL) btrfs-extent-same 
filerec-test hashstats csum-*.o *~
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/TODO 
new/duperemove-v0.09.beta2/TODO
--- old/duperemove-v0.09.beta1/TODO     2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/TODO     2014-10-31 00:27:39.000000000 +0100
@@ -1,6 +1,3 @@
-- Inodes between subvolumes on a btrfs file system can have the same i_ino 
(hooray!).
-  We need to handle this, probably by recording subvolid on the filerec.
-
 - Store results of our search to speed up subsequent runs.
   - When writing hashes, store latest btrfs transaction id in the hash file 
(import find-new from btrfsprogs for this)
   - When reading hashes and we have a transaction id, do a scan of btrfs 
objects to see which inodes have changed.
@@ -14,6 +11,18 @@
 - Add an optional mode to do duplicate checking with resolution of extent
   owners.
 
+- Multi-threaded extent search.
+  - This needs locking and atomic updates to some fields, so it's not quite
+    as straight forward as threading the hash stage. A start would be to have
+    find_all_dups queue up appropriate dup_blocks_lists to worker threads.
+    The threads have to properly lock the filerec compared trees and results
+    trees. b_seen also needs to be read/written atomically.
+
+- csum_whole_file() still does a read/checksum of holes and unwritten
+  extents (even though we detect and mark them now).  If we calculate and
+  store (in memory) the checksum of a zero'd block, we can skip the read and
+  copy our known value directly into the block digest member.
+
 - Allow checking for similar files by some criteria (approximate size,
   file magic type, file extension, etc)
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/btrfs-util.c 
new/duperemove-v0.09.beta2/btrfs-util.c
--- old/duperemove-v0.09.beta1/btrfs-util.c     1970-01-01 01:00:00.000000000 
+0100
+++ new/duperemove-v0.09.beta2/btrfs-util.c     2014-10-31 00:27:39.000000000 
+0100
@@ -0,0 +1,74 @@
+/*
+ * btrfs-util.c
+ *
+ * Copyright (C) 2014 SUSE.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Some parts of this taken from btrfs-progs, which is also GPLv2
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/statfs.h>
+#include <sys/ioctl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <string.h>
+#include <linux/magic.h>
+#include <linux/btrfs.h>
+
+#include "btrfs-util.h"
+#include "debug.h"
+
+/* For some reason linux/btrfs.h doesn't define this. */
+#define        BTRFS_FIRST_FREE_OBJECTID       256ULL
+
+/*
+ * For a given:
+ * - file or directory return the containing tree root id
+ * - subvolume return its own tree id
+ * - BTRFS_EMPTY_SUBVOL_DIR_OBJECTID (directory with ino == 2) the result is
+ *   undefined and function returns -1
+ */
+int lookup_btrfs_subvolid(int fd, uint64_t *subvolid)
+{
+       int ret;
+       struct btrfs_ioctl_ino_lookup_args args;
+
+       memset(&args, 0, sizeof(args));
+       args.objectid = BTRFS_FIRST_FREE_OBJECTID;
+
+       ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
+       if (ret)
+               return errno;
+
+       *subvolid = args.treeid;
+
+       return 0;
+}
+
+int check_file_btrfs(int fd, int *btrfs)
+{
+       int ret;
+       struct statfs fs;
+
+       *btrfs = 0;
+
+       ret = fstatfs(fd, &fs);
+       if (ret)
+               return errno;
+
+       if (fs.f_type == BTRFS_SUPER_MAGIC)
+               *btrfs = 1;
+
+       return ret;
+}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/btrfs-util.h 
new/duperemove-v0.09.beta2/btrfs-util.h
--- old/duperemove-v0.09.beta1/btrfs-util.h     1970-01-01 01:00:00.000000000 
+0100
+++ new/duperemove-v0.09.beta2/btrfs-util.h     2014-10-31 00:27:39.000000000 
+0100
@@ -0,0 +1,22 @@
+/*
+ * btrfs-util.h
+ *
+ * Copyright (C) 2014 SUSE.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef        __BTRFS_UTIL__
+#define        __BTRFS_UTIL__
+
+int check_file_btrfs(int fd, int *btrfs);
+int lookup_btrfs_subvolid(int fd, uint64_t *rootid);
+
+#endif /* __BTRFS_UTIL__ */
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/debug.h 
new/duperemove-v0.09.beta2/debug.h
--- old/duperemove-v0.09.beta1/debug.h  2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/debug.h  2014-10-31 00:27:39.000000000 +0100
@@ -2,6 +2,7 @@
 #define        __DEBUG_H__
 
 #include <stdio.h>
+#include <stdbool.h>
 
 extern int verbose;
 extern int debug;
@@ -56,6 +57,7 @@
 declare_alloc_tracking_header(filerec);
 declare_alloc_tracking_header(files_compared);
 declare_alloc_tracking_header(filerec_token);
+declare_alloc_tracking_header(file_hash_head);
 /* Can be called anywhere we want to dump the above statistics */
 void print_mem_stats(void);
 
@@ -73,6 +75,45 @@
                }                                                       \
        } while(0)
 
+
+/*
+ * compiletime_assert and associated code taken from
+ * linux-3.14.git/include/linux/compiler.h
+ */
+
+# define __compiletime_warning(message)
+# define __compiletime_error(message)
+
+/*
+ * Sparse complains of variable sized arrays due to the temporary variable in
+ * __compiletime_assert. Unfortunately we can't just expand it out to make
+ * sparse see a constant array size without breaking compiletime_assert on old
+ * versions of GCC (e.g. 4.2.4), so hide the array from sparse altogether.
+ */
+#define __compiletime_error_fallback(condition) \
+       do { ((void)sizeof(char[1 - 2 * condition])); } while (0)
+
+#define __compiletime_assert(condition, msg, prefix, suffix)           \
+       do {                                                            \
+               bool __cond = !(condition);                             \
+               extern void prefix ## suffix(void) __compiletime_error(msg); \
+               if (__cond)                                             \
+                       prefix ## suffix();                             \
+               __compiletime_error_fallback(__cond);                   \
+       } while (0)
+
+/**
+ * compiletime_assert - break build and emit msg if condition is false
+ * @condition: a compile-time constant condition to check
+ * @msg:       a message to emit if condition is false
+ *
+ * In tradition of POSIX assert, this macro will break the build if the
+ * supplied condition is *false*, emitting the supplied error message if the
+ * compiler has support to do so.
+ */
+#define compiletime_assert(condition, msg) \
+       __compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
+
 /*
  * BUILD_BUG_ON() and associated code taken from
  * linux-2.6.git/include/linux/bug.h
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/duperemove.c 
new/duperemove-v0.09.beta2/duperemove.c
--- old/duperemove-v0.09.beta1/duperemove.c     2014-09-27 01:56:37.000000000 
+0200
+++ new/duperemove-v0.09.beta2/duperemove.c     2014-10-31 00:27:39.000000000 
+0100
@@ -17,7 +17,6 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <sys/statfs.h>
 #include <limits.h>
 #include <fcntl.h>
 #include <unistd.h>
@@ -32,7 +31,6 @@
 #include <ctype.h>
 #include <getopt.h>
 #include <inttypes.h>
-#include <linux/magic.h>
 #include <assert.h>
 #include <linux/fiemap.h>
 
@@ -47,6 +45,7 @@
 #include "dedupe.h"
 #include "util.h"
 #include "serialize.h"
+#include "btrfs-util.h"
 #include "debug.h"
 
 /* exported via debug.h */
@@ -73,13 +72,17 @@
 static char *serialize_fname = NULL;
 static unsigned int hash_threads = 0;
 
+static int fancy_status = 0;
+
 static void debug_print_block(struct file_block *e)
 {
        struct filerec *f = e->b_file;
 
-       printf("%s\tloff: %llu lblock: %llu seen: %u\n", f->filename,
+       printf("%s\tloff: %llu lblock: %llu seen: %u flags: 0x%x\n",
+              f->filename,
               (unsigned long long)e->b_loff,
-              (unsigned long long)e->b_loff / blocksize, e->b_seen);
+              (unsigned long long)e->b_loff / blocksize, e->b_seen,
+              e->b_flags);
 }
 
 static void debug_print_tree(struct hash_tree *tree)
@@ -372,10 +375,12 @@
        assert(buf != NULL);
        unsigned char *digest = malloc(DIGEST_LEN_MAX);
        assert(digest != NULL);
+       static long long unsigned cur_num_filerecs = 0;
 
        GMutex *tree_mutex = g_dataset_get_data(tree, "mutex");
 
-       printf("csum: %s\n", file->filename);
+       printf("csum: %s \t[%llu/%llu]\n", file->filename,
+              __sync_add_and_fetch(&cur_num_filerecs, 1), num_filerecs);
 
        fc = alloc_fiemap_ctxt();
        if (fc == NULL) /* This should be non-fatal */
@@ -448,6 +453,8 @@
        filerec_close(file);
        free(digest);
        free(buf);
+       if (fc)
+               free(fc);
 
        return;
 
@@ -456,6 +463,8 @@
 err_noclose:
        free(digest);
        free(buf);
+       if (fc)
+               free(fc);
 
        fprintf(
                stderr,
@@ -543,7 +552,7 @@
        printf("\t-A\t\tOpens files readonly when deduping. Primarily for use 
by privileged users on readonly snapshots\n");
        printf("\t-b bsize\tUse bsize blocks. Default is %dk.\n",
               DEFAULT_BLOCKSIZE / 1024);
-       printf("\t-h\t\tPrint numbers in human-readble format.\n");
+       printf("\t-h\t\tPrint numbers in human-readable format.\n");
        printf("\t-v\t\tBe verbose.\n");
        printf("\t--hash-threads=N\n\t\t\tUse N threads for hashing phase. "
               "Default is automatically detected.\n");
@@ -593,44 +602,18 @@
        return 0;
 }
 
-static int check_file_fs(struct filerec *file, int *bad_fs)
-{
-       int ret;
-       struct statfs fs;
-
-       *bad_fs = 0;
-
-       if (!run_dedupe)
-               return 0;
-
-       ret = filerec_open(file, 0);
-       if (ret)
-               return ret;
-
-       ret = fstatfs(file->fd, &fs);
-       if (ret) {
-               ret = -errno;
-               goto out;
-       }
-
-       if (fs.f_type != BTRFS_SUPER_MAGIC)
-               *bad_fs = 1;
-
-out:
-       filerec_close(file);
-       return ret;
-}
-
 /*
  * Returns nonzero on fatal errors only
  */
 static int add_file(const char *name, int dirfd)
 {
        int ret, len = strlen(name);
-       int bad_fs;
+       int fd;
+       int on_btrfs = 0;
        struct stat st;
        char *pathtmp;
        struct filerec *file;
+       uint64_t subvolid;
 
        if (len > (path_max - pathp)) {
                fprintf(stderr, "Path max exceeded: %s %s\n", path, name);
@@ -671,26 +654,59 @@
                goto out;
        }
 
-       file = filerec_new(path, st.st_ino);
-       if (file == NULL) {
-               fprintf(stderr, "Out of memory while allocating file record "
-                       "for: %s\n", path);
-               return ENOMEM;
+       fd = open(path, O_RDONLY);
+       if (fd == -1) {
+               ret = errno;
+               fprintf(stderr, "Error %d: %s while opening file \"%s\". "
+                       "Skipping.\n", ret, strerror(ret), path);
+               goto out;
        }
 
-       ret = check_file_fs(file, &bad_fs);
+       ret = check_file_btrfs(fd, &on_btrfs);
        if (ret) {
-               fprintf(stderr, "Skip file \"%s\" due to errors\n",
-                       file->filename);
-               filerec_free(file);
-               return ENOMEM;
+               close(fd);
+               fprintf(stderr, "Skip file \"%s\" due to errors\n", path);
+               goto out;
        }
-       if (bad_fs) {
-               fprintf(stderr, "Can only dedupe files on btrfs\n");
-               filerec_free(file);
+
+       if (run_dedupe && !on_btrfs) {
+               close(fd);
+               fprintf(stderr, "\"%s\": Can only dedupe files on btrfs\n",
+                       path);
                return ENOSYS;
        }
 
+       if (on_btrfs) {
+               /*
+                * Inodes between subvolumes on a btrfs file system
+                * can have the same i_ino. Get the subvolume id of
+                * our file so hard link detection works.
+                */
+               ret = lookup_btrfs_subvolid(fd, &subvolid);
+               if (ret) {
+                       close(fd);
+                       fprintf(stderr,
+                               "Error %d: %s while finding subvolid for file "
+                               "\"%s\". Skipping.\n", ret, strerror(ret),
+                               path);
+                       goto out;
+               }
+       } else {
+               subvolid = st.st_dev;
+       }
+
+//     printf("\"%s\", ino: %llu, subvolid: %"PRIu64"\n", path,
+//            (unsigned long long)st.st_ino, subvolid);
+
+       close(fd);
+
+       file = filerec_new(path, st.st_ino, subvolid);
+       if (file == NULL) {
+               fprintf(stderr, "Out of memory while allocating file record "
+                       "for: %s\n", path);
+               return ENOMEM;
+       }
+
 out:
        pathp = pathtmp;
        return 0;
@@ -784,12 +800,27 @@
 
        /* Filter out option combinations that don't make sense. */
        if (write_hashes &&
-           (read_hashes || run_dedupe))
+           (read_hashes || run_dedupe)) {
+               if (run_dedupe)
+                       fprintf(stderr,
+                               "Error: Can not dedupe with --write-hashes "
+                               "option. Try writing hashes and then deduping "
+                               "with --read-hashes instead.\n");
+               if (read_hashes)
+                       fprintf(stderr,
+                               "Error: Specify only one of --write-hashes or "
+                               "--read-hashes.\n");
+
                return 1;
+       }
 
        if (read_hashes) {
-               if (numfiles)
+               if (numfiles) {
+                       fprintf(stderr,
+                               "Error: --read-hashes option does not take a "
+                               "file list argument\n");
                        return 1;
+               }
                goto out_nofiles;
        }
 
@@ -852,19 +883,14 @@
                (unsigned long long)eoff[1] / blocksize);
 }
 
-struct dupe_walk_ctxt {
-       struct file_block       *orig;
-
-       struct filerec          *orig_file;
-       struct filerec          *walk_file;
-
-       struct results_tree     *res;
-};
-
-static int walk_dupe_block(struct file_block *block, void *priv)
+static int walk_dupe_block(struct filerec *orig_file,
+                          struct file_block *orig_file_block,
+                          struct filerec *walk_file,
+                          struct file_block *walk_file_block,
+                          struct results_tree *res)
 {
-       struct dupe_walk_ctxt *ctxt = priv;
-       struct file_block *orig = ctxt->orig;
+       struct file_block *orig = orig_file_block;
+       struct file_block *block = walk_file_block;
        struct file_block *start[2] = { orig, block };
        struct file_block *end[2];
        struct running_checksum *csum;
@@ -891,8 +917,8 @@
                 * This is kind of ugly, however it does correctly
                 * signify the end of our list.
                 */
-               if (orig->b_file_next.next == &ctxt->orig_file->block_list ||
-                   block->b_file_next.next == &ctxt->walk_file->block_list)
+               if (orig->b_file_next.next == &orig_file->block_list ||
+                   block->b_file_next.next == &walk_file->block_list)
                        break;
 
                orig =  list_entry(orig->b_file_next.next, struct file_block,
@@ -903,32 +929,59 @@
 
        finish_running_checksum(csum, match_id);
 
-       record_match(ctxt->res, match_id, ctxt->orig_file, ctxt->walk_file,
+       record_match(res, match_id, orig_file, walk_file,
                     start, end);
 out:
        return 0;
 }
 
+/*
+ * Start an extent search (with orig_block) at each block in our dups
+ * list which is owned by walk_file.
+ */
+static void lookup_walk_file_hash_head(struct file_block *orig_block,
+                                      struct filerec *walk_file,
+                                      struct results_tree *res)
+{
+       struct dupe_blocks_list *parent = orig_block->b_parent;
+       struct file_block *cur;
+       struct file_hash_head *head = find_file_hash_head(parent, walk_file);
+
+       /* find_file_dups should have checked this for us already */
+       abort_on(head == NULL);
+
+       list_for_each_entry(cur, &head->h_blocks, b_head_list) {
+               /* Ignore self. Technically this shouldn't happen (see above)
+                * until we allow walking a file against itself. */
+               if (cur == orig_block)
+                       continue;
+
+               abort_on(cur->b_file != walk_file);
+
+               if (walk_dupe_block(orig_block->b_file, orig_block,
+                                   walk_file, cur, res))
+                       break;
+       }
+}
+
 static void find_file_dupes(struct filerec *file, struct filerec *walk_file,
                            struct results_tree *res)
 {
        struct file_block *cur;
-       struct dupe_walk_ctxt ctxt = { 0, };
 
        list_for_each_entry(cur, &file->block_list, b_file_next) {
                if (block_seen(cur))
                        continue;
+
+               if (!file_in_dups_list(cur->b_parent, walk_file))
+                       continue;
+
                /*
                 * For each file block with the same hash:
                 *  - Traverse, along with original file until we have no match
                 *     - record
                 */
-               memset(&ctxt, 0, sizeof(struct dupe_walk_ctxt));
-               ctxt.orig_file = file;
-               ctxt.walk_file = walk_file;
-               ctxt.orig = cur;
-               ctxt.res = res;
-               for_each_dupe(cur, walk_file, walk_dupe_block, &ctxt);
+               lookup_walk_file_hash_head(cur, walk_file, res);
        }
        clear_all_seen_blocks();
 }
@@ -941,50 +994,6 @@
        return mark_filerecs_compared(file1, file2);
 }
 
-/*
- * This dupe list is too large for the extent search algorithm to
- * handle efficiently. Instead of walking the block list, we walk the
- * list of files referenced and compare them to each other directly.
- *
- * A future improvement might be to always do this, at the cost of
- * extra memory usage.
- */
-static int walk_large_dups(struct hash_tree *tree,
-                          struct results_tree *res,
-                          struct dupe_blocks_list *dups)
-{
-       int ret;
-       struct rb_node *node = rb_first(&dups->dl_files_root);
-       struct rb_node *next;
-       struct filerec_token *t1, *t2;
-       struct filerec *file1, *file2;
-
-       while (node) {
-               t1 = rb_entry(node, struct filerec_token, t_node);
-               file1 = t1->t_file;
-
-               next = rb_next(node);
-               while (next) {
-                       t2 = rb_entry(next, struct filerec_token, t_node);
-                       file2 = t2->t_file;
-
-                       /* filerec token tree does not allow duplicates */
-                       abort_on(file1 == file2);
-                       if (!filerecs_compared(file1, file2)) {
-                               ret = compare_files(res, file1, file2);
-                               if (ret)
-                                       return ret;
-                               break;
-                       }
-
-                       next = rb_next(next);
-               }
-               node = rb_next(node);
-       }
-
-       return 0;
-}
-
 static int walk_dupe_hashes(struct dupe_blocks_list *dups,
                            struct results_tree *res)
 {
@@ -1043,13 +1052,59 @@
        return 0;
 }
 
+static void update_extent_search_status(struct hash_tree *tree,
+                                       unsigned long long processed)
+{
+       static int last_pos = -1;
+       int i, pos;
+       int width = 40;
+       float progress;
+
+       if (!fancy_status)
+               return;
+
+       progress = (float) processed / tree->num_hashes;
+       pos = width * progress;
+
+       /* Only update our status every width% */
+       if (pos <= last_pos)
+               return;
+       last_pos = pos;
+
+       printf("\r[");
+       for(i = 0; i < width; i++) {
+               if (i < pos)
+                       printf("#");
+               else if (i == pos)
+                       printf("%%");
+               else
+                       printf(" ");
+       }
+       printf("]");
+       fflush(stdout);
+}
+
+static void clear_extent_search_status(unsigned long long processed,
+                                      int err)
+{
+       if (!fancy_status)
+               return;
+
+       if (err)
+               printf("\rSearch exited (%llu processed) with error %d: "
+                      "\"%s\"\n", processed, err, strerror(err));
+       else
+               printf("\rSearch completed with no errors.             \n");
+       fflush(stdout);
+}
+
 static int find_all_dups(struct hash_tree *tree, struct results_tree *res)
 {
-       int ret;
+       int ret = 0;
        struct rb_root *root = &tree->root;
        struct rb_node *node = rb_first(root);
        struct dupe_blocks_list *dups;
-       LIST_HEAD(large_dupes);
+       unsigned long long processed = 0;
 
        while (1) {
                if (node == NULL)
@@ -1057,29 +1112,22 @@
 
                dups = rb_entry(node, struct dupe_blocks_list, dl_node);
 
-               if (dups->dl_num_files) {
-                       list_add_tail(&dups->dl_large_list, &large_dupes);
+               update_extent_search_status(tree, processed);
 
-                       printf("Hash (\"");
-                       debug_print_digest(stdout, dups->dl_hash);
-                       printf("\") has %u items (across %u files), processing 
later.\n",
-                              dups->dl_num_elem, dups->dl_num_files);
-               } else if (dups->dl_num_elem > 1) {
+               if (dups->dl_num_elem > 1) {
                        ret = walk_dupe_hashes(dups, res);
                        if (ret)
-                               return ret;
+                               goto out;
                }
 
-               node = rb_next(node);
-       }
+               processed++;
 
-       list_for_each_entry(dups, &large_dupes, dl_large_list) {
-               ret = walk_large_dups(tree, res, dups);
-               if (ret)
-                       return ret;
+               node = rb_next(node);
        }
+out:
 
-       return 0;
+       clear_extent_search_status(processed, ret);
+       return ret;
 }
 
 int main(int argc, char **argv)
@@ -1101,6 +1149,9 @@
                return EINVAL;
        }
 
+       if (isatty(STDOUT_FILENO))
+               fancy_status = 1;
+
        if (read_hashes) {
                ret = read_hash_tree(serialize_fname, &tree, &blocksize, NULL);
                if (ret == FILE_VERSION_ERROR) {
@@ -1143,8 +1194,9 @@
                        fprintf(stderr, "Error %d while writing to hash 
file\n", ret);
                goto out;
        } else {
-               printf("Hashed %"PRIu64" blocks. Calculating duplicate "
-                      "extents - this may take some time.\n", tree.num_blocks);
+               printf("Hashed %"PRIu64" blocks, resulting in %"PRIu64" unique "
+                      "hashes. Calculating duplicate extents - this may take "
+                      "some time.\n", tree.num_blocks, tree.num_hashes);
        }
 
        ret = find_all_dups(&tree, &res);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/filerec.c 
new/duperemove-v0.09.beta2/filerec.c
--- old/duperemove-v0.09.beta1/filerec.c        2014-09-27 01:56:37.000000000 
+0200
+++ new/duperemove-v0.09.beta2/filerec.c        2014-10-31 00:27:39.000000000 
+0100
@@ -83,13 +83,19 @@
                free_filerec_token(token);
 }
 
+static void filerec_token_init(struct filerec_token *token,
+                              struct filerec *file)
+{
+       rb_init_node(&token->t_node);
+       token->t_file = file;
+}
+
 struct filerec_token *filerec_token_new(struct filerec *file)
 {
        struct filerec_token *token = malloc_filerec_token();
 
        if (token) {
-               rb_init_node(&token->t_node);
-               token->t_file = file;
+               filerec_token_init(token, file);
        }
        return token;
 }
@@ -151,8 +157,23 @@
        }
 }
 
+static int cmp_filerecs(struct filerec *file1, uint64_t file2_inum,
+                       uint64_t file2_subvolid)
+{
+       if (file1->inum < file2_inum)
+               return -1;
+       else if (file1->inum > file2_inum)
+               return 1;
+       if (file1->subvolid < file2_subvolid)
+               return -1;
+       if (file1->subvolid > file2_subvolid)
+               return -1;
+       return 0;
+}
+
 static void insert_filerec(struct filerec *file)
 {
+       int c;
        struct rb_node **p = &filerec_by_inum.rb_node;
        struct rb_node *parent = NULL;
        struct filerec *tmp;
@@ -162,9 +183,10 @@
 
                tmp = rb_entry(parent, struct filerec, inum_node);
 
-               if (file->inum < tmp->inum)
+               c = cmp_filerecs(tmp, file->inum, file->subvolid);
+               if (c < 0)
                        p = &(*p)->rb_left;
-               else if (file->inum > tmp->inum)
+               else if (c > 0)
                        p = &(*p)->rb_right;
                else
                        abort_lineno(); /* We should never find a duplicate */
@@ -175,17 +197,19 @@
        return;
 }
 
-static struct filerec *find_filerec(uint64_t inum)
+static struct filerec *find_filerec(uint64_t inum, uint64_t subvolid)
 {
+       int c;
        struct rb_node *n = filerec_by_inum.rb_node;
        struct filerec *file;
 
        while (n) {
                file = rb_entry(n, struct filerec, inum_node);
 
-               if (inum < file->inum)
+               c = cmp_filerecs(file, inum, subvolid);
+               if (c < 0)
                        n = n->rb_left;
-               else if (inum > file->inum)
+               else if (c > 0)
                        n = n->rb_right;
                else
                        return file;
@@ -193,7 +217,8 @@
        return NULL;
 }
 
-static struct filerec *filerec_alloc_insert(const char *filename, uint64_t 
inum)
+static struct filerec *filerec_alloc_insert(const char *filename,
+                                           uint64_t inum, uint64_t subvolid)
 {
        struct filerec *file = calloc_filerec(1);
 
@@ -210,6 +235,7 @@
                INIT_LIST_HEAD(&file->tmp_list);
                rb_init_node(&file->inum_node);
                file->inum = inum;
+               file->subvolid = subvolid;
                file->comparisons = RB_ROOT;
 
                insert_filerec(file);
@@ -219,11 +245,12 @@
        return file;
 }
 
-struct filerec *filerec_new(const char *filename, uint64_t inum)
+struct filerec *filerec_new(const char *filename, uint64_t inum,
+                           uint64_t subvolid)
 {
-       struct filerec *file = find_filerec(inum);
+       struct filerec *file = find_filerec(inum, subvolid);
        if (!file)
-               file = filerec_alloc_insert(filename, inum);
+               file = filerec_alloc_insert(filename, inum, subvolid);
        return file;
 }
 
@@ -458,9 +485,12 @@
                                last = 1;
 
                        dprintf("(fiemap) [%d] fe_logical: %llu, "
-                               "fe_length: %llu\n",
+                               "fe_length: %llu, fe_physical: %llu, "
+                               "fe_flags: 0x%x\n",
                                i, (unsigned long long)fm_ext[i].fe_logical,
-                               (unsigned long long)fm_ext[i].fe_length);
+                               (unsigned long long)fm_ext[i].fe_length,
+                               (unsigned long long)fm_ext[i].fe_physical,
+                               fm_ext[i].fe_flags);
 
                        loff = fm_ext[i].fe_logical;
                        ext_len = fm_ext[i].fe_length;
@@ -494,8 +524,9 @@
                                last = 1;
                        }
 
-                       dprintf("(fiemap) loff: %"PRIu64" ext_len: %"PRIu64"\n",
-                               loff, ext_len);
+//                     dprintf("(fiemap) loff: %"PRIu64" ext_len: %"PRIu64
+//                             " flags: 0x%x\n",
+//                             loff, ext_len, fm_ext[i].fe_flags);
 
                        if (fm_ext[i].fe_flags & FIEMAP_EXTENT_SHARED)
                                *shared_bytes += ext_len;
@@ -527,7 +558,7 @@
                return 1;
        }
 
-       file = filerec_new(argv[1], 500); /* Use made up ino */
+       file = filerec_new(argv[1], 500, 1); /* Use made up ino */
        if (!file) {
                fprintf(stderr, "filerec_new(): malloc error\n");
                return 1;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/filerec.h 
new/duperemove-v0.09.beta2/filerec.h
--- old/duperemove-v0.09.beta1/filerec.h        2014-09-27 01:56:37.000000000 
+0200
+++ new/duperemove-v0.09.beta2/filerec.h        2014-10-31 00:27:39.000000000 
+0100
@@ -11,6 +11,7 @@
 struct filerec {
        int             fd;                     /* file descriptor */
        char    *filename;              /* path to file */
+       uint64_t subvolid;
 
        uint64_t                inum;
        struct rb_node          inum_node;
@@ -29,7 +30,8 @@
 
 void init_filerec(void);
 
-struct filerec *filerec_new(const char *filename, uint64_t inum);
+struct filerec *filerec_new(const char *filename, uint64_t inum,
+                           uint64_t subvolid);
 void filerec_free(struct filerec *file);
 int filerec_open(struct filerec *file, int write);
 void filerec_close(struct filerec *file);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/hash-tree.c 
new/duperemove-v0.09.beta2/hash-tree.c
--- old/duperemove-v0.09.beta1/hash-tree.c      2014-09-27 01:56:37.000000000 
+0200
+++ new/duperemove-v0.09.beta2/hash-tree.c      2014-10-31 00:27:39.000000000 
+0100
@@ -34,48 +34,83 @@
 declare_alloc_tracking(file_block);
 declare_alloc_tracking(dupe_blocks_list);
 
-static int add_one_filerec_token(struct dupe_blocks_list *dups,
-                                struct filerec *file)
+struct file_hash_head *find_file_hash_head(struct dupe_blocks_list *dups,
+                                          struct filerec *file)
 {
-       struct filerec_token *t = NULL;
+       struct rb_node *n = dups->dl_files_root.rb_node;
+       struct file_hash_head *head;
 
-       if (find_filerec_token_rb(&dups->dl_files_root, file))
-               return 0;
-
-       t = filerec_token_new(file);
-       if (!t)
-               return ENOMEM;
+       while (n) {
+               head = rb_entry(n, struct file_hash_head, h_node);
 
-       insert_filerec_token_rb(&dups->dl_files_root, t);
-       dups->dl_num_files++;
-       return 0;
+               if (head->h_file < file)
+                       n = n->rb_left;
+               else if (head->h_file > file)
+                       n = n->rb_right;
+               else return head;
+       }
+       return NULL;
 }
 
-static int add_filerec_tokens(struct dupe_blocks_list *dups)
+static void insert_file_hash_head(struct dupe_blocks_list *dups,
+                                 struct file_hash_head *head)
 {
-       struct file_block *block;
+       struct rb_node **p = &dups->dl_files_root.rb_node;
+       struct rb_node *parent = NULL;
+       struct file_hash_head *tmp;
 
-       list_for_each_entry(block, &dups->dl_list, b_list) {
-               if (add_one_filerec_token(dups, block->b_file))
-                       return ENOMEM;
+       while (*p) {
+               parent = *p;
+
+               tmp = rb_entry(parent, struct file_hash_head, h_node);
+
+               if (tmp->h_file < head->h_file)
+                       p = &(*p)->rb_left;
+               else if (tmp->h_file > head->h_file)
+                       p = &(*p)->rb_right;
+               else abort_lineno(); /* We should never find a duplicate */
        }
-       return 0;
+
+       rb_link_node(&head->h_node, parent, p);
+       rb_insert_color(&head->h_node, &dups->dl_files_root);
 }
 
-static void free_filerec_tokens(struct dupe_blocks_list *dups)
+static int add_file_hash_head(struct dupe_blocks_list *dups,
+                             struct file_block *block)
 {
-       struct rb_node *node = rb_first(&dups->dl_files_root);
-       struct filerec_token *t;
+       struct filerec *file = block->b_file;
+       struct file_hash_head *head = find_file_hash_head(dups, file);
 
-       while (node) {
-               t = rb_entry(node, struct filerec_token, t_node);
+       if (head)
+               goto add;
 
-               node = rb_next(node);
+       head = malloc(sizeof(*head));
+       if (!head)
+               return ENOMEM;
 
-               dups->dl_num_files--;
-               rb_erase(&t->t_node, &dups->dl_files_root);
-               filerec_token_free(t);
-       }
+       head->h_file = file;
+       rb_init_node(&head->h_node);
+       INIT_LIST_HEAD(&head->h_blocks);
+       insert_file_hash_head(dups, head);
+       dups->dl_num_files++;
+add:
+       /* We depend on this being added in increasing block order */
+       list_add_tail(&block->b_head_list, &head->h_blocks);
+       return 0;
+}
+
+static void free_one_hash_head(struct dupe_blocks_list *dups,
+                              struct file_hash_head *head)
+{
+       rb_erase(&head->h_node, &dups->dl_files_root);
+       free(head);
+}
+
+int file_in_dups_list(struct dupe_blocks_list *dups, struct filerec *file)
+{
+       if (find_file_hash_head(dups, file))
+               return 1;
+       return 0;
 }
 
 static void insert_block_list(struct hash_tree *tree,
@@ -147,30 +182,27 @@
                rb_init_node(&d->dl_node);
                rb_init_node(&d->dl_by_size);
                INIT_LIST_HEAD(&d->dl_list);
-               INIT_LIST_HEAD(&d->dl_large_list);
                d->dl_files_root = RB_ROOT;
 
                insert_block_list(tree, d);
        }
 
-       if (d->dl_num_elem >= DUPLIST_CONVERT_LIMIT && d->dl_num_files == 0) {
-               if (add_filerec_tokens(d))
-                       return ENOMEM;
-       }
-
        e->b_file = file;
        e->b_seen = 0;
        e->b_loff = loff;
        e->b_flags = flags;
-       list_add_tail(&e->b_file_next, &file->block_list);
-       file->num_blocks++;
        e->b_parent = d;
 
-       if (d->dl_num_files) {
-               if (add_one_filerec_token(d, file))
-                       return ENOMEM;
+       INIT_LIST_HEAD(&e->b_head_list);
+
+       if (add_file_hash_head(d, e)) {
+               free_file_block(e);
+               return ENOMEM;
        }
 
+       list_add_tail(&e->b_file_next, &file->block_list);
+       file->num_blocks++;
+
        d->dl_num_elem++;
        list_add_tail(&e->b_list, &d->dl_list);
 
@@ -182,6 +214,7 @@
                                struct file_block *block, struct filerec *file)
 {
        struct dupe_blocks_list *blocklist = block->b_parent;
+       struct file_hash_head *head;
 
        abort_on(blocklist->dl_num_elem == 0);
 
@@ -193,12 +226,16 @@
        list_del(&block->b_file_next);
        list_del(&block->b_list);
 
+       list_del(&block->b_head_list);
+       head = find_file_hash_head(blocklist, file);
+       if (head && list_empty(&head->h_blocks))
+               free_one_hash_head(blocklist, head);
+
        blocklist->dl_num_elem--;
        if (blocklist->dl_num_elem == 0) {
                rb_erase(&blocklist->dl_node, &tree->root);
                tree->num_hashes--;
 
-               free_filerec_tokens(blocklist);
                free_dupe_blocks_list(blocklist);
        }
 
@@ -214,25 +251,6 @@
                remove_hashed_block(tree, block, file);
 }
 
-void for_each_dupe(struct file_block *block, struct filerec *file,
-                  for_each_dupe_t func, void *priv)
-{
-       struct dupe_blocks_list *parent = block->b_parent;
-       struct file_block *cur;
-
-       list_for_each_entry(cur, &parent->dl_list, b_list) {
-               /* Ignore self and any blocks from another file */
-               if (cur == block)
-                       continue;
-
-               if (cur->b_file != file)
-                       continue;
-
-               if (func(cur, priv))
-                       break;
-       }
-}
-
 static unsigned int seen_counter = 1;
 
 int block_seen(struct file_block *block)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/hash-tree.h 
new/duperemove-v0.09.beta2/hash-tree.h
--- old/duperemove-v0.09.beta1/hash-tree.h      2014-09-27 01:56:37.000000000 
+0200
+++ new/duperemove-v0.09.beta2/hash-tree.h      2014-10-31 00:27:39.000000000 
+0100
@@ -14,24 +14,14 @@
        unsigned int    dl_num_elem;
        struct list_head        dl_list;
 
-       /*
-        * num_files and files_root are used when the total number of
-        * blocks in the list exceeds DUPLIST_CONVERT_LIMIT (defined
-        * below)
-        */
        unsigned int            dl_num_files;
-       struct rb_root          dl_files_root;
-       struct list_head        dl_large_list; /* Temporary list for
-                                               * use by extent finding code */
+       struct rb_root          dl_files_root; /* stores file_hash_head nodes */
 
        unsigned char           dl_hash[DIGEST_LEN_MAX];
 };
 
-/* Max number of blocks before we'll add filerec tokens */
-#define        DUPLIST_CONVERT_LIMIT           30000
-
 /* Fiemap flags that would cause us to skip comparison of the block */
-#define FIEMAP_SKIP_FLAGS      
(FIEMAP_EXTENT_UNKNOWN|FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_UNWRITTEN)
+#define FIEMAP_SKIP_FLAGS      
(FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_UNWRITTEN)
 /* Fiemap flags that indicate the extent may have already been deduped */
 #define FIEMAP_DEDUPED_FLAGS   (FIEMAP_EXTENT_SHARED)
 
@@ -50,15 +40,33 @@
                                          * with this md5. */
 
        struct list_head        b_file_next; /* filerec->block_list */
+
+       struct list_head        b_head_list; /* file_hash_head->h_blocks */
 };
 
 int insert_hashed_block(struct hash_tree *tree, unsigned char *digest,
                        struct filerec *file, uint64_t loff, unsigned int 
flags);
 void remove_hashed_blocks(struct hash_tree *tree, struct filerec *file);
 
-typedef int (for_each_dupe_t)(struct file_block *, void *);
-void for_each_dupe(struct file_block *block, struct filerec *file,
-                  for_each_dupe_t func, void *priv);
+/*
+ * Stores a list of blocks with the same hash / filerec
+ * combination. Each dupe_blocks_list keeps a tree of these (sorted by
+ * file).
+ *
+ * This speeds up the extent search by allowing us to skip blocks that
+ * don't belong to the file we are 'walking'. Blocks are inserted into
+ * h_blocks in the same order they are given to insert_hashed_block()
+ * (that is, in order of increasing offset).
+ */
+struct file_hash_head {
+       struct filerec *h_file;
+       struct rb_node h_node;
+       struct list_head h_blocks;
+};
+
+int file_in_dups_list(struct dupe_blocks_list *dups, struct filerec *file);
+struct file_hash_head *find_file_hash_head(struct dupe_blocks_list *dups,
+                                          struct filerec *file);
 
 int block_seen(struct file_block *block);
 int block_ever_seen(struct file_block *block);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/hashstats.c 
new/duperemove-v0.09.beta2/hashstats.c
--- old/duperemove-v0.09.beta1/hashstats.c      2014-09-27 01:56:37.000000000 
+0200
+++ new/duperemove-v0.09.beta2/hashstats.c      2014-10-31 00:27:39.000000000 
+0100
@@ -93,6 +93,21 @@
        }
 }
 
+static void printf_file_block_flags(struct file_block *block)
+{
+       if (!block->b_flags)
+               return;
+
+       printf("( ");
+       if (block->b_flags & FILE_BLOCK_SKIP_COMPARE)
+               printf("skip_compare ");
+       if (block->b_flags & FILE_BLOCK_DEDUPED)
+               printf("deduped ");
+       if (block->b_flags & FILE_BLOCK_HOLE)
+               printf("hole ");
+       printf(")");
+}
+
 static void print_by_size(void)
 {
        struct rb_node *node = rb_first(&by_size);
@@ -104,6 +119,8 @@
        else
                printf("Print top %d hashes\n", num_to_print);
 
+       printf("Hash, # Blocks, # Files\n");
+
        while (1) {
                if (node == NULL)
                        break;
@@ -111,14 +128,18 @@
                dups = rb_entry(node, struct dupe_blocks_list, dl_by_size);
 
                debug_print_digest(stdout, dups->dl_hash);
-               printf(", %u\n", dups->dl_num_elem);
+               printf(", %u, %u\n", dups->dl_num_elem, dups->dl_num_files);
                if (print_blocks) {
                        list_for_each_entry(block, &dups->dl_list,
                                            b_list) {
                                struct filerec *f = block->b_file;
-                               printf("  %s\tloff: %llu lblock: %llu\n", 
f->filename,
+                               printf("  %s\tloff: %llu lblock: %llu "
+                                      "flags: 0x%x ", f->filename,
                                       (unsigned long long)block->b_loff,
-                                      (unsigned long long)block->b_loff / 
blocksize);
+                                      (unsigned long long)block->b_loff / 
blocksize,
+                                      block->b_flags);
+                               printf_file_block_flags(block);
+                               printf("\n");
                        }
                }
 
@@ -133,12 +154,12 @@
 {
        struct filerec *file;
 
-       printf("Showing %llu files.\nInode\tBlocks Stored\tFilename\n",
+       printf("Showing %llu files.\nInode\tBlocks Stored\tSubvol 
ID\tFilename\n",
                num_filerecs);
 
        list_for_each_entry(file, &filerec_list, rec_list) {
-               printf("%"PRIu64"\t%"PRIu64"\t%s\n", file->inum,
-                      file->num_blocks, file->filename);
+               printf("%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%s\n", file->inum,
+                      file->num_blocks, file->subvolid, file->filename);
        }
 }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/serialize.c 
new/duperemove-v0.09.beta2/serialize.c
--- old/duperemove-v0.09.beta1/serialize.c      2014-09-27 01:56:37.000000000 
+0200
+++ new/duperemove-v0.09.beta2/serialize.c      2014-10-31 00:27:39.000000000 
+0100
@@ -34,8 +34,8 @@
 #include "debug.h"
 
 #include "csum.h"
-#include "hash-tree.h"
 #include "filerec.h"
+#include "hash-tree.h"
 
 #include "serialize.h"
 
@@ -131,6 +131,7 @@
        finfo.ino = swap64(file->inum);
        finfo.file_size = 0ULL; /* We don't store this yet */
        finfo.num_blocks = swap64(file->num_blocks);
+       finfo.subvolid = swap64(file->subvolid);
 
        name_len = strlen(file->filename);
        finfo.name_len = swap16(name_len);
@@ -256,6 +257,7 @@
        f->ino = swap64(disk.ino);
        f->file_size = swap64(disk.file_size);
        f->num_blocks = swap64(disk.num_blocks);
+       f->subvolid = swap64(disk.subvolid);
        f->rec_len = swap16(disk.rec_len);
        f->name_len = swap16(disk.name_len);
 
@@ -295,7 +297,7 @@
        dprintf("Load %"PRIu64" hashes for \"%s\"\n", finfo.num_blocks,
                fname);
 
-       file = filerec_new(fname, finfo.ino);
+       file = filerec_new(fname, finfo.ino, finfo.subvolid);
        if (file == NULL)
                return ENOMEM;
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/duperemove-v0.09.beta1/serialize.h 
new/duperemove-v0.09.beta2/serialize.h
--- old/duperemove-v0.09.beta1/serialize.h      2014-09-27 01:56:37.000000000 
+0200
+++ new/duperemove-v0.09.beta2/serialize.h      2014-10-31 00:27:39.000000000 
+0100
@@ -47,7 +47,8 @@
        uint16_t        rec_len;
        uint16_t        name_len;
        uint32_t        pad0;
-/*20*/ uint64_t        pad1[3];
+       uint64_t        subvolid;
+/*20*/ uint64_t        pad1[2];
        char            name[0];
 };
 

-- 
To unsubscribe, e-mail: opensuse-commit+unsubscr...@opensuse.org
For additional commands, e-mail: opensuse-commit+h...@opensuse.org

Reply via email to