Hello community,

here is the log from the commit of package duperemove for openSUSE:Factory 
checked in at 2015-07-19 11:45:46
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/duperemove (Old)
 and      /work/SRC/openSUSE:Factory/.duperemove.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "duperemove"

Changes:
--------
--- /work/SRC/openSUSE:Factory/duperemove/duperemove.changes    2015-07-14 17:45:19.000000000 +0200
+++ /work/SRC/openSUSE:Factory/.duperemove.new/duperemove.changes       2015-07-19 11:45:47.000000000 +0200
@@ -1,0 +2,6 @@
+Thu Jul 16 21:45:34 UTC 2015 - mfas...@suse.com
+
+- Update to duperemove v0.10.beta4
+  - Better memory usage with hashstats utility
+
+-------------------------------------------------------------------

Old:
----
  v0.10.beta3.tar.gz

New:
----
  v0.10.beta4.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ duperemove.spec ++++++
--- /var/tmp/diff_new_pack.JjD2Cz/_old  2015-07-19 11:45:48.000000000 +0200
+++ /var/tmp/diff_new_pack.JjD2Cz/_new  2015-07-19 11:45:48.000000000 +0200
@@ -16,10 +16,10 @@
 #
 
 
-%define tar_version 0.10.beta3
+%define tar_version 0.10.beta4
 %define samename btrfs-extent-same
 Name:           duperemove
-Version:        0.10.beta3
+Version:        0.10.beta4
 Release:        0
 Summary:        Software to find duplicate extents in files and remove them
 License:        GPL-2.0
@@ -28,7 +28,6 @@
 Source:         https://github.com/markfasheh/%{name}/archive/v%{tar_version}.tar.gz
 BuildRequires:  gcc-c++
 BuildRequires:  glib2-devel
-BuildRequires:  libgcrypt-devel
 BuildRequires:  sqlite3-devel
 BuildRoot:      %{_tmppath}/%{name}-%{version}-build
 

++++++ v0.10.beta3.tar.gz -> v0.10.beta4.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-0.10.beta3/Makefile new/duperemove-0.10.beta4/Makefile
--- old/duperemove-0.10.beta3/Makefile  2015-07-10 09:46:56.000000000 +0200
+++ new/duperemove-0.10.beta4/Makefile  2015-07-16 22:52:51.000000000 +0200
@@ -1,4 +1,4 @@
-VER=0.10.beta3
+VER=0.10.beta4
 RELEASE=v$(VER)
 
 CC = gcc
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-0.10.beta3/dbfile.c new/duperemove-0.10.beta4/dbfile.c
--- old/duperemove-0.10.beta3/dbfile.c  2015-07-10 09:46:56.000000000 +0200
+++ new/duperemove-0.10.beta4/dbfile.c  2015-07-16 22:52:51.000000000 +0200
@@ -21,7 +21,8 @@
 #define DB_FILE_MAJOR  1
 #define DB_FILE_MINOR  2
 
-static sqlite3 *gdb = NULL;
+/* exported for hashstats.c */
+sqlite3 *gdb = NULL;
 
 #if (SQLITE_VERSION_NUMBER < 3007015)
 #define        perror_sqlite(_err, _why)                                       \
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-0.10.beta3/hashstats.c new/duperemove-0.10.beta4/hashstats.c
--- old/duperemove-0.10.beta3/hashstats.c       2015-07-10 09:46:56.000000000 +0200
+++ new/duperemove-0.10.beta4/hashstats.c       2015-07-16 22:52:51.000000000 +0200
@@ -33,6 +33,8 @@
 
 #include "bswap.h"
 
+extern sqlite3 *gdb;
+
 int verbose = 0, debug = 0;
 unsigned int blocksize;
 static int version_only = 0;
@@ -41,144 +43,212 @@
 static int num_to_print = 10;
 static int print_file_list = 0;
 static char *serialize_fname = NULL;
-static struct rb_root by_size = RB_ROOT;
+static uint64_t disk_files, disk_hashes;
 
-static int cmp(struct dupe_blocks_list *tmp, struct dupe_blocks_list *dups)
-{
-       if (tmp->dl_num_elem < dups->dl_num_elem)
-               return -1;
-       else if (tmp->dl_num_elem > dups->dl_num_elem)
-               return 1;
-       return memcmp(dups->dl_hash, tmp->dl_hash, digest_len);
-}
+static sqlite3_stmt *top_hashes_stmt = NULL;
+static sqlite3_stmt *files_count_stmt = NULL;
+static sqlite3_stmt *find_blocks_stmt = NULL;
 
-static void insert_by_size(struct dupe_blocks_list *dups)
+static int prepare_statements(void)
 {
-       struct rb_node **p = &by_size.rb_node;
-       struct rb_node *parent = NULL;
-       struct dupe_blocks_list *tmp;
        int ret;
 
-       while (*p) {
-               parent = *p;
-
-               tmp = rb_entry(parent, struct dupe_blocks_list, dl_by_size);
+#define        FIND_TOP_HASHES                                                 \
+"select digest, count(digest) from hashes group by digest having (count(digest) > 1) order by (count(digest)) desc;"
+       ret = sqlite3_prepare_v2(gdb, FIND_TOP_HASHES, -1, &top_hashes_stmt,
+                                NULL);
+       if (ret) {
+               fprintf(stderr, "error %d while prepping hash search stmt: %s\n",
+                       ret, sqlite3_errstr(ret));
+               return ret;
+       }
 
-               ret = cmp(tmp, dups);
-               if (ret < 0)
-                       p = &(*p)->rb_left;
-               else if (ret > 0)
-                       p = &(*p)->rb_right;
-               else
-                       break;
+#define        FIND_FILES_COUNT                                                \
+"select count (distinct files.filename) from files INNER JOIN hashes on hashes.digest = ?1 AND files.subvol=hashes.subvol AND files.ino=hashes.ino;"
+       ret = sqlite3_prepare_v2(gdb, FIND_FILES_COUNT, -1, &files_count_stmt,
+                                NULL);
+       if (ret) {
+               fprintf(stderr, "error %d while preparing file count stmt: %s\n",
+                       ret, sqlite3_errstr(ret));
+               return ret;
        }
 
-       rb_link_node(&dups->dl_by_size, parent, p);
-       rb_insert_color(&dups->dl_by_size, &by_size);
+#define        FIND_BLOCKS                                                     \
+"select files.filename, hashes.loff, hashes.flags from files INNER JOIN hashes on hashes.digest = ?1 AND files.subvol=hashes.subvol AND files.ino=hashes.ino;"
+
+       ret = sqlite3_prepare_v2(gdb, FIND_BLOCKS, -1, &find_blocks_stmt, NULL);
+       if (ret) {
+               fprintf(stderr, "error %d while prepping find blocks stmt: %s\n",
+                       ret, sqlite3_errstr(ret));
+               return ret;
+       }
+       return 0;
 }
 
-static void sort_by_size(struct hash_tree *tree)
+static void finalize_statements(void)
 {
-       struct rb_root *root = &tree->root;
-       struct rb_node *node = rb_first(root);
-       struct dupe_blocks_list *dups;
-
-       while (1) {
-               if (node == NULL)
-                       break;
-
-               dups = rb_entry(node, struct dupe_blocks_list, dl_node);
-
-               insert_by_size(dups);
-
-               node = rb_next(node);
-       }
+       sqlite3_finalize(top_hashes_stmt);
+       sqlite3_finalize(files_count_stmt);
+       sqlite3_finalize(find_blocks_stmt);
 }
 
-static void printf_file_block_flags(struct file_block *block)
+static void printf_file_block_flags(unsigned int flags)
 {
-       if (!block->b_flags)
+       if (!flags)
                return;
 
        printf("( ");
-       if (block->b_flags & FILE_BLOCK_SKIP_COMPARE)
+       if (flags & FILE_BLOCK_SKIP_COMPARE)
                printf("skip_compare ");
-       if (block->b_flags & FILE_BLOCK_DEDUPED)
+       if (flags & FILE_BLOCK_DEDUPED)
                printf("deduped ");
-       if (block->b_flags & FILE_BLOCK_HOLE)
+       if (flags & FILE_BLOCK_HOLE)
                printf("hole ");
+       if (flags & FILE_BLOCK_PARTIAL)
+               printf("partial ");
        printf(")");
 }
 
+static int print_all_blocks(unsigned char *digest)
+{
+       int ret;
+       uint64_t loff;
+       unsigned int flags;
+       const unsigned char *filename;
+
+       ret = sqlite3_bind_blob(find_blocks_stmt, 1, digest, digest_len,
+                               SQLITE_STATIC);
+       if (ret) {
+               fprintf(stderr, "Error %d binding digest for blocks: %s\n", ret,
+                       sqlite3_errstr(ret));
+               return ret;
+       }
+
+       while ((ret = sqlite3_step(find_blocks_stmt)) == SQLITE_ROW) {
+               filename = sqlite3_column_text(find_blocks_stmt, 0);
+               loff = sqlite3_column_int64(find_blocks_stmt, 1);
+               flags = sqlite3_column_int(find_blocks_stmt, 2);
+
+               printf("  %s\tloff: %llu lblock: %llu "
+                      "flags: 0x%x ", filename,
+                      (unsigned long long)loff,
+                      (unsigned long long)loff / blocksize,
+                      flags);
+               printf_file_block_flags(flags);
+               printf("\n");
+       }
+       if (ret != SQLITE_DONE) {
+               fprintf(stderr,
+                       "error %d running block stmt: %s\n",
+                       ret, sqlite3_errstr(ret));
+               return ret;
+       }
+
+       sqlite3_reset(find_blocks_stmt);
+
+       return 0;
+}
+
 static void print_by_size(void)
 {
-       struct rb_node *node = rb_first(&by_size);
-       struct dupe_blocks_list *dups;
-       struct file_block *block;
+       int ret;
+       int header_printed = 0;
+       unsigned char *digest;
+       uint64_t count, files_count;
 
        if (print_all_hashes)
-               printf("Print all hashes\n");
+               printf("Print all hashes ");
        else
-               printf("Print top %d hashes\n", num_to_print);
+               printf("Print top %d hashes ", num_to_print);
 
-       printf("Hash, # Blocks, # Files\n");
+       printf("(this may take some time)\n");
 
-       while (1) {
-               if (node == NULL)
-                       break;
+       while ((ret = sqlite3_step(top_hashes_stmt)) == SQLITE_ROW) {
+               digest = (unsigned char *)sqlite3_column_blob(top_hashes_stmt, 0);
+               count = sqlite3_column_int64(top_hashes_stmt, 1);
+
+               ret = sqlite3_bind_blob(files_count_stmt, 1, digest, digest_len,
+                                       SQLITE_STATIC);
+               if (ret) {
+                       fprintf(stderr, "Error %d binding digest: %s\n", ret,
+                               sqlite3_errstr(ret));
+                       return;
+               }
+
+               ret = sqlite3_step(files_count_stmt);
+               if (ret != SQLITE_ROW && ret != SQLITE_DONE) {
+                       fprintf(stderr, "error %d, file count search: %s\n",
+                               ret, sqlite3_errstr(ret));
+                       return;
+               }
+
+               files_count = sqlite3_column_int64(files_count_stmt, 0);
+
+               if (!header_printed) {
+                       printf("Hash, # Blocks, # Files\n");
+                       header_printed = 1;
+               }
+
+               debug_print_digest(stdout, digest);
+               printf(", %"PRIu64", %"PRIu64"\n", count, files_count);
 
-               dups = rb_entry(node, struct dupe_blocks_list, dl_by_size);
+               sqlite3_reset(files_count_stmt);
 
-               debug_print_digest(stdout, dups->dl_hash);
-               printf(", %u, %u\n", dups->dl_num_elem, dups->dl_num_files);
                if (print_blocks) {
-                       list_for_each_entry(block, &dups->dl_list,
-                                           b_list) {
-                               struct filerec *f = block->b_file;
-                               printf("  %s\tloff: %llu lblock: %llu "
-                                      "flags: 0x%x ", f->filename,
-                                      (unsigned long long)block->b_loff,
-                                      (unsigned long long)block->b_loff / blocksize,
-                                      block->b_flags);
-                               printf_file_block_flags(block);
-                               printf("\n");
-                       }
+                       ret = print_all_blocks(digest);
+                       if (ret)
+                               return;
                }
 
-               if (!print_all_hashes && --num_to_print == 0)
+               if (!print_all_hashes && --num_to_print == 0) {
+                       ret = SQLITE_DONE;
                        break;
-
-               node = rb_next(node);
+               }
        }
+       if (ret != SQLITE_DONE) {
+               fprintf(stderr, "error %d retrieving hashes from table: %s\n",
+                       ret, sqlite3_errstr(ret));
+       }
+}
+
+static int print_files_cb(void *priv, int argc, char **argv, char **column)
+{
+       int i;
+       for(i = 0; i < argc; i++)
+               printf("%s\t", argv[i]);
+       printf("\n");
+       return 0;
 }
 
 static void print_filerecs(void)
 {
-       struct filerec *file;
+       int ret;
+       char *errorstr;
+
+#define        LIST_FILES                                                      \
+"select ino, subvol, blocks, size, filename from files;"
 
-       printf("Showing %llu files.\nInode\tBlocks Stored\tSubvold ID\tFilename\n",
-               num_filerecs);
+       printf("Showing %"PRIu64" files.\nInode\tSubvol ID\tBlocks Stored\tSize\tFilename\n",
+               disk_files);
 
-       list_for_each_entry(file, &filerec_list, rec_list) {
-               printf("%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%s\n", file->inum,
-                      file->num_blocks, file->subvolid, file->filename);
+       ret = sqlite3_exec(gdb, LIST_FILES, print_files_cb, gdb, &errorstr);
+       if (ret) {
+               fprintf(stderr, "error %d, executing file search: %s\n", ret,
+                       errorstr);
+               return;
        }
 }
 
 static unsigned int disk_blocksize;
 static int major, minor;
-static uint64_t disk_files, disk_hashes;
 
-static void print_file_info(struct hash_tree *tree)
+static void print_file_info(void)
 {
        printf("Raw header info for \"%s\":\n", serialize_fname);
        printf("  version: %u.%u\tblock_size: %u\n", major, minor,
               disk_blocksize);
        printf("  num_files: %"PRIu64"\tnum_hashes: %"PRIu64"\n",
               disk_files, disk_hashes);
-       printf("Loaded hashes from %"PRIu64" blocks into %"PRIu64" nodes\n",
-              tree->num_blocks, tree->num_hashes);
-       printf("Loaded %llu file records\n", num_filerecs);
 }
 
 static void usage(const char *prog)
@@ -280,20 +350,20 @@
 
        blocksize = disk_blocksize;
 
-       ret = dbfile_read_all_hashes(&tree);
+       ret = prepare_statements();
        if (ret)
                return ret;
 
-       print_file_info(&tree);
+       print_file_info();
 
-       if (num_to_print || print_all_hashes) {
-               sort_by_size(&tree);
+       if (num_to_print || print_all_hashes)
                print_by_size();
-       }
 
        if (print_file_list)
                print_filerecs();
 
+       finalize_statements();
+
        dbfile_close();
 
        return ret;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-0.10.beta3/util.c new/duperemove-0.10.beta4/util.c
--- old/duperemove-0.10.beta3/util.c    2015-07-10 09:46:56.000000000 +0200
+++ new/duperemove-0.10.beta4/util.c    2015-07-16 22:52:51.000000000 +0200
@@ -25,6 +25,7 @@
 #include <ctype.h>
 #include <inttypes.h>
 #include <execinfo.h>
+#include <sys/time.h>
 
 #include "debug.h"
 #include "util.h"
@@ -131,3 +132,23 @@
                printf("%s\n", messages[i]);
        free(messages);
 }
+
+void record_start(struct elapsed_time *e, const char *name)
+{
+       e->name = name;
+       gettimeofday(&e->start, NULL);
+}
+
+static void record_end(struct elapsed_time *e)
+{
+       gettimeofday(&e->end, NULL);
+
+       e->elapsed = (e->end.tv_sec - e->start.tv_sec) +
+               ((e->end.tv_usec - e->start.tv_usec) / 1000000.0F);
+}
+
+void record_end_print(struct elapsed_time *e)
+{
+       record_end(e);
+       printf("%s took %fs\n", e->name, e->elapsed);
+}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-0.10.beta3/util.h new/duperemove-0.10.beta4/util.h
--- old/duperemove-0.10.beta3/util.h    2015-07-10 09:46:56.000000000 +0200
+++ new/duperemove-0.10.beta4/util.h    2015-07-16 22:52:51.000000000 +0200
@@ -21,4 +21,14 @@
                _str;                                                   \
        })
 
+/* Trivial wrapper around gettimeofday */
+struct elapsed_time {
+       struct timeval  start;
+       struct timeval  end;
+       const char      *name;
+       double          elapsed;
+};
+void record_start(struct elapsed_time *e, const char *name);
+void record_end_print(struct elapsed_time *e);
+
 #endif /* __UTIL_H__ */


Reply via email to