Hello community, here is the log from the commit of package duperemove for openSUSE:Factory checked in at 2015-07-19 11:45:46 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/duperemove (Old) and /work/SRC/openSUSE:Factory/.duperemove.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "duperemove" Changes: -------- --- /work/SRC/openSUSE:Factory/duperemove/duperemove.changes 2015-07-14 17:45:19.000000000 +0200 +++ /work/SRC/openSUSE:Factory/.duperemove.new/duperemove.changes 2015-07-19 11:45:47.000000000 +0200 @@ -1,0 +2,6 @@ +Thu Jul 16 21:45:34 UTC 2015 - mfas...@suse.com + +- Update to duperemove v0.10.beta4 + - Better memory usage with hashstats utility + +------------------------------------------------------------------- Old: ---- v0.10.beta3.tar.gz New: ---- v0.10.beta4.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ duperemove.spec ++++++ --- /var/tmp/diff_new_pack.JjD2Cz/_old 2015-07-19 11:45:48.000000000 +0200 +++ /var/tmp/diff_new_pack.JjD2Cz/_new 2015-07-19 11:45:48.000000000 +0200 @@ -16,10 +16,10 @@ # -%define tar_version 0.10.beta3 +%define tar_version 0.10.beta4 %define samename btrfs-extent-same Name: duperemove -Version: 0.10.beta3 +Version: 0.10.beta4 Release: 0 Summary: Software to find duplicate extents in files and remove them License: GPL-2.0 @@ -28,7 +28,6 @@ Source: https://github.com/markfasheh/%{name}/archive/v%{tar_version}.tar.gz BuildRequires: gcc-c++ BuildRequires: glib2-devel -BuildRequires: libgcrypt-devel BuildRequires: sqlite3-devel BuildRoot: %{_tmppath}/%{name}-%{version}-build ++++++ v0.10.beta3.tar.gz -> v0.10.beta4.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-0.10.beta3/Makefile new/duperemove-0.10.beta4/Makefile --- old/duperemove-0.10.beta3/Makefile 2015-07-10 09:46:56.000000000 +0200 +++ new/duperemove-0.10.beta4/Makefile 2015-07-16 22:52:51.000000000 +0200 @@ -1,4 +1,4 @@ -VER=0.10.beta3 +VER=0.10.beta4 RELEASE=v$(VER) CC = gcc diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-0.10.beta3/dbfile.c new/duperemove-0.10.beta4/dbfile.c --- old/duperemove-0.10.beta3/dbfile.c 2015-07-10 09:46:56.000000000 +0200 +++ new/duperemove-0.10.beta4/dbfile.c 2015-07-16 22:52:51.000000000 +0200 @@ -21,7 +21,8 @@ #define DB_FILE_MAJOR 1 #define DB_FILE_MINOR 2 -static sqlite3 *gdb = NULL; +/* exported for hashstats.c */ +sqlite3 *gdb = NULL; #if (SQLITE_VERSION_NUMBER < 3007015) #define perror_sqlite(_err, _why) \ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-0.10.beta3/hashstats.c new/duperemove-0.10.beta4/hashstats.c --- old/duperemove-0.10.beta3/hashstats.c 2015-07-10 09:46:56.000000000 +0200 +++ new/duperemove-0.10.beta4/hashstats.c 2015-07-16 22:52:51.000000000 +0200 @@ -33,6 +33,8 @@ #include "bswap.h" +extern sqlite3 *gdb; + int verbose = 0, debug = 0; unsigned int blocksize; static int version_only = 0; @@ -41,144 +43,212 @@ static int num_to_print = 10; static int print_file_list = 0; static char *serialize_fname = NULL; -static struct rb_root by_size = RB_ROOT; +static uint64_t disk_files, disk_hashes; -static int cmp(struct dupe_blocks_list *tmp, struct dupe_blocks_list *dups) -{ - if (tmp->dl_num_elem < dups->dl_num_elem) - return -1; - else if (tmp->dl_num_elem > dups->dl_num_elem) - return 1; - return memcmp(dups->dl_hash, tmp->dl_hash, digest_len); -} +static sqlite3_stmt *top_hashes_stmt = NULL; +static sqlite3_stmt *files_count_stmt = NULL; +static sqlite3_stmt *find_blocks_stmt = NULL; -static void insert_by_size(struct dupe_blocks_list *dups) +static int prepare_statements(void) { - struct rb_node **p = &by_size.rb_node; - struct rb_node *parent = NULL; - struct dupe_blocks_list *tmp; int ret; - while (*p) { - parent = *p; - - tmp = rb_entry(parent, struct dupe_blocks_list, dl_by_size); +#define FIND_TOP_HASHES \ +"select digest, count(digest) from hashes group by digest having (count(digest) > 1) order by (count(digest)) desc;" + ret = sqlite3_prepare_v2(gdb, FIND_TOP_HASHES, -1, &top_hashes_stmt, + NULL); + if (ret) { + fprintf(stderr, "error %d while prepping hash search stmt: %s\n", + ret, sqlite3_errstr(ret)); + return ret; + } - ret = cmp(tmp, dups); - if (ret < 0) - p = &(*p)->rb_left; - else if (ret > 0) - p = &(*p)->rb_right; - else - break; +#define FIND_FILES_COUNT \ +"select count (distinct files.filename) from files INNER JOIN hashes on hashes.digest = ?1 AND files.subvol=hashes.subvol AND files.ino=hashes.ino;" + ret = sqlite3_prepare_v2(gdb, FIND_FILES_COUNT, -1, &files_count_stmt, + NULL); + if (ret) { + fprintf(stderr, "error %d while preparing file count stmt: %s\n", + ret, sqlite3_errstr(ret)); + return ret; } - rb_link_node(&dups->dl_by_size, parent, p); - rb_insert_color(&dups->dl_by_size, &by_size); +#define FIND_BLOCKS \ +"select files.filename, hashes.loff, hashes.flags from files INNER JOIN hashes on hashes.digest = ?1 AND files.subvol=hashes.subvol AND files.ino=hashes.ino;" + + ret = sqlite3_prepare_v2(gdb, FIND_BLOCKS, -1, &find_blocks_stmt, NULL); + if (ret) { + fprintf(stderr, "error %d while prepping find blocks stmt: %s\n", + ret, sqlite3_errstr(ret)); + return ret; + } + return 0; } -static void sort_by_size(struct hash_tree *tree) +static void finalize_statements(void) { - struct rb_root *root = &tree->root; - struct rb_node *node = rb_first(root); - struct dupe_blocks_list *dups; - - while (1) { - if (node == NULL) - break; - - dups = rb_entry(node, struct dupe_blocks_list, dl_node); - - insert_by_size(dups); - - node = rb_next(node); - } + sqlite3_finalize(top_hashes_stmt); + sqlite3_finalize(files_count_stmt); + sqlite3_finalize(find_blocks_stmt); } -static void printf_file_block_flags(struct file_block *block) +static void printf_file_block_flags(unsigned int flags) { - if (!block->b_flags) + if (!flags) return; printf("( "); - if (block->b_flags & FILE_BLOCK_SKIP_COMPARE) + if (flags & FILE_BLOCK_SKIP_COMPARE) printf("skip_compare "); - if (block->b_flags & FILE_BLOCK_DEDUPED) + if (flags & FILE_BLOCK_DEDUPED) printf("deduped "); - if (block->b_flags & FILE_BLOCK_HOLE) + if (flags & FILE_BLOCK_HOLE) printf("hole "); + if (flags & FILE_BLOCK_PARTIAL) + printf("partial "); printf(")"); } +static int print_all_blocks(unsigned char *digest) +{ + int ret; + uint64_t loff; + unsigned int flags; + const unsigned char *filename; + + ret = sqlite3_bind_blob(find_blocks_stmt, 1, digest, digest_len, + SQLITE_STATIC); + if (ret) { + fprintf(stderr, "Error %d binding digest for blocks: %s\n", ret, + sqlite3_errstr(ret)); + return ret; + } + + while ((ret = sqlite3_step(find_blocks_stmt)) == SQLITE_ROW) { + filename = sqlite3_column_text(find_blocks_stmt, 0); + loff = sqlite3_column_int64(find_blocks_stmt, 1); + flags = sqlite3_column_int(find_blocks_stmt, 2); + + printf(" %s\tloff: %llu lblock: %llu " + "flags: 0x%x ", filename, + (unsigned long long)loff, + (unsigned long long)loff / blocksize, + flags); + printf_file_block_flags(flags); + printf("\n"); + } + if (ret != SQLITE_DONE) { + fprintf(stderr, + "error %d running block stmt: %s\n", + ret, sqlite3_errstr(ret)); + return ret; + } + + sqlite3_reset(find_blocks_stmt); + + return 0; +} + static void print_by_size(void) { - struct rb_node *node = rb_first(&by_size); - struct dupe_blocks_list *dups; - struct file_block *block; + int ret; + int header_printed = 0; + unsigned char *digest; + uint64_t count, files_count; if (print_all_hashes) - printf("Print all hashes\n"); + printf("Print all hashes "); else - printf("Print top %d hashes\n", num_to_print); + printf("Print top %d hashes ", num_to_print); - printf("Hash, # Blocks, # Files\n"); + printf("(this may take some time)\n"); - while (1) { - if (node == NULL) - break; + while ((ret = sqlite3_step(top_hashes_stmt)) == SQLITE_ROW) { + digest = (unsigned char *)sqlite3_column_blob(top_hashes_stmt, 0); + count = sqlite3_column_int64(top_hashes_stmt, 1); + + ret = sqlite3_bind_blob(files_count_stmt, 1, digest, digest_len, + SQLITE_STATIC); + if (ret) { + fprintf(stderr, "Error %d binding digest: %s\n", ret, + sqlite3_errstr(ret)); + return; + } + + ret = sqlite3_step(files_count_stmt); + if (ret != SQLITE_ROW && ret != SQLITE_DONE) { + fprintf(stderr, "error %d, file count search: %s\n", + ret, sqlite3_errstr(ret)); + return; + } + + files_count = sqlite3_column_int64(files_count_stmt, 0); + + if (!header_printed) { + printf("Hash, # Blocks, # Files\n"); + header_printed = 1; + } + + debug_print_digest(stdout, digest); + printf(", %"PRIu64", %"PRIu64"\n", count, files_count); - dups = rb_entry(node, struct dupe_blocks_list, dl_by_size); + sqlite3_reset(files_count_stmt); - debug_print_digest(stdout, dups->dl_hash); - printf(", %u, %u\n", dups->dl_num_elem, dups->dl_num_files); if (print_blocks) { - list_for_each_entry(block, &dups->dl_list, - b_list) { - struct filerec *f = block->b_file; - printf(" %s\tloff: %llu lblock: %llu " - "flags: 0x%x ", f->filename, - (unsigned long long)block->b_loff, - (unsigned long long)block->b_loff / blocksize, - block->b_flags); - printf_file_block_flags(block); - printf("\n"); - } + ret = print_all_blocks(digest); + if (ret) + return; } - if (!print_all_hashes && --num_to_print == 0) + if (!print_all_hashes && --num_to_print == 0) { + ret = SQLITE_DONE; break; - - node = rb_next(node); + } } + if (ret != SQLITE_DONE) { + fprintf(stderr, "error %d retrieving hashes from table: %s\n", + ret, sqlite3_errstr(ret)); + } +} + +static int print_files_cb(void *priv, int argc, char **argv, char **column) +{ + int i; + for(i = 0; i < argc; i++) + printf("%s\t", argv[i]); + printf("\n"); + return 0; } static void print_filerecs(void) { - struct filerec *file; + int ret; + char *errorstr; + +#define LIST_FILES \ +"select ino, subvol, blocks, size, filename from files;" - printf("Showing %llu files.\nInode\tBlocks Stored\tSubvold ID\tFilename\n", - num_filerecs); + printf("Showing %"PRIu64" files.\nInode\tSubvol ID\tBlocks Stored\tSize\tFilename\n", + disk_files); - list_for_each_entry(file, &filerec_list, rec_list) { - printf("%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%s\n", file->inum, - file->num_blocks, file->subvolid, file->filename); + ret = sqlite3_exec(gdb, LIST_FILES, print_files_cb, gdb, &errorstr); + if (ret) { + fprintf(stderr, "error %d, executing file search: %s\n", ret, + errorstr); + return; } } static unsigned int disk_blocksize; static int major, minor; -static uint64_t disk_files, disk_hashes; -static void print_file_info(struct hash_tree *tree) +static void print_file_info(void) { printf("Raw header info for \"%s\":\n", serialize_fname); printf(" version: %u.%u\tblock_size: %u\n", major, minor, disk_blocksize); printf(" num_files: %"PRIu64"\tnum_hashes: %"PRIu64"\n", disk_files, disk_hashes); - printf("Loaded hashes from %"PRIu64" blocks into %"PRIu64" nodes\n", - tree->num_blocks, tree->num_hashes); - printf("Loaded %llu file records\n", num_filerecs); } static void usage(const char *prog) @@ -280,20 +350,20 @@ blocksize = disk_blocksize; - ret = dbfile_read_all_hashes(&tree); + ret = prepare_statements(); if (ret) return ret; - print_file_info(&tree); + print_file_info(); - if (num_to_print || print_all_hashes) { - sort_by_size(&tree); + if (num_to_print || print_all_hashes) print_by_size(); - } if (print_file_list) print_filerecs(); + finalize_statements(); + dbfile_close(); return ret; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-0.10.beta3/util.c new/duperemove-0.10.beta4/util.c --- old/duperemove-0.10.beta3/util.c 2015-07-10 09:46:56.000000000 +0200 +++ new/duperemove-0.10.beta4/util.c 2015-07-16 22:52:51.000000000 +0200 @@ -25,6 +25,7 @@ #include <ctype.h> #include <inttypes.h> #include <execinfo.h> +#include <sys/time.h> #include "debug.h" #include "util.h" @@ -131,3 +132,23 @@ printf("%s\n", messages[i]); free(messages); } + +void record_start(struct elapsed_time *e, const char *name) +{ + e->name = name; + gettimeofday(&e->start, NULL); +} + +static void record_end(struct elapsed_time *e) +{ + gettimeofday(&e->end, NULL); + + e->elapsed = (e->end.tv_sec - e->start.tv_sec) + + ((e->end.tv_usec - e->start.tv_usec) / 1000000.0F); +} + +void record_end_print(struct elapsed_time *e) +{ + record_end(e); + printf("%s took %fs\n", e->name, e->elapsed); +} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-0.10.beta3/util.h new/duperemove-0.10.beta4/util.h --- old/duperemove-0.10.beta3/util.h 2015-07-10 09:46:56.000000000 +0200 +++ new/duperemove-0.10.beta4/util.h 2015-07-16 22:52:51.000000000 +0200 @@ -21,4 +21,14 @@ _str; \ }) +/* Trivial wrapper around gettimeofday */ +struct elapsed_time { + struct timeval start; + struct timeval end; + const char *name; + double elapsed; +}; +void record_start(struct elapsed_time *e, const char *name); +void record_end_print(struct elapsed_time *e); + #endif /* __UTIL_H__ */