Hello community, here is the log from the commit of package duperemove for openSUSE:Factory checked in at 2014-11-19 20:26:42 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/duperemove (Old) and /work/SRC/openSUSE:Factory/.duperemove.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "duperemove" Changes: -------- --- /work/SRC/openSUSE:Factory/duperemove/duperemove.changes 2014-10-31 20:03:36.000000000 +0100 +++ /work/SRC/openSUSE:Factory/.duperemove.new/duperemove.changes 2014-11-19 20:30:29.000000000 +0100 @@ -1,0 +2,18 @@ +Mon Nov 17 19:40:06 UTC 2014 - mfas...@suse.com + +- Update to duperemove v0.09.beta3 + - Fix leak of directory fd during file scan + - Fix EMFILES (too many file descriptors) error during dedupe + - Fix corner case with dedupe leaving a file open and not-queued + - Support '-x' (one file system) option + - Add option to turn off extent lookup during csum phase + - Useful if running against snapshotted volumes + - show-shared-extents program to help users examine file state before or + after dedupe. + +------------------------------------------------------------------- +Mon Nov 10 14:49:27 UTC 2014 - sch...@suse.de + +- Build with %optflags + +------------------------------------------------------------------- Old: ---- duperemove-v0.09.beta2.tar.gz New: ---- duperemove-v0.09.beta3.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ duperemove.spec ++++++ --- /var/tmp/diff_new_pack.MGU3Ld/_old 2014-11-19 20:30:30.000000000 +0100 +++ /var/tmp/diff_new_pack.MGU3Ld/_new 2014-11-19 20:30:30.000000000 +0100 @@ -17,13 +17,13 @@ %define modname duperemove -%define tar_version v0.09.beta2 +%define tar_version v0.09.beta3 Name: duperemove BuildRequires: gcc-c++ BuildRequires: glib2-devel BuildRequires: libgcrypt-devel -Version: 0.09~beta2 +Version: 0.09~beta3 Release: 0 Summary: Software to find duplicate extents in files and remove them License: GPL-2.0 @@ -49,14 +49,15 @@ %setup -q -n %{modname}-%{tar_version} %build -make -make hashstats -make btrfs-extent-same +make CFLAGS="%optflags" +make hashstats CFLAGS="%optflags" +make btrfs-extent-same CFLAGS="%optflags" %install mkdir -p %{buildroot}/%{_sbindir} cp %{_builddir}/%{modname}-%{tar_version}/%{modname} %{buildroot}/%{_sbindir} cp %{_builddir}/%{modname}-%{tar_version}/hashstats %{buildroot}/%{_sbindir} +cp %{_builddir}/%{modname}-%{tar_version}/show-shared-extents %{buildroot}/%{_sbindir} cp %{_builddir}/%{modname}-%{tar_version}/%{samename} %{buildroot}/%{_sbindir} mkdir -p %{buildroot}%{_mandir}/man8 cp %{_builddir}/%{modname}-%{tar_version}/%{modname}.8 %{buildroot}/%{_mandir}/man8/ @@ -72,6 +73,7 @@ %doc LICENSE README %{_sbindir}/duperemove %{_sbindir}/hashstats +%{_sbindir}/show-shared-extents %{_mandir}/man?/%{modname}.8.gz %changelog ++++++ duperemove-v0.09.beta2.tar.gz -> duperemove-v0.09.beta3.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/Makefile new/duperemove-v0.09.beta3/Makefile --- old/duperemove-v0.09.beta2/Makefile 2014-10-31 00:27:39.000000000 +0100 +++ new/duperemove-v0.09.beta3/Makefile 2014-11-17 20:07:48.000000000 +0100 @@ -1,43 +1,67 @@ -RELEASE=v0.09.beta2 +RELEASE=v0.09.beta3 CC = gcc CFLAGS = -Wall -ggdb MANPAGES=duperemove.8 btrfs-extent-same.8 -DIST_SOURCES=csum-gcrypt.c csum-mhash.c csum.h duperemove.c hash-tree.c hash-tree.h results-tree.c results-tree.h kernel.h LICENSE list.h Makefile rbtree.c rbtree.h rbtree.txt README TODO dedupe.c dedupe.h btrfs-ioctl.h filerec.c filerec.h btrfs-util.c btrfs-util.h $(MANPAGES) btrfs-extent-same.c debug.h util.c util.h serialize.c serialize.h hashstats.c +CFILES=duperemove.c hash-tree.c results-tree.c rbtree.c dedupe.c filerec.c \ + btrfs-util.c util.c serialize.c memstats.c +hash_impl_CFILES=csum-mhash.c csum-gcrypt.c +hashstats_CFILES=hashstats.c +btrfs_extent_same_CFILES=btrfs-extent-same.c +csum_test_CFILES=csum-test.c +DIST_CFILES:=$(CFILES) $(hashstats_CFILES) $(btrfs_extent_same_CFILES) \ + $(csum_test_CFILES) $(hash_impl_CFILES) +HEADERS=csum.h hash-tree.h results-tree.h kernel.h list.h rbtree.h dedupe.h \ + btrfs-ioctl.h filerec.h btrfs-util.h debug.h util.h serialize.h \ + memstats.h +DIST_SOURCES:=$(DIST_CFILES) $(HEADERS) LICENSE Makefile rbtree.txt README \ + TODO $(MANPAGES) SubmittingPatches DIST=duperemove-$(RELEASE) DIST_TARBALL=$(DIST).tar.gz TEMP_INSTALL_DIR:=$(shell mktemp -du -p .) -hash_obj=csum-gcrypt.o +crypt_CFILES=csum-gcrypt.c crypt_CFLAGS=$(shell libgcrypt-config --cflags) crypt_LIBS=$(shell libgcrypt-config --libs) ifdef USE_MHASH - hash_obj=csum-mhash.o + crypt_CFILES=csum-mhash.c crypt_CFLAGS= crypt_LIBS=-lmhash endif +crypt_obj=$(crypt_CFILES:.c=.o) + +CFILES += $(crypt_CFILES) +objects = $(CFILES:.c=.o) + +hashstats_obj = $(crypt_obj) rbtree.o hash-tree.o filerec.o util.o serialize.o \ + results-tree.o +show_shared_obj = rbtree.o util.o +csum_test_obj = $(crypt_obj) util.o + +progs = duperemove hashstats btrfs-extent-same show-shared-extents csum-test glib_CFLAGS=$(shell pkg-config --cflags glib-2.0) glib_LIBS=$(shell pkg-config --libs glib-2.0) override CFLAGS += -D_FILE_OFFSET_BITS=64 -DVERSTRING=\"$(RELEASE)\" \ - $(crypt_CFLAGS) $(glib_CFLAGS) + $(crypt_CFLAGS) $(glib_CFLAGS) -rdynamic LIBRARY_FLAGS += $(crypt_LIBS) $(glib_LIBS) -objects = duperemove.o rbtree.o hash-tree.o results-tree.o dedupe.o filerec.o util.o serialize.o btrfs-util.o $(hash_obj) -progs = duperemove - DESTDIR = / PREFIX = /usr/local SHAREDIR = $(PREFIX)/share SBINDIR = $(PREFIX)/sbin MANDIR = $(SHAREDIR)/man -all: $(progs) kernel.h list.h btrfs-ioctl.h debug.h +.c.o: + $(CC) $(CFLAGS) -c $< -o $@ $(LIBRARY_FLAGS) -duperemove: $(objects) kernel.h duperemove.c +all: $(progs) +#TODO: Replace this with an auto-dependency +$(objects): $(HEADERS) +duperemove: $(objects) $(CC) $(CFLAGS) $(objects) -o duperemove $(LIBRARY_FLAGS) tarball: clean @@ -59,15 +83,14 @@ install -m 0644 $$man $(DESTDIR)$(MANDIR)/man8; \ done -csum-test: $(hash_obj) csum-test.c - $(CC) $(CFLAGS) $(hash_obj) -o csum-test csum-test.c $(LIBRARY_FLAGS) +csum-test: $(csum_test_obj) csum-test.c + $(CC) $(CFLAGS) $(csum_test_obj) -o csum-test csum-test.c $(LIBRARY_FLAGS) -filerec-test: filerec.c filerec.h rbtree.o - $(CC) $(CFLAGS) -DFILEREC_TEST filerec.c rbtree.o -o filerec-test $(LIBRARY_FLAGS) +show-shared-extents: $(show_shared_obj) filerec.c + $(CC) $(CFLAGS) -DFILEREC_TEST filerec.c $(show_shared_obj) -o show-shared-extents $(LIBRARY_FLAGS) -hashstats_obj = $(hash_obj) rbtree.o hash-tree.o filerec.o util.o serialize.o results-tree.o hashstats: $(hashstats_obj) hashstats.c $(CC) $(CFLAGS) $(hashstats_obj) hashstats.c -o hashstats $(LIBRARY_FLAGS) clean: - rm -fr $(objects) $(progs) $(DIST_TARBALL) btrfs-extent-same filerec-test hashstats csum-*.o *~ + rm -fr $(objects) $(progs) $(DIST_TARBALL) btrfs-extent-same filerec-test show-shared-extents hashstats csum-*.o *~ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/SubmittingPatches new/duperemove-v0.09.beta3/SubmittingPatches --- old/duperemove-v0.09.beta2/SubmittingPatches 1970-01-01 01:00:00.000000000 +0100 +++ new/duperemove-v0.09.beta3/SubmittingPatches 2014-11-17 20:07:48.000000000 +0100 @@ -0,0 +1,103 @@ +- Submit via github or e-mail to mfas...@suse.de is fine. + +- Coding style is Linux kernel except where the code differs ;) + +https://www.kernel.org/doc/Documentation/CodingStyle + +- Please sign-off your patches, Linux kernel style: + +From section 12 of https://www.kernel.org/doc/Documentation/SubmittingPatches : + +12) Sign your work + +To improve tracking of who did what, especially with patches that can +percolate to their final resting place in the kernel through several +layers of maintainers, we've introduced a "sign-off" procedure on +patches that are being emailed around. + +The sign-off is a simple line at the end of the explanation for the +patch, which certifies that you wrote it or otherwise have the right to +pass it on as an open-source patch. The rules are pretty simple: if you +can certify the below: + + Developer's Certificate of Origin 1.1 + + By making a contribution to this project, I certify that: + + (a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + + (b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + + (c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + + (d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. + +then you just add a line saying + + Signed-off-by: Random J Developer <ran...@developer.example.org> + +using your real name (sorry, no pseudonyms or anonymous contributions.) + +Some people also put extra tags at the end. They'll just be ignored for +now, but you can do this to mark internal company procedures or just +point out some special detail about the sign-off. + +If you are a subsystem or branch maintainer, sometimes you need to slightly +modify patches you receive in order to merge them, because the code is not +exactly the same in your tree and the submitters'. If you stick strictly to +rule (c), you should ask the submitter to rediff, but this is a totally +counter-productive waste of time and energy. Rule (b) allows you to adjust +the code, but then it is very impolite to change one submitter's code and +make him endorse your bugs. To solve this problem, it is recommended that +you add a line between the last Signed-off-by header and yours, indicating +the nature of your changes. While there is nothing mandatory about this, it +seems like prepending the description with your mail and/or name, all +enclosed in square brackets, is noticeable enough to make it obvious that +you are responsible for last-minute changes. Example : + + Signed-off-by: Random J Developer <ran...@developer.example.org> + [lu...@maintainer.example.org: struct foo moved from foo.c to foo.h] + Signed-off-by: Lucky K Maintainer <lu...@maintainer.example.org> + +This practise is particularly helpful if you maintain a stable branch and +want at the same time to credit the author, track changes, merge the fix, +and protect the submitter from complaints. Note that under no circumstances +can you change the author's identity (the From header), as it is the one +which appears in the changelog. + +Special note to back-porters: It seems to be a common and useful practise +to insert an indication of the origin of a patch at the top of the commit +message (just after the subject line) to facilitate tracking. For instance, +here's what we see in 2.6-stable : + + Date: Tue May 13 19:10:30 2008 +0000 + + SCSI: libiscsi regression in 2.6.25: fix nop timer handling + + commit 4cf1043593db6a337f10e006c23c69e5fc93e722 upstream + +And here's what appears in 2.4 : + + Date: Tue May 13 22:12:27 2008 +0200 + + wireless, airo: waitbusy() won't delay + + [backport of 2.6 commit b7acbdfbd1f277c1eb23f344f899cfa4cd0bf36a] + +Whatever the format, this information provides a valuable help to people +tracking your trees, and to people trying to trouble-shoot bugs in your +tree. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/csum-test.c new/duperemove-v0.09.beta3/csum-test.c --- old/duperemove-v0.09.beta2/csum-test.c 1970-01-01 01:00:00.000000000 +0100 +++ new/duperemove-v0.09.beta3/csum-test.c 2014-11-17 20:07:48.000000000 +0100 @@ -0,0 +1,94 @@ +/* + * csum-test.c + * + * Test the checksumming code of duperemove. You can compare the + * output of this software with that of 'sha256sum' + * + * Copyright (C) 2014 SUSE. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Authors: Mark Fasheh <mfas...@suse.de> + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> + +#include <stdio.h> + +#include "csum.h" + + +#define BUF_LEN 4096 +static unsigned char buf[BUF_LEN]; +static unsigned char digest[DIGEST_LEN_MAX] = { 0, }; + +int main(int argc, char **argv) +{ + char *fname = argv[1]; + int fd, ret; + size_t len; + struct stat s; + struct running_checksum *csum; + + init_hash(); + + if (argc != 2) { + fprintf(stderr, "Usage: %s filename\n", argv[0]); + return 1; + } + + fd = open(fname, O_RDONLY); + if (fd < 0) + return errno; + + /* + * If the test file size is less than BUF_LEN we'll exercise + * the one-shot function. Otherwise, do a running checksum. + */ + ret = fstat(fd, &s); + if (ret) + return errno; + + if (s.st_size == 0) + return 0; + + if (s.st_size <= BUF_LEN) { + len = read(fd, buf, BUF_LEN); + if (len < 0) + return errno; + if (len == 0) + return 1; + checksum_block((char *)buf, len, digest); + } else { + csum = start_running_checksum(); + + while (1) { + len = read(fd, buf, BUF_LEN); + if (len < 0) + return errno; + if (len) { + add_to_running_checksum(csum, len, buf); + } else + break; /* EOF */ + } + + finish_running_checksum(csum, digest); + } + + debug_print_digest(stdout, digest); + printf(" %s\n", fname); + + return 0; +} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/debug.h new/duperemove-v0.09.beta3/debug.h --- old/duperemove-v0.09.beta2/debug.h 2014-10-31 00:27:39.000000000 +0100 +++ new/duperemove-v0.09.beta3/debug.h 2014-11-17 20:07:48.000000000 +0100 @@ -7,70 +7,22 @@ extern int verbose; extern int debug; -/* - * Rudimentary tracking of object allocation. Use this within a c file - * to declare the tracking variable and the print function body. - * - * In addition, debug.h needs to declare an extern and function - * prototype (see below) and print_mem_stats() in util.c needs an - * update. - */ -#define declare_alloc_tracking(_type) \ -extern long long num_##_type; \ -static inline struct _type *malloc_##_type(void) \ -{ \ - struct _type *t = malloc(sizeof(struct _type)); \ - if (t) \ - num_##_type++; \ - return t; \ -} \ -static inline struct _type *calloc_##_type(int n) \ -{ \ - struct _type *t = calloc(n, sizeof(struct _type)); \ - if (t) \ - num_##_type += n; \ - return t; \ -} \ -static inline void free_##_type(struct _type *t) \ -{ \ - if (t) { \ - num_##_type--; \ - free(t); \ - } \ -} \ -void show_allocs_##_type(void) \ -{ \ - long size = sizeof(struct _type); \ - unsigned long long total = size * num_##_type; \ - printf("struct " #_type " num: %llu sizeof: %lu total: %llu\n", \ - num_##_type, size, total); \ -} - -#define declare_alloc_tracking_header(_type) \ -long long num_##_type; \ -void show_allocs_##_type(void); - -declare_alloc_tracking_header(file_block); -declare_alloc_tracking_header(dupe_blocks_list); -declare_alloc_tracking_header(dupe_extents); -declare_alloc_tracking_header(extent); -declare_alloc_tracking_header(filerec); -declare_alloc_tracking_header(files_compared); -declare_alloc_tracking_header(filerec_token); -declare_alloc_tracking_header(file_hash_head); -/* Can be called anywhere we want to dump the above statistics */ -void print_mem_stats(void); +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) #define dprintf(args...) if (debug) printf(args) #define vprintf(args...) if (verbose) printf(args) +void print_stack_trace(void);/* defined in util.c */ #define abort_lineno() do { \ printf("ERROR: %s:%d\n", __FILE__, __LINE__); \ + print_stack_trace(); \ abort(); \ } while (0) #define abort_on(condition) do { \ - if (condition) { \ - printf("ERROR: %s:%d\n", __FILE__, __LINE__);\ + if (unlikely(condition)) { \ + printf("ERROR: %s:%d\n", __FILE__, __LINE__); \ + print_stack_trace(); \ abort(); \ } \ } while(0) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/dedupe.c new/duperemove-v0.09.beta3/dedupe.c --- old/duperemove-v0.09.beta2/dedupe.c 2014-10-31 00:27:39.000000000 +0100 +++ new/duperemove-v0.09.beta3/dedupe.c 2014-11-17 20:07:48.000000000 +0100 @@ -142,7 +142,7 @@ struct dedupe_ctxt *ctxt = calloc(1, sizeof(*ctxt)); struct btrfs_ioctl_same_args *same; unsigned int same_size; - unsigned int max_dest_files = max_extents - 1; + unsigned int max_dest_files; if (ctxt == NULL) return NULL; @@ -150,6 +150,8 @@ if (max_extents > MAX_DEDUPES_PER_IOCTL) max_extents = MAX_DEDUPES_PER_IOCTL; + max_dest_files = max_extents - 1; + same_size = sizeof(*same) + max_dest_files * sizeof(struct btrfs_ioctl_same_extent_info); same = calloc(1, same_size); @@ -299,8 +301,11 @@ { struct dedupe_req *req; - if (list_empty(&ctxt->completed)) - goto out; + /* + * We should not be called if dedupe_extents wasn't called or if + * we already passed back all the results.. + */ + abort_on(list_empty(&ctxt->completed)); req = list_entry(ctxt->completed.next, struct dedupe_req, req_list); list_del_init(&req->req_list); @@ -311,6 +316,6 @@ *file = req->req_file; free_dedupe_req(req); -out: + return !!list_empty(&ctxt->completed); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/duperemove.8 new/duperemove-v0.09.beta3/duperemove.8 --- old/duperemove-v0.09.beta2/duperemove.8 2014-10-31 00:27:39.000000000 +0100 +++ new/duperemove-v0.09.beta3/duperemove.8 2014-11-17 20:07:48.000000000 +0100 @@ -92,6 +92,25 @@ deduped from. .TP +\fB\--lookup-extents=[yes|no]\fR +While checksumming a file, duperemove will lookup file extent +state. This information is later used to optimize the search for +duplicate extents. This defaults to on. However, if you use duperemove on a +subvolume that has been snapshotted you will want to read below. + +On btrfs, extents which have been snapshotted are reported as +shared. Internally duperemove considers shared extents as +deduped. When run on a subvolume with snapshots then, duperemove may +skip some or all extents, depending on when the most recent snapshot +was taken. + +The workaround is to run duperemove at least once with +\fB\--lookup-extents=no\fR so that it considers all extents for +dedupe. You can then run with extent lookups on until your next snapshot. + +We plan to remove this restriction in a future version of duperemove. + +.TP \fB\-?, --help\fR Prints help text. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/duperemove.c new/duperemove-v0.09.beta3/duperemove.c --- old/duperemove-v0.09.beta2/duperemove.c 2014-10-31 00:27:39.000000000 +0100 +++ new/duperemove-v0.09.beta3/duperemove.c 2014-11-17 20:07:48.000000000 +0100 @@ -46,6 +46,7 @@ #include "util.h" #include "serialize.h" #include "btrfs-util.h" +#include "memstats.h" #include "debug.h" /* exported via debug.h */ @@ -65,12 +66,15 @@ static int recurse_dirs = 0; static int target_rw = 1; static int version_only = 0; +static int one_file_system = 0; +static dev_t one_fs_dev = 0; static int write_hashes = 0; static int scramble_filenames = 0; static int read_hashes = 0; static char *serialize_fname = NULL; static unsigned int hash_threads = 0; +static int do_lookup_extents = 1; static int fancy_status = 0; @@ -214,14 +218,18 @@ uint64_t *kern_bytes) { int ret = 0; + int last = 0; int rc; uint64_t shared_prev, shared_post; - unsigned int processed = 0; struct extent *extent; struct dedupe_ctxt *ctxt = NULL; uint64_t len = dext->de_len; LIST_HEAD(open_files); struct filerec *file; + struct extent *prev = NULL; + struct extent *to_add; + + abort_on(dext->de_num_dupes < 2); shared_prev = shared_post = 0ULL; add_shared_extents(dext, &shared_prev); @@ -231,26 +239,25 @@ extent->e_file->filename, (unsigned long long)extent->e_loff / blocksize, (unsigned long long)extent->e_loff); - processed++; + if (list_is_last(&extent->e_list, &dext->de_extents)) + last = 1; + + to_add = extent; file = extent->e_file; - if (list_empty(&file->tmp_list)) { - /* only open the file once per dedupe pass */ - ret = filerec_open(file, target_rw); - if (ret) { - fprintf(stderr, "%s: Skipping dedupe.\n", - extent->e_file->filename); - /* - * If this was our last duplicate extent in - * the list, and we added dupes from a - * previous iteration of the loop we need to - * run dedupe before exiting. - */ - if (ctxt && processed == dext->de_num_dupes) - goto run_dedupe; - continue; - } - list_add(&file->tmp_list, &open_files); + ret = filerec_open_once(file, target_rw, &open_files); + if (ret) { + fprintf(stderr, "%s: Skipping dedupe.\n", + extent->e_file->filename); + /* + * If this was our last duplicate extent in + * the list, and we added dupes from a + * previous iteration of the loop we need to + * run dedupe before exiting. + */ + if (ctxt && last) + goto run_dedupe; + continue; } if (ctxt == NULL) { @@ -264,47 +271,78 @@ goto out; } + if (!last) { + /* + * We added our file already here via + * new_dedupe_ctxt, so go to the next + * loop iteration. + */ + continue; + } + /* - * We added our file already here during - * allocation so go to the next loop - * iteration. + * We started a new context, but only have one + * extent left to dedupe (need at least + * 2). This is pretty rare but instead of + * leaving it not-deduped, we can pick the + * most recent extent off the list and re-add + * that. The old extent won't be deduped again + * but this one will. */ - continue; + abort_on(!prev); + to_add = prev; /* The ole' extent switcharoo */ } + prev = extent; /* save previous extent for condition above */ - rc = add_extent_to_dedupe(ctxt, extent->e_loff, file); + rc = add_extent_to_dedupe(ctxt, to_add->e_loff, to_add->e_file); if (rc) { - if (rc < 0) + if (rc < 0) { + /* This can only be ENOMEM. */ fprintf(stderr, "%s: Request not queued.\n", - extent->e_file->filename); + to_add->e_file->filename); + ret = ENOMEM; + goto out; + } - /* Don't continue if we reached the end of our list */ - if (processed == dext->de_num_dupes) + if (last) goto run_dedupe; continue; } run_dedupe: - printf("Dedupe %d extents with target: (%s, %s), \"%s\"\n", - ctxt->num_queued, pretty_size(ctxt->orig_file_off), - pretty_size(ctxt->orig_len), ctxt->ioctl_file->filename); + /* + * We can get here with only the target extent (0 queued) if + * filerec_open_list fails on the 2nd (and last) + * extent. + */ + if (ctxt->num_queued) { + printf("Dedupe %d extents with target: (%s, %s), " + "\"%s\"\n", + ctxt->num_queued, + pretty_size(ctxt->orig_file_off), + pretty_size(ctxt->orig_len), + ctxt->ioctl_file->filename); - ret = dedupe_extents(ctxt); - if (ret) { - ret = errno; - fprintf(stderr, - "FAILURE: Dedupe ioctl returns %d: %s\n", - ret, strerror(ret)); - } + ret = dedupe_extents(ctxt); + if (ret) { + ret = errno; + fprintf(stderr, + "FAILURE: Dedupe ioctl returns %d: %s\n", + ret, strerror(ret)); + } - process_dedupe_results(ctxt, kern_bytes); + process_dedupe_results(ctxt, kern_bytes); + } filerec_close_files_list(&open_files); free_dedupe_ctxt(ctxt); ctxt = NULL; } + abort_on(ctxt != NULL); + abort_on(!list_empty(&open_files)); + add_shared_extents(dext, &shared_post); /* * It's entirely possible that some other process is @@ -329,6 +367,8 @@ */ free_dedupe_ctxt(ctxt); + abort_on(!list_empty(&open_files)); + return ret; } @@ -368,7 +408,7 @@ uint64_t off = 0; ssize_t bytes = 0, bytes_read = 0; int ret = 0; - struct fiemap_ctxt *fc; + struct fiemap_ctxt *fc = NULL; unsigned int flags, hole; char *buf = malloc(blocksize); @@ -382,11 +422,13 @@ printf("csum: %s \t[%llu/%llu]\n", file->filename, __sync_add_and_fetch(&cur_num_filerecs, 1), num_filerecs); - fc = alloc_fiemap_ctxt(); - if (fc == NULL) /* This should be non-fatal */ - fprintf(stderr, - "Low memory allocating fiemap context for \"%s\"\n", - file->filename); + if (do_lookup_extents) { + fc = alloc_fiemap_ctxt(); + if (fc == NULL) /* This should be non-fatal */ + fprintf(stderr, + "Low memory allocating fiemap context for \"%s\"\n", + file->filename); + } ret = filerec_open(file, 0); if (ret) @@ -553,6 +595,7 @@ printf("\t-b bsize\tUse bsize blocks. Default is %dk.\n", DEFAULT_BLOCKSIZE / 1024); printf("\t-h\t\tPrint numbers in human-readable format.\n"); + printf("\t-x\t\tDon't cross filesystem boundaries.\n"); printf("\t-v\t\tBe verbose.\n"); printf("\t--hash-threads=N\n\t\t\tUse N threads for hashing phase. " "Default is automatically detected.\n"); @@ -560,6 +603,8 @@ "A file list is not required with this option.\n"); printf("\t--write-hashes=hashfile\n\t\t\tWrite hashes to a hashfile. " "These can be read in at a later date and deduped from.\n"); + printf("\t--lookup-extents=[yes|no]\n\t\t\tLookup extent info during " + "checksum phase. Defaults to yes.\n"); printf("\t--debug\t\tPrint debug messages, forces -v if selected.\n"); printf("\t--help\t\tPrints this help text.\n"); } @@ -568,6 +613,7 @@ static int walk_dir(const char *name) { + int ret = 0; struct dirent *entry; DIR *dirp; @@ -588,8 +634,10 @@ if (entry->d_type == DT_REG || (recurse_dirs && entry->d_type == DT_DIR)) - if (add_file(entry->d_name, dirfd(dirp))) - return 1; + if (add_file(entry->d_name, dirfd(dirp))) { + ret = 1; + goto out; + } } } while (entry != NULL); @@ -598,8 +646,9 @@ errno, strerror(errno), path); } +out: closedir(dirp); - return 0; + return ret; } /* @@ -614,6 +663,7 @@ char *pathtmp; struct filerec *file; uint64_t subvolid; + dev_t dev; if (len > (path_max - pathp)) { fprintf(stderr, "Path max exceeded: %s %s\n", path, name); @@ -635,6 +685,16 @@ goto out; } + dev = st.st_dev; + if (one_file_system) { + if (!one_fs_dev) + one_fs_dev = dev; + if (one_fs_dev != dev) { + dprintf("Skipping file %s because of -x\n", path); + goto out; + } + } + if (S_ISDIR(st.st_mode)) { if (walk_dir(name)) return 1; @@ -712,6 +772,15 @@ return 0; } +static int parse_yesno_option(char *arg, int default_val) +{ + if (strncmp(arg, "yes", 3) == 0) + return 1; + else if (strncmp(arg, "no", 2) == 0) + return 0; + return default_val; +} + enum { DEBUG_OPTION = CHAR_MAX + 1, HELP_OPTION, @@ -720,6 +789,8 @@ WRITE_HASHES_SCRAMBLE_OPTION, READ_HASHES_OPTION, HASH_THREADS_OPTION, + LOOKUP_EXTENTS_OPTION, + ONE_FILESYSTEM_OPTION, }; /* @@ -736,13 +807,15 @@ { "write-hashes-scramble", 1, 0, WRITE_HASHES_SCRAMBLE_OPTION }, { "read-hashes", 1, 0, READ_HASHES_OPTION }, { "hash-threads", 1, 0, HASH_THREADS_OPTION }, + { "lookup-extents", 1, 0, LOOKUP_EXTENTS_OPTION }, + { "one-file-system", 0, 0, ONE_FILESYSTEM_OPTION }, { 0, 0, 0, 0} }; if (argc < 2) return 1; - while ((c = getopt_long(argc, argv, "Ab:vdDrh?", long_ops, NULL)) + while ((c = getopt_long(argc, argv, "Ab:vdDrh?x", long_ops, NULL)) != -1) { switch (c) { case 'A': @@ -788,6 +861,13 @@ if (!hash_threads) return EINVAL; break; + case LOOKUP_EXTENTS_OPTION: + do_lookup_extents = parse_yesno_option(optarg, 1); + break; + case ONE_FILESYSTEM_OPTION: + case 'x': + one_file_system = 1; + break; case HELP_OPTION: case '?': default: @@ -1022,8 +1102,9 @@ * so we will want to account for this in a * future change. */ - if (block1->b_flags & FILE_BLOCK_DEDUPED - && block2->b_flags & FILE_BLOCK_DEDUPED) + if (do_lookup_extents && + block1->b_flags & FILE_BLOCK_DEDUPED && + block2->b_flags & FILE_BLOCK_DEDUPED) continue; file2 = block2->b_file; @@ -1063,7 +1144,7 @@ if (!fancy_status) return; - progress = (float) processed / tree->num_hashes; + progress = (float) processed / tree->num_blocks; pos = width * progress; /* Only update our status every width% */ @@ -1091,10 +1172,10 @@ return; if (err) - printf("\rSearch exited (%llu processed) with error %d: " + printf("\nSearch exited (%llu processed) with error %d: " "\"%s\"\n", processed, err, strerror(err)); else - printf("\rSearch completed with no errors. \n"); + printf("\nSearch completed with no errors. \n"); fflush(stdout); } @@ -1120,10 +1201,12 @@ goto out; } - processed++; + processed += dups->dl_num_elem; node = rb_next(node); } + + update_extent_search_status(tree, processed); out: clear_extent_search_status(processed, ret); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/filerec.c new/duperemove-v0.09.beta3/filerec.c --- old/duperemove-v0.09.beta2/filerec.c 2014-10-31 00:27:39.000000000 +0100 +++ new/duperemove-v0.09.beta3/filerec.c 2014-11-17 20:07:48.000000000 +0100 @@ -19,6 +19,7 @@ #include "rbtree.h" #include "list.h" #include "debug.h" +#include "memstats.h" #include "filerec.h" @@ -281,18 +282,18 @@ if (write) flags = O_RDWR; - if (file->fd == -1) { - fd = open(file->filename, flags); - if (fd == -1) { - fprintf(stderr, "Error %d: %s while opening \"%s\" " - "(write=%d)\n", - errno, strerror(errno), file->filename, write); - return errno; - } + abort_on(file->fd != -1); - file->fd = fd; + fd = open(file->filename, flags); + if (fd == -1) { + fprintf(stderr, "Error %d: %s while opening \"%s\" " + "(write=%d)\n", + errno, strerror(errno), file->filename, write); + return errno; } + file->fd = fd; + return 0; } @@ -304,6 +305,20 @@ } } +int filerec_open_once(struct filerec *file, int write, + struct list_head *open_files) +{ + int ret; + + if (list_empty(&file->tmp_list)) { + ret = filerec_open(file, write); + if (ret) + return ret; + list_add(&file->tmp_list, open_files); + } + return 0; +} + void filerec_close_files_list(struct list_head *open_files) { struct filerec *file, *tmp; @@ -437,6 +452,10 @@ return err; } +#ifdef FILEREC_TEST +static char *fiemap_flags_str(unsigned long long flags); +#endif + /* * Skeleton for this function taken from e2fsprogs.git/misc/filefrag.c * which is Copyright 2003 by Theodore Ts'o and released under the GPL. @@ -483,7 +502,7 @@ for (i = 0; i < fiemap->fm_mapped_extents; i++) { if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST) last = 1; - +#ifndef FILEREC_TEST dprintf("(fiemap) [%d] fe_logical: %llu, " "fe_length: %llu, fe_physical: %llu, " "fe_flags: 0x%x\n", @@ -491,6 +510,16 @@ (unsigned long long)fm_ext[i].fe_length, (unsigned long long)fm_ext[i].fe_physical, fm_ext[i].fe_flags); +#else + dprintf("(fiemap) [%d] fe_logical: %llu, " + "fe_length: %llu, fe_physical: %llu, " + "fe_flags: 0x%x %s\n", + i, (unsigned long long)fm_ext[i].fe_logical, + (unsigned long long)fm_ext[i].fe_length, + (unsigned long long)fm_ext[i].fe_physical, + fm_ext[i].fe_flags, + fiemap_flags_str(fm_ext[i].fe_flags)); +#endif loff = fm_ext[i].fe_logical; ext_len = fm_ext[i].fe_length; @@ -540,6 +569,83 @@ } #ifdef FILEREC_TEST +#define FLAG_STR_LEN 4096 +static char flagstr[FLAG_STR_LEN]; +/* This function is not thread-safe */ +static char *fiemap_flags_str(unsigned long long flags) +{ + int size = FLAG_STR_LEN; + int written = 0; + char *str = flagstr; + + if (flags) { + written = snprintf(str, size, "("); + str += written; + size -= written; + } + + if (flags & FIEMAP_EXTENT_LAST) { + written = snprintf(str, size, "last "); + str += written; + size -= written; + } + + if (flags & FIEMAP_EXTENT_UNKNOWN) { + written = snprintf(str, size, "unknown "); + str += written; + size -= written; + } + if (flags & FIEMAP_EXTENT_DELALLOC) { + written = snprintf(str, size, "delalloc "); + str += written; + size -= written; + } + if (flags & FIEMAP_EXTENT_ENCODED) { + written = snprintf(str, size, "encoded "); + str += written; + size -= written; + } + if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED) { + written = snprintf(str, size, "data_encrypted "); + str += written; + size -= written; + } + if (flags & FIEMAP_EXTENT_NOT_ALIGNED) { + written = snprintf(str, size, "not_aligned "); + str += written; + size -= written; + } + if (flags & FIEMAP_EXTENT_DATA_INLINE) { + written = snprintf(str, size, "data_inline "); + str += written; + size -= written; + } + if (flags & FIEMAP_EXTENT_DATA_TAIL) { + written = snprintf(str, size, "data_tail "); + str += written; + size -= written; + } + if (flags & FIEMAP_EXTENT_UNWRITTEN) { + written = snprintf(str, size, "unwritten "); + str += written; + size -= written; + } + if (flags & FIEMAP_EXTENT_MERGED) { + written = snprintf(str, size, "merged "); + str += written; + size -= written; + } + if (flags & FIEMAP_EXTENT_SHARED) { + written = snprintf(str, size, "shared "); + str += written; + size -= written; + } + + if (flags) + snprintf(str, size, ")"); + + return flagstr; +} int debug = 1; /* Want prints from filerec_count_shared */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/filerec.h new/duperemove-v0.09.beta3/filerec.h --- old/duperemove-v0.09.beta2/filerec.h 2014-10-31 00:27:39.000000000 +0100 +++ new/duperemove-v0.09.beta3/filerec.h 2014-11-17 20:07:48.000000000 +0100 @@ -36,6 +36,8 @@ int filerec_open(struct filerec *file, int write); void filerec_close(struct filerec *file); +int filerec_open_once(struct filerec *file, int write, + struct list_head *open_files); void filerec_close_files_list(struct list_head *open_files); int filerec_count_shared(struct filerec *file, uint64_t start, uint64_t len, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/hash-tree.c new/duperemove-v0.09.beta3/hash-tree.c --- old/duperemove-v0.09.beta2/hash-tree.c 2014-10-31 00:27:39.000000000 +0100 +++ new/duperemove-v0.09.beta3/hash-tree.c 2014-11-17 20:07:48.000000000 +0100 @@ -30,6 +30,7 @@ #include "hash-tree.h" #include "debug.h" +#include "memstats.h" declare_alloc_tracking(file_block); declare_alloc_tracking(dupe_blocks_list); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/memstats.c new/duperemove-v0.09.beta3/memstats.c --- old/duperemove-v0.09.beta2/memstats.c 1970-01-01 01:00:00.000000000 +0100 +++ new/duperemove-v0.09.beta3/memstats.c 2014-11-17 20:07:48.000000000 +0100 @@ -0,0 +1,15 @@ +#include <stdlib.h> +#include <stdio.h> + +#include "memstats.h" + +void print_mem_stats(void) +{ + printf("Duperemove memory usage statistics:\n"); + show_allocs_file_block(); + show_allocs_dupe_blocks_list(); + show_allocs_dupe_extents(); + show_allocs_extent(); + show_allocs_filerec(); + show_allocs_filerec_token(); +} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/memstats.h new/duperemove-v0.09.beta3/memstats.h --- old/duperemove-v0.09.beta2/memstats.h 1970-01-01 01:00:00.000000000 +0100 +++ new/duperemove-v0.09.beta3/memstats.h 2014-11-17 20:07:48.000000000 +0100 @@ -0,0 +1,58 @@ +#ifndef __MEMSTATS_H__ +#define __MEMSTATS_H__ + +/* + * Rudimentary tracking of object allocation. Use this within a c file + * to declare the tracking variable and the print function body. + * + * In addition, memstats.h needs to declare an extern and function + * prototype (see below) and print_mem_stats() in memstats.c needs an + * update. + */ +#define declare_alloc_tracking(_type) \ +extern long long num_##_type; \ +static inline struct _type *malloc_##_type(void) \ +{ \ + struct _type *t = malloc(sizeof(struct _type)); \ + if (t) \ + num_##_type++; \ + return t; \ +} \ +static inline struct _type *calloc_##_type(int n) \ +{ \ + struct _type *t = calloc(n, sizeof(struct _type)); \ + if (t) \ + num_##_type += n; \ + return t; \ +} \ +static inline void free_##_type(struct _type *t) \ +{ \ + if (t) { \ + num_##_type--; \ + free(t); \ + } \ +} \ +void show_allocs_##_type(void) \ +{ \ + long size = sizeof(struct _type); \ + unsigned long long total = size * num_##_type; \ + printf("struct " #_type " num: %llu sizeof: %lu total: %llu\n", \ + num_##_type, size, total); \ +} + +#define declare_alloc_tracking_header(_type) \ +long long num_##_type; \ +void show_allocs_##_type(void); + +declare_alloc_tracking_header(file_block); +declare_alloc_tracking_header(dupe_blocks_list); +declare_alloc_tracking_header(dupe_extents); +declare_alloc_tracking_header(extent); +declare_alloc_tracking_header(filerec); +declare_alloc_tracking_header(files_compared); +declare_alloc_tracking_header(filerec_token); +declare_alloc_tracking_header(file_hash_head); +/* Can be called anywhere we want to dump the above statistics */ +void print_mem_stats(void); + +#endif /* __MEMSTATS_H__ */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/results-tree.c new/duperemove-v0.09.beta3/results-tree.c --- old/duperemove-v0.09.beta2/results-tree.c 2014-10-31 00:27:39.000000000 +0100 +++ new/duperemove-v0.09.beta3/results-tree.c 2014-11-17 20:07:48.000000000 +0100 @@ -31,6 +31,7 @@ #include "results-tree.h" #include "debug.h" +#include "memstats.h" declare_alloc_tracking(dupe_extents); declare_alloc_tracking(extent); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta2/util.c new/duperemove-v0.09.beta3/util.c --- old/duperemove-v0.09.beta2/util.c 2014-10-31 00:27:39.000000000 +0100 +++ new/duperemove-v0.09.beta3/util.c 2014-11-17 20:07:48.000000000 +0100 @@ -24,6 +24,7 @@ #include <stdint.h> #include <ctype.h> #include <inttypes.h> +#include <execinfo.h> #include "debug.h" #include "util.h" @@ -117,13 +118,16 @@ size_strs[num_divs]); } -void print_mem_stats(void) +void print_stack_trace(void) { - printf("Duperemove memory usage statistics:\n"); - show_allocs_file_block(); - show_allocs_dupe_blocks_list(); - show_allocs_dupe_extents(); - show_allocs_extent(); - show_allocs_filerec(); - show_allocs_filerec_token(); + void *trace[16]; + char **messages = (char **)NULL; + int i, trace_size = 0; + + trace_size = backtrace(trace, 16); + messages = backtrace_symbols(trace, trace_size); + printf("[stack trace follows]\n"); + for (i=0; i < trace_size; i++) + printf("%s\n", messages[i]); + free(messages); } -- To unsubscribe, e-mail: opensuse-commit+unsubscr...@opensuse.org For additional commands, e-mail: opensuse-commit+h...@opensuse.org