dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit 99c3b317f9e0558d624235439a4162306f8e549d
parent b1fd149a54efd7fe0d3bf4dcd5b3c48467d4434f
Author: sin <sin@2f30.org>
Date:   Tue, 19 Feb 2019 10:17:52 +0000

Rework stats

Diffstat:
M dedup.c | 71 +++++++++++++++++++++++++++++++++++++----------------------------------
1 file changed, 37 insertions(+), 34 deletions(-)

diff --git a/dedup.c b/dedup.c @@ -25,7 +25,7 @@ #define MDSIZE SHA256_DIGEST_LENGTH /* file format version */ -#define VER_MIN 0 +#define VER_MIN 1 #define VER_MAJ 0 #define ROTL(x, y) (((x) << (y)) | ((x) >> (32 - (y)))) @@ -39,8 +39,12 @@ struct stats { uint64_t orig_size; uint64_t comp_size; uint64_t dedup_size; + uint64_t min_blk_size; + uint64_t max_blk_size; + uint64_t nblks; uint64_t cache_hits; uint64_t cache_misses; + uint64_t reserved[4]; }; /* index file header */ @@ -48,6 +52,7 @@ struct enthdr { uint64_t flags; uint64_t nents; uint64_t store_size; + uint64_t reserved[4]; struct stats st; }; @@ -208,7 +213,7 @@ decomp(uint8_t *in, uint8_t *out, size_t insize, size_t outsize) } void -print_md(const uint8_t *md, size_t size) +print_md(uint8_t *md, size_t size) { size_t i; @@ -217,6 +222,27 @@ print_md(const uint8_t *md, size_t size) } void +print_stats(struct stats *st) +{ + fprintf(stderr, "original size: %llu bytes\n", + (unsigned long long)st->orig_size); + fprintf(stderr, "compressed size: %llu bytes\n", + (unsigned long long)st->comp_size); + fprintf(stderr, "deduplicated size: %llu bytes\n", + (unsigned long long)st->dedup_size); + fprintf(stderr, "min/avg/max block size: %llu/%llu/%llu\n", + (unsigned long long)st->min_blk_size, + (unsigned long long)st->dedup_size / st->nblks, + (unsigned long long)st->max_blk_size); + fprintf(stderr, "number of blocks: %llu\n", + (unsigned long long)st->nblks); + fprintf(stderr, "total cache hits: %llu\n", + (unsigned long long)st->cache_hits); + fprintf(stderr, "total cache misses: %llu\n", + (unsigned long long)st->cache_misses); +} + +void str2bin(char *s, uint8_t *d) { size_t i, size = strlen(s) / 2; @@ -488,7 +514,13 @@ dedup(int fd, char *msg) cache_dirty = 1; enthdr.st.dedup_size += bdescr.size; + enthdr.st.nblks++; enthdr.st.cache_misses++; + + if (bdescr.size > enthdr.st.max_blk_size) + enthdr.st.max_blk_size = bdescr.size; + if (bdescr.size < enthdr.st.min_blk_size) + enthdr.st.min_blk_size = 
bdescr.size; } else { ent->bdescr[ent->nblks++] = bdescr; enthdr.st.cache_hits++; @@ -651,11 +683,6 @@ void init_cache(void) { uint64_t nents, i; - uint64_t min, max, avg; - - min = comp_size(BLKSIZE); - max = 0; - avg = 0; nents = cache_nents(); xlseek(cfd, 0, SEEK_SET); @@ -666,20 +693,6 @@ init_cache(void) if (xread(cfd, &cent->bdescr, sizeof(cent->bdescr)) == 0) errx(1, "read: unexpected EOF"); add_cent(cent); - - if (cent->bdescr.size > max) - max = cent->bdescr.size; - if (cent->bdescr.size < min) - min = cent->bdescr.size; - avg += cent->bdescr.size; - } - avg /= nents; - - if (verbose) { - fprintf(stderr, "min/avg/max block size: %llu/%llu/%llu\n", - (unsigned long long)min, - (unsigned long long)avg, - (unsigned long long)max); } } @@ -720,19 +733,7 @@ init(void) } else { enthdr.flags = (VER_MAJ << 8) | VER_MIN; xwrite(ifd, &enthdr, sizeof(enthdr)); - } - - if (verbose) { - fprintf(stderr, "original size: %llu bytes\n", - (unsigned long long)enthdr.st.orig_size); - fprintf(stderr, "compressed size: %llu bytes\n", - (unsigned long long)enthdr.st.comp_size); - fprintf(stderr, "deduplicated size: %llu bytes\n", - (unsigned long long)enthdr.st.dedup_size); - fprintf(stderr, "total cache hits: %llu\n", - (unsigned long long)enthdr.st.cache_hits); - fprintf(stderr, "total cache misses: %llu\n", - (unsigned long long)enthdr.st.cache_misses); + enthdr.st.min_blk_size = comp_size(BLKSIZE); } if (cache_nents() != 0) @@ -744,6 +745,8 @@ init(void) void term(void) { + if (verbose) + print_stats(&enthdr.st); flush_cache(); free_cache();