dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit 83352fb988f55b3ea3fea53b984b8233f817d08e
parent f2671211f99e5dba4d6e3f26cace0984c4826557
Author: sin <sin@2f30.org>
Date:   Mon, 18 Feb 2019 13:10:35 +0000

Add some stats

Diffstat:
M dedup.c | 37 ++++++++++++++++++++++++++++++++++---
1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/dedup.c b/dedup.c
@@ -30,11 +30,19 @@ enum {
 	WALK_STOP
 };
 
+struct stats {
+	uint64_t total_bytes;
+	uint64_t actual_bytes;
+	uint64_t cache_hits;
+	uint64_t cache_misses;
+};
+
 /* index file header */
 struct enthdr {
 	uint64_t flags;
 	uint64_t nents;
 	uint64_t store_size;
+	struct stats st;
 };
 
 /* block descriptor */
@@ -408,6 +416,8 @@ dedup(int fd, char *msg)
 		uint8_t *inp = buf[0]; /* input buf */
 		uint8_t *outp = buf[1]; /* compressed buf */
 
+		enthdr.st.total_bytes += n;
+
 		/* Split input buffer in chunks */
 		while (n > 0) {
 			uint8_t md[MDSIZE];
@@ -417,6 +427,8 @@ dedup(int fd, char *msg)
 			blksize = chunk_blk(inp, n);
 			csize = comp(inp, outp, blksize, comp_size(BLKSIZE));
 
+			enthdr.st.actual_bytes += csize;
+
 			memcpy(bdescr.md, md, sizeof(bdescr));
 			bdescr.offset = enthdr.store_size;
 			bdescr.size = csize;
@@ -441,8 +453,11 @@ dedup(int fd, char *msg)
 				cent = alloc_cent();
 				cent->bdescr = bdescr;
 				add_cent(cent);
+
+				enthdr.st.cache_misses++;
 			} else {
 				ent->bdescr[ent->nblks++] = bdescr;
+				enthdr.st.cache_hits++;
 			}
 
 			inp += blksize;
@@ -655,9 +670,25 @@ init(void)
 		err(1, "fstat %s", INDEXF);
 	if (sb.st_size != 0)
 		xread(ifd, &enthdr, sizeof(enthdr));
-	if (verbose)
-		fprintf(stderr, "store size: %llu bytes\n",
-		        (unsigned long long)enthdr.store_size);
+	if (verbose) {
+		float ratio;
+
+		fprintf(stderr, "total bytes: %llu bytes\n",
+		        (unsigned long long)enthdr.st.total_bytes);
+		fprintf(stderr, "actual bytes: %llu bytes\n",
+		        (unsigned long long)enthdr.st.actual_bytes);
+
+		if (enthdr.st.total_bytes) {
+			ratio = enthdr.st.actual_bytes * 100;
+			ratio /= enthdr.st.total_bytes;
+			fprintf(stderr, "dedup ratio: %f\n", ratio);
+		}
+
+		fprintf(stderr, "cache hits: %llu\n",
+		        (unsigned long long)enthdr.st.cache_hits);
+		fprintf(stderr, "cache misses: %llu\n",
+		        (unsigned long long)enthdr.st.cache_misses);
+	}
 
 	if (cache_nents() != 0)
 		init_cache();