dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit e2df9c742065bd99e3d5580ff68d456c4e4e1388
parent 9ffbdb14b746f421734fd66435a330b033edbad6
Author: z3bra <contactatz3bradotorg>
Date:   Mon, 18 Feb 2019 17:02:14 +0100

Optimize chunking by always refilling buffer

Diffstat:
M dedup.c | 76 ++++++++++++++++++++++++++++++++++++++--------------------------------------
1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/dedup.c b/dedup.c
@@ -409,64 +409,64 @@ dedup(int fd, char *msg)
 	uint8_t *buf[2];
 	struct ent *ent;
 	SHA256_CTX ctx;
-	ssize_t n;
+	ssize_t n, bufsize;
 
 	buf[0] = alloc_buf(BLKSIZE);
 	buf[1] = alloc_buf(comp_size(BLKSIZE));
 	ent = alloc_ent();
 
+	bufsize = 0;
 	SHA256_Init(&ctx);
-	while ((n = xread(fd, buf[0], BLKSIZE)) > 0) {
+	while ((n = xread(fd, buf[0] + bufsize, BLKSIZE - bufsize)) > 0 || bufsize > 0) {
+		uint8_t md[MDSIZE];
+		struct bdescr bdescr;
+		size_t blksize, csize;
 		uint8_t *inp = buf[0]; /* input buf */
 		uint8_t *outp = buf[1]; /* compressed buf */
 
-		enthdr.st.total_bytes += n;
-
-		/* Split input buffer in chunks */
-		while (n > 0) {
-			uint8_t md[MDSIZE];
-			struct bdescr bdescr;
-			size_t blksize, csize;
-
-			blksize = chunk_blk(inp, n);
-			csize = comp(inp, outp, blksize, comp_size(BLKSIZE));
+		if (n > 0) {
+			bufsize += n;
+			enthdr.st.total_bytes += n;
+		}
 
-			memcpy(bdescr.md, md, sizeof(bdescr));
-			bdescr.offset = enthdr.store_size;
-			bdescr.size = csize;
+		blksize = chunk_blk(inp, bufsize);
+		csize = comp(inp, outp, blksize, comp_size(BLKSIZE));
 
-			hash_blk(outp, bdescr.size, bdescr.md);
+		memcpy(bdescr.md, md, sizeof(bdescr));
+		bdescr.offset = enthdr.store_size;
+		bdescr.size = csize;
 
-			/* Calculate file hash one block at a time */
-			SHA256_Update(&ctx, outp, bdescr.size);
+		hash_blk(outp, bdescr.size, bdescr.md);
 
-			ent = grow_ent(ent, ent->nblks + 1);
+		/* Calculate file hash one block at a time */
+		SHA256_Update(&ctx, outp, bdescr.size);
 
-			if (lookup_blk(bdescr.md) < 0) {
-				struct cent *cent;
+		ent = grow_ent(ent, ent->nblks + 1);
 
-				/* Update index entry */
-				ent->bdescr[ent->nblks++] = bdescr;
+		if (lookup_blk(bdescr.md) < 0) {
+			struct cent *cent;
 
-				/* Store block */
-				append_blk(outp, &bdescr);
+			/* Update index entry */
+			ent->bdescr[ent->nblks++] = bdescr;
 
-				/* Create a cache entry for this block */
-				cent = alloc_cent();
-				cent->bdescr = bdescr;
-				add_cent(cent);
-				cache_dirty = 1;
+			/* Store block */
+			append_blk(outp, &bdescr);
 
-				enthdr.st.actual_bytes += bdescr.size;
-				enthdr.st.cache_misses++;
-			} else {
-				ent->bdescr[ent->nblks++] = bdescr;
-				enthdr.st.cache_hits++;
-			}
+			/* Create a cache entry for this block */
+			cent = alloc_cent();
+			cent->bdescr = bdescr;
+			add_cent(cent);
+			cache_dirty = 1;
 
-			inp += blksize;
-			n -= blksize;
+			enthdr.st.actual_bytes += bdescr.size;
+			enthdr.st.cache_misses++;
+		} else {
+			ent->bdescr[ent->nblks++] = bdescr;
+			enthdr.st.cache_hits++;
 		}
+
+		memmove(inp, inp + blksize, bufsize - blksize);
+		bufsize -= blksize;
 	}
 
 	if (ent->nblks > 0) {