dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit 893ee1895fb91a98344ba68192d2e9ad678bc203
parent e8056ddcaccd7972a5008983d9bc60a3a36498c5
Author: sin <sin@2f30.org>
Date:   Thu, 21 Feb 2019 10:54:35 +0000

Simplify dedup() function

Diffstat:
M dedup.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
1 file changed, 74 insertions(+), 48 deletions(-)

diff --git a/dedup.c b/dedup.c
@@ -230,6 +230,12 @@ alloc_cache_entry(void)
 }
 
 void
+free_cache_entry(struct cache_entry *ent)
+{
+	free(ent);
+}
+
+void
 add_cache_entry(struct cache_entry *ent)
 {
 	RB_INSERT(cache, &cache_head, ent);
@@ -255,7 +261,7 @@ free_cache(void)
 
 	RB_FOREACH_SAFE(ent, cache, &cache_head, tmp) {
 		RB_REMOVE(cache, &cache_head, ent);
-		free(ent);
+		free_cache_entry(ent);
 	}
 }
 
@@ -295,6 +301,12 @@ alloc_snap(void)
 	return snap;
 }
 
+void
+free_snap(struct snapshot *snap)
+{
+	free(snap);
+}
+
 struct snapshot *
 grow_snap(struct snapshot *snap, uint64_t nr_blk_descs)
 {
@@ -320,6 +332,12 @@ alloc_buf(size_t size)
 }
 
 void
+free_buf(uint8_t *buf)
+{
+	free(buf);
+}
+
+void
 hash_blk(uint8_t *buf, size_t size, uint8_t *md)
 {
 	SHA256_CTX ctx;
@@ -360,64 +378,73 @@ lookup_blk_desc(uint8_t *md, struct blk_desc *blk_desc)
 }
 
 void
+dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size)
+{
+	uint8_t md[MDSIZE];
+	uint8_t *comp_buf;
+	struct blk_desc blk_desc;
+	size_t n;
+
+	comp_buf = alloc_buf(comp_size(BLKSIZE));
+
+	n = comp(chunkp, comp_buf, chunk_size, comp_size(BLKSIZE));
+	hash_blk(comp_buf, n, md);
+
+	snaphdr.st.orig_size += chunk_size;
+	snaphdr.st.comp_size += n;
+
+	if (lookup_blk_desc(md, &blk_desc) < 0) {
+		struct cache_entry *ent;
+
+		memcpy(blk_desc.md, md, sizeof(blk_desc.md));
+		blk_desc.offset = snaphdr.store_size;
+		blk_desc.size = n;
+
+		snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
+
+		append_blk(comp_buf, &blk_desc);
+
+		ent = alloc_cache_entry();
+		ent->blk_desc = blk_desc;
+		add_cache_entry(ent);
+		cache_dirty = 1;
+		cache_misses++;
+
+		snaphdr.st.dedup_size += blk_desc.size;
+		snaphdr.st.nr_blks++;
+
+		if (blk_desc.size > snaphdr.st.max_blk_size)
+			snaphdr.st.max_blk_size = blk_desc.size;
+		if (blk_desc.size < snaphdr.st.min_blk_size)
+			snaphdr.st.min_blk_size = blk_desc.size;
+	} else {
+		snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
+		cache_hits++;
+	}
+
+	free(comp_buf);
+}
+
+void
 dedup(int fd, char *msg)
 {
 	struct snapshot *snap;
 	struct chunker *chunker;
-	uint8_t *comp_buf;
 	SHA256_CTX ctx;
 	ssize_t n;
 
 	snap = alloc_snap();
 	chunker = alloc_chunker(BLKSIZE, fd);
-	comp_buf = alloc_buf(comp_size(BLKSIZE));
 
 	SHA256_Init(&ctx);
 	while ((n = fill_chunker(chunker)) > 0) {
-		uint8_t md[MDSIZE];
-		struct blk_desc blk_desc;
-		size_t chunk_size, csize;
 		uint8_t *chunkp;
+		size_t chunk_size;
 
 		chunkp = get_chunk(chunker, &chunk_size);
 		SHA256_Update(&ctx, chunkp, chunk_size);
-
-		csize = comp(chunkp, comp_buf, chunk_size, comp_size(BLKSIZE));
-		hash_blk(comp_buf, csize, md);
-
-		snaphdr.st.orig_size += chunk_size;
-		snaphdr.st.comp_size += csize;
-
 		snap = grow_snap(snap, snap->nr_blk_descs + 1);
-		if (lookup_blk_desc(md, &blk_desc) < 0) {
-			struct cache_entry *ent;
-
-			memcpy(blk_desc.md, md, sizeof(blk_desc.md));
-			blk_desc.offset = snaphdr.store_size;
-			blk_desc.size = csize;
-
-			snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
-
-			append_blk(comp_buf, &blk_desc);
-
-			ent = alloc_cache_entry();
-			ent->blk_desc = blk_desc;
-			add_cache_entry(ent);
-			cache_dirty = 1;
-			cache_misses++;
-
-			snaphdr.st.dedup_size += blk_desc.size;
-			snaphdr.st.nr_blks++;
-
-			if (blk_desc.size > snaphdr.st.max_blk_size)
-				snaphdr.st.max_blk_size = blk_desc.size;
-			if (blk_desc.size < snaphdr.st.min_blk_size)
-				snaphdr.st.min_blk_size = blk_desc.size;
-		} else {
-			snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
-			cache_hits++;
-		}
-
+		dedup_chunk(snap, chunkp, chunk_size);
 		drain_chunker(chunker, chunk_size);
 	}
 
@@ -437,9 +464,8 @@ dedup(int fd, char *msg)
 		append_snap(snap);
 	}
 
-	free(comp_buf);
 	free_chunker(chunker);
-	free(snap);
+	free_snap(snap);
 }
 
 int
@@ -461,8 +487,8 @@ extract(struct snapshot *snap, void *arg)
 		blksize = decomp(buf[1], buf[0], snap->blk_desc[i].size, BLKSIZE);
 		xwrite(args->fd, buf[0], blksize);
 	}
-	free(buf[1]);
-	free(buf[0]);
+	free_buf(buf[1]);
+	free_buf(buf[0]);
 	return WALK_STOP;
 }
 
@@ -502,7 +528,7 @@ check(struct snapshot *snap, void *arg)
 		fprintf(stderr, " Size: %llu\n",
 		        (unsigned long long)snap->blk_desc[i].size);
 	}
-	free(buf);
+	free_buf(buf);
 	return WALK_CONTINUE;
 }
 
@@ -566,7 +592,7 @@ walk(int (*fn)(struct snapshot *, void *), void *arg)
 		if ((*fn)(snap, arg) == WALK_STOP)
 			break;
 	}
-	free(snap);
+	free_snap(snap);
 }
 
 void