dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit 8ed79b4311c4715717198c0e04dd06efb29be265
parent 499f62b1b9e3e6db6f9f77defdb0c2bb15c25778
Author: sin <sin@2f30.org>
Date:   Thu, 21 Feb 2019 13:48:22 +0000

Rework chunker interface

Eventually, this will be implemented using a ring buffer.

Diffstat:
M chunker.c | 56 +++++++++++++++++++++++++++++++-------------------------
M dedup.c   |  2 +-
M dedup.h   |  6 +++---
3 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/chunker.c b/chunker.c
@@ -1,3 +1,4 @@
+#include <assert.h>
 #include <err.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -9,8 +10,9 @@
 
 struct chunker {
 	uint8_t *buf;
-	size_t size;
-	size_t pos;
+	size_t cap;
+	size_t rpos;
+	size_t wpos;
 	int fd;
 };
 
@@ -22,8 +24,8 @@ get_chunk_size(struct chunker *chunker)
 	uint32_t fp;
 
 	/* buzhash should be at least WINSIZE */
-	if (chunker->pos < WINSIZE)
-		return chunker->pos;
+	if (chunker->wpos - chunker->rpos < WINSIZE)
+		return chunker->wpos - chunker->rpos;
 
 	bp = chunker->buf;
 
@@ -37,18 +39,18 @@ get_chunk_size(struct chunker *chunker)
 	 * WINSIZE the smallest possible block size.
 	 */
 	fp = buzh_init(bp, WINSIZE);
-	for (i = 0; i < chunker->pos - WINSIZE; i++) {
+	for (i = chunker->rpos; i < chunker->wpos - WINSIZE; i++) {
 		if (i > 0)
 			fp = buzh_update(fp, bp[i - 1], bp[WINSIZE + i - 1],
 			                 WINSIZE);
 		if ((fp & HASHMSK) == 0)
 			return i + WINSIZE;
 	}
-	return chunker->pos;
+	return chunker->wpos - chunker->rpos;
 }
 
 struct chunker *
-alloc_chunker(size_t size, int fd)
+alloc_chunker(size_t cap, int fd)
 {
 	struct chunker *chunker;
 
@@ -56,11 +58,12 @@ alloc_chunker(size_t size, int fd)
 	if (chunker == NULL)
 		err(1, "malloc");
 
-	chunker->buf = malloc(size);
+	chunker->buf = malloc(cap);
 	if (chunker->buf == NULL)
 		err(1, "malloc");
-	chunker->size = size;
-	chunker->pos = 0;
+	chunker->cap = cap;
+	chunker->rpos = 0;
+	chunker->wpos = 0;
 	chunker->fd = fd;
 
 	return chunker;
@@ -79,28 +82,31 @@ fill_chunker(struct chunker *chunker)
 	uint8_t *bp;
 	ssize_t n;
 
-	bp = &chunker->buf[chunker->pos];
-	n = read(chunker->fd, bp, chunker->size - chunker->pos);
+	bp = &chunker->buf[chunker->wpos];
+	n = read(chunker->fd, bp, chunker->cap - chunker->wpos);
 	if (n < 0)
 		err(1, "read");
-	chunker->pos += n;
-	return chunker->pos;
+	chunker->wpos += n;
+	return chunker->wpos;
+}
+
+uint8_t *
+get_chunk(struct chunker *chunker, size_t *chunk_size)
+{
+	assert(chunker->rpos <= chunker->wpos);
+	*chunk_size = get_chunk_size(chunker);
+	chunker->rpos += *chunk_size;
+	return chunker->buf;
 }
 
 void
-drain_chunker(struct chunker *chunker, size_t chunk_size)
+drain_chunker(struct chunker *chunker)
 {
 	uint8_t *src, *dst;
 
-	src = &chunker->buf[chunk_size];
+	src = &chunker->buf[chunker->rpos];
 	dst = chunker->buf;
-	memmove(dst, src, chunker->pos - chunk_size);
-	chunker->pos -= chunk_size;
-}
-
-uint8_t *
-get_chunk(struct chunker *chunker, size_t *size)
-{
-	*size = get_chunk_size(chunker);
-	return chunker->buf;
+	memmove(dst, src, chunker->wpos - chunker->rpos);
+	chunker->wpos -= chunker->rpos;
+	chunker->rpos = 0;
 }
diff --git a/dedup.c b/dedup.c
@@ -445,7 +445,7 @@ dedup(int fd, char *msg)
 		SHA256_Update(&ctx, chunkp, chunk_size);
 		snap = grow_snap(snap, snap->nr_blk_descs + 1);
 		dedup_chunk(snap, chunkp, chunk_size);
-		drain_chunker(chunker, chunk_size);
+		drain_chunker(chunker);
 	}
 
 	SHA256_Final(snap->md, &ctx);
diff --git a/dedup.h b/dedup.h
@@ -3,11 +3,11 @@ struct chunker;
 
 /* chunker.c */
-struct chunker *alloc_chunker(size_t size, int fd);
+struct chunker *alloc_chunker(size_t cap, int fd);
 void free_chunker(struct chunker *chunker);
 ssize_t fill_chunker(struct chunker *chunker);
-void drain_chunker(struct chunker *chunker, size_t n);
-uint8_t *get_chunk(struct chunker *chunker, size_t *size);
+uint8_t *get_chunk(struct chunker *chunker, size_t *chunk_size);
+void drain_chunker(struct chunker *chunker);
 
 /* hash.c */
 uint32_t buzh_init(uint8_t *buf, size_t size);