dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit ec64133a5a57e21b3565368df37eae469c260755
parent 4029c846bc6e9b84ba02740fecae68cd8232c765
Author: sin <sin@2f30.org>
Date:   Thu, 28 Feb 2019 13:01:18 +0000

Rework types

Diffstat:
M dedup.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
M dedup.h | 24 ++++++++++++++++--------
M types.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
3 files changed, 178 insertions(+), 79 deletions(-)

diff --git a/dedup.c b/dedup.c @@ -31,7 +31,8 @@ struct extract_args { int ret; }; -static struct snapshot_hdr snaphdr; +static struct snapshot_hdr snap_hdr; +static struct blk_hdr blk_hdr; static struct cache *cache; static int ifd; static int sfd; @@ -105,15 +106,11 @@ print_stats(struct stats *st) static void append_snap(struct snapshot *snap) { - /* Update snapshot header */ - snaphdr.nr_snapshots++; - xlseek(ifd, 0, SEEK_SET); - write_snaphdr(ifd, &snaphdr); - - /* Append snapshot */ - xlseek(ifd, 0, SEEK_END); + xlseek(ifd, snap_hdr.size, SEEK_SET); write_snapshot(ifd, snap); write_snapshot_blk_descs(ifd, snap); + snap_hdr.size += snap->size; + snap_hdr.nr_snapshots++; } static struct snapshot * @@ -190,9 +187,9 @@ read_blk(uint8_t *buf, struct blk_desc *blk_desc) static void append_blk(uint8_t *buf, struct blk_desc *blk_desc) { - xlseek(sfd, snaphdr.store_size, SEEK_SET); + xlseek(sfd, blk_hdr.size, SEEK_SET); xwrite(sfd, buf, blk_desc->size); - snaphdr.store_size += blk_desc->size; + blk_hdr.size += blk_desc->size; } static void @@ -208,15 +205,15 @@ dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size) n = comp(chunkp, comp_buf, chunk_size, comp_size(BLKSIZE_MAX)); hash_blk(comp_buf, n, md); - snaphdr.st.orig_size += chunk_size; - snaphdr.st.comp_size += n; + blk_hdr.st.orig_size += chunk_size; + blk_hdr.st.comp_size += n; memcpy(cache_entry.md, md, sizeof(cache_entry.md)); if (lookup_cache_entry(cache, &cache_entry) < 0) { struct blk_desc blk_desc; memcpy(&blk_desc.md, md, sizeof(blk_desc.md)); - blk_desc.offset = snaphdr.store_size; + blk_desc.offset = blk_hdr.size; blk_desc.size = n; snap->blk_desc[snap->nr_blk_descs++] = blk_desc; @@ -228,13 +225,13 @@ dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size) add_cache_entry(cache, &cache_entry); cache_misses++; - snaphdr.st.dedup_size += blk_desc.size; - snaphdr.st.nr_blks++; + blk_hdr.st.dedup_size += blk_desc.size; + blk_hdr.st.nr_blks++; - if (blk_desc.size > 
snaphdr.st.max_blk_size) - snaphdr.st.max_blk_size = blk_desc.size; - if (blk_desc.size < snaphdr.st.min_blk_size) - snaphdr.st.min_blk_size = blk_desc.size; + if (blk_desc.size > blk_hdr.st.max_blk_size) + blk_hdr.st.max_blk_size = blk_desc.size; + if (blk_desc.size < blk_hdr.st.min_blk_size) + blk_hdr.st.min_blk_size = blk_desc.size; } else { struct blk_desc blk_desc; @@ -406,7 +403,7 @@ walk_snap(int (*fn)(struct snapshot *, void *), void *arg) { uint64_t i; - for (i = 0; i < snaphdr.nr_snapshots; i++) { + for (i = 0; i < snap_hdr.nr_snapshots; i++) { struct snapshot *snap; int ret; @@ -422,6 +419,19 @@ walk_snap(int (*fn)(struct snapshot *, void *), void *arg) } } +static void +match_ver(uint64_t v) +{ + uint8_t maj, min; + + min = v & 0xff; + maj = (v >> 8) & 0xff; + if (maj == VER_MAJ && min == VER_MIN) + return; + errx(1, "format version mismatch: expected %u.%u but got %u.%u", + VER_MAJ, VER_MIN, maj, min); +} + static int flush_cache(struct cache_entry *cache_entry) { @@ -441,7 +451,7 @@ load_cache(void) nr_entries = sb.st_size / CACHE_ENTRY_LEN; if (nr_entries == 0) { - xlseek(ifd, SNAPHDR_LEN, SEEK_SET); + xlseek(ifd, SNAP_HDR_LEN, SEEK_SET); walk_snap(rebuild_cache, NULL); return; } @@ -455,26 +465,63 @@ load_cache(void) } static void -load_snaphdr(void) +save_cache(void) +{ + if (cache_dirty) { + xlseek(cfd, 0, SEEK_SET); + walk_cache(cache, flush_cache); + } +} + +static void +load_blk_hdr(void) +{ + struct stat sb; + + if (fstat(sfd, &sb) < 0) + err(1, "fstat %s", STOREF); + if (sb.st_size == 0) { + blk_hdr.flags = (VER_MAJ << 8) | VER_MIN; + blk_hdr.size = BLK_HDR_LEN; + blk_hdr.st.min_blk_size = comp_size(BLKSIZE_MAX); + write_blk_hdr(sfd, &blk_hdr); + return; + } + + read_blk_hdr(sfd, &blk_hdr); + match_ver(blk_hdr.flags); +} + +static void +save_blk_hdr(void) +{ + xlseek(sfd, 0, SEEK_SET); + write_blk_hdr(sfd, &blk_hdr); +} + +static void +load_snap_hdr(void) { - uint8_t maj, min; struct stat sb; if (fstat(ifd, &sb) < 0) err(1, "fstat %s", 
SNAPSF); if (sb.st_size == 0) { - snaphdr.flags = (VER_MAJ << 8) | VER_MIN; - snaphdr.st.min_blk_size = comp_size(BLKSIZE_MAX); - write_snaphdr(ifd, &snaphdr); + snap_hdr.flags = (VER_MAJ << 8) | VER_MIN; + snap_hdr.size = SNAP_HDR_LEN; + write_snap_hdr(ifd, &snap_hdr); return; } - read_snaphdr(ifd, &snaphdr); - min = snaphdr.flags & 0xff; - maj = (snaphdr.flags >> 8) & 0xff; - if (maj != VER_MAJ || min != VER_MIN) - errx(1, "format version mismatch: expected %u.%u but got %u.%u", - VER_MAJ, VER_MIN, maj, min); + read_snap_hdr(ifd, &snap_hdr); + match_ver(snap_hdr.flags); +} + +static void +save_snap_hdr(void) +{ + xlseek(ifd, 0, SEEK_SET); + write_snap_hdr(ifd, &snap_hdr); } static void @@ -499,21 +546,20 @@ init(void) flock(cfd, LOCK_NB | LOCK_EX) < 0) errx(1, "busy lock"); - load_snaphdr(); + load_snap_hdr(); + load_blk_hdr(); load_cache(); } static void term(void) { - if (verbose) - print_stats(&snaphdr.st); + if (verbose > 0) + print_stats(&blk_hdr.st); - if (cache_dirty) { - xlseek(cfd, 0, SEEK_SET); - walk_cache(cache, flush_cache); - } - free_cache(cache); + save_snap_hdr(); + save_blk_hdr(); + save_cache(); fsync(ifd); fsync(sfd); @@ -522,6 +568,8 @@ term(void) close(ifd); close(sfd); close(cfd); + + free_cache(cache); } static void @@ -589,14 +637,14 @@ main(int argc, char *argv[]) init(); if (cflag) { - xlseek(ifd, SNAPHDR_LEN, SEEK_SET); + xlseek(ifd, SNAP_HDR_LEN, SEEK_SET); walk_snap(check, NULL); term(); return 0; } if (lflag) { - xlseek(ifd, SNAPHDR_LEN, SEEK_SET); + xlseek(ifd, SNAP_HDR_LEN, SEEK_SET); walk_snap(list, NULL); term(); return 0; @@ -605,7 +653,7 @@ main(int argc, char *argv[]) if (id) { struct extract_args args; - xlseek(ifd, SNAPHDR_LEN, SEEK_SET); + xlseek(ifd, SNAP_HDR_LEN, SEEK_SET); str2bin(id, md); args.md = md; args.fd = fd; diff --git a/dedup.h b/dedup.h @@ -6,16 +6,17 @@ * using the helpers from types.c. Any modification made to * the structs below will need to be reflected here and in types.c. 
*/ -#define SNAPHDR_LEN 152 -#define BLKDESC_LEN 48 +#define SNAP_HDR_LEN 56 +#define BLK_HDR_LEN 112 +#define BLK_DESC_LEN 48 #define SNAPSHOT_LEN 304 #define CACHE_ENTRY_LEN 48 #define MSGSIZE 256 #define MDSIZE 32 -/* snashot file format version */ -#define VER_MIN 1 +/* file format version */ +#define VER_MIN 2 #define VER_MAJ 0 struct cache; @@ -33,9 +34,14 @@ struct stats { struct snapshot_hdr { uint64_t flags; + uint64_t size; uint64_t nr_snapshots; - uint64_t store_size; uint64_t reserved[4]; +}; + +struct blk_hdr { + uint64_t flags; + uint64_t size; struct stats st; }; @@ -83,9 +89,11 @@ int pack(unsigned char *dst, char *fmt, ...); int unpack(unsigned char *src, char *fmt, ...); /* types.c */ -void read_snaphdr(int fd, struct snapshot_hdr *hdr); -void write_snaphdr(int fd, struct snapshot_hdr *hdr); -void write_snaphdr(int fd, struct snapshot_hdr *hdr); +void read_snap_hdr(int fd, struct snapshot_hdr *hdr); +void write_snap_hdr(int fd, struct snapshot_hdr *hdr); +void read_blk_hdr(int fd, struct blk_hdr *hdr); +void write_blk_hdr(int fd, struct blk_hdr *hdr); +void read_blk_desc(int fd, struct blk_desc *desc); void write_blk_desc(int fd, struct blk_desc *desc); void read_snapshot(int fd, struct snapshot *snap); void read_snapshot_descs(int fd, struct snapshot *snap); diff --git a/types.c b/types.c @@ -7,18 +7,18 @@ #include "dedup.h" void -read_snaphdr(int fd, struct snapshot_hdr *hdr) +read_snap_hdr(int fd, struct snapshot_hdr *hdr) { - uint8_t buf[SNAPHDR_LEN]; + uint8_t buf[SNAP_HDR_LEN]; int n; if (xread(fd, buf, sizeof(buf)) == 0) - errx(1, "read_snaphdr: unexpected EOF"); + errx(1, "read_snap_hdr: unexpected EOF"); n = unpack(buf, "qqq", &hdr->flags, - &hdr->nr_snapshots, - &hdr->store_size); + &hdr->size, + &hdr->nr_snapshots); n += unpack(&buf[n], "qqqq", &hdr->reserved[0], @@ -26,6 +26,57 @@ read_snaphdr(int fd, struct snapshot_hdr *hdr) &hdr->reserved[2], &hdr->reserved[3]); + if (verbose > 1) + printf("%s: flags = %llx, size = %llx, 
nr_snapshots = %llx\n", + __func__, + (unsigned long long)hdr->flags, + (unsigned long long)hdr->size, + (unsigned long long)hdr->nr_snapshots); + + assert(n == SNAP_HDR_LEN); +} + +void +write_snap_hdr(int fd, struct snapshot_hdr *hdr) +{ + uint8_t buf[SNAP_HDR_LEN]; + int n; + + n = pack(buf, "qqq", + hdr->flags, + hdr->size, + hdr->nr_snapshots); + + n += pack(&buf[n], "qqqq", + hdr->reserved[0], + hdr->reserved[1], + hdr->reserved[2], + hdr->reserved[3]); + + if (verbose > 1) + printf("%s: flags = %llx, size = %llx, nr_snapshots = %llx\n", + __func__, + (unsigned long long)hdr->flags, + (unsigned long long)hdr->size, + (unsigned long long)hdr->nr_snapshots); + + assert(n == SNAP_HDR_LEN); + xwrite(fd, buf, n); +} + +void +read_blk_hdr(int fd, struct blk_hdr *hdr) +{ + uint8_t buf[BLK_HDR_LEN]; + int n; + + if (xread(fd, buf, sizeof(buf)) == 0) + errx(1, "read_blk_desc: unexpected EOF"); + + n = unpack(buf, "qq", + &hdr->flags, + &hdr->size); + n += unpack(&buf[n], "qqqqqq", &hdr->st.orig_size, &hdr->st.comp_size, @@ -42,25 +93,18 @@ read_snaphdr(int fd, struct snapshot_hdr *hdr) &hdr->st.reserved[4], &hdr->st.reserved[5]); - assert(n == SNAPHDR_LEN); + assert(n == BLK_HDR_LEN); } void -write_snaphdr(int fd, struct snapshot_hdr *hdr) +write_blk_hdr(int fd, struct blk_hdr *hdr) { - uint8_t buf[SNAPHDR_LEN]; + uint8_t buf[BLK_HDR_LEN]; int n; - n = pack(buf, "qqq", + n = pack(buf, "qq", hdr->flags, - hdr->nr_snapshots, - hdr->store_size); - - n += pack(&buf[n], "qqqq", - hdr->reserved[0], - hdr->reserved[1], - hdr->reserved[2], - hdr->reserved[3]); + hdr->size); n += pack(&buf[n], "qqqqqq", hdr->st.orig_size, @@ -78,14 +122,14 @@ write_snaphdr(int fd, struct snapshot_hdr *hdr) hdr->st.reserved[4], hdr->st.reserved[5]); - assert(n == SNAPHDR_LEN); + assert(n == BLK_HDR_LEN); xwrite(fd, buf, n); } void read_blk_desc(int fd, struct blk_desc *desc) { - uint8_t buf[BLKDESC_LEN]; + uint8_t buf[BLK_DESC_LEN]; char fmt[BUFSIZ]; int n; @@ -98,13 +142,13 @@ 
read_blk_desc(int fd, struct blk_desc *desc) &desc->offset, &desc->size); - assert(n == BLKDESC_LEN); + assert(n == BLK_DESC_LEN); } void write_blk_desc(int fd, struct blk_desc *desc) { - uint8_t buf[BLKDESC_LEN]; + uint8_t buf[BLK_DESC_LEN]; char fmt[BUFSIZ]; int n; @@ -114,7 +158,7 @@ write_blk_desc(int fd, struct blk_desc *desc) desc->offset, desc->size); - assert(n == BLKDESC_LEN); + assert(n == BLK_DESC_LEN); xwrite(fd, buf, n); } @@ -154,16 +198,15 @@ write_snapshot(int fd, struct snapshot *snap) char fmt[BUFSIZ]; int n; - snprintf(fmt, sizeof(fmt), "q'%d'%dq", MSGSIZE, MDSIZE); - - if (mul_overflow(snap->nr_blk_descs, BLKDESC_LEN)) + if (mul_overflow(snap->nr_blk_descs, BLK_DESC_LEN)) errx(1, "write_snapshot: overflow"); - snap->size = snap->nr_blk_descs * BLKDESC_LEN; + snap->size = snap->nr_blk_descs * BLK_DESC_LEN; - if (add_overflow(SNAPHDR_LEN, snap->size)) + if (add_overflow(SNAPSHOT_LEN, snap->size)) errx(1, "write_snapshot: overflow"); - snap->size += SNAPHDR_LEN; + snap->size += SNAPSHOT_LEN; + snprintf(fmt, sizeof(fmt), "q'%d'%dq", MSGSIZE, MDSIZE); n = pack(buf, fmt, snap->size, snap->msg,