dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit 78273e89af8024ff892a83cdb04b8b2374aa9521
parent 94596df085ef611772b52d9d7e3ffe0e26db3fb8
Author: sin <sin@2f30.org>
Date:   Thu, 18 Apr 2019 10:35:07 +0100

Put snapshot helpers to utils.c

Diffstat:
M dcheck.c | 66 +-----------------------------------------------------------------
M dedup.h | 12 ++++++++++++
M dlist.c | 66 +-----------------------------------------------------------------
M dpack.c | 115 +++----------------------------------------------------------------------------
M dunpack.c | 66 +-----------------------------------------------------------------
M utils.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 124 insertions(+), 307 deletions(-)

diff --git a/dcheck.c b/dcheck.c @@ -17,11 +17,6 @@ #define SNAPSF ".snapshots" #define STOREF ".store" -enum { - WALK_CONTINUE, - WALK_STOP -}; - static struct snap_hdr snap_hdr; static struct blk_hdr blk_hdr; static int ifd; @@ -41,42 +36,6 @@ print_md(FILE *fp, uint8_t *md, size_t size) fprintf(fp, "%02x", md[i]); } -static struct snap * -alloc_snap(void) -{ - struct snap *snap; - - snap = calloc(1, sizeof(*snap)); - if (snap == NULL) - err(1, "%s", __func__); - return snap; -} - -static void -free_snap(struct snap *snap) -{ - free(snap); -} - -static struct snap * -grow_snap(struct snap *snap, uint64_t nr_blk_descs) -{ - size_t size; - - if (nr_blk_descs > SIZE_MAX / sizeof(snap->blk_desc[0])) - errx(1, "%s: overflow", __func__); - size = nr_blk_descs * sizeof(snap->blk_desc[0]); - - if (size > SIZE_MAX - sizeof(*snap)) - errx(1, "%s: overflow", __func__); - size += sizeof(*snap); - - snap = realloc(snap, size); - if (snap == NULL) - err(1, "%s", __func__); - return snap; -} - static uint8_t * alloc_buf(size_t size) { @@ -169,29 +128,6 @@ check_snap(struct snap *snap, void *arg) return WALK_CONTINUE; } -/* Walk through all snapshots and call fn() on each one */ -static void -walk_snap(int (*fn)(struct snap *, void *), void *arg) -{ - uint64_t i; - - xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET); - for (i = 0; i < snap_hdr.nr_snaps; i++) { - struct snap *snap; - int ret; - - snap = alloc_snap(); - read_snap(ifd, snap); - snap = grow_snap(snap, snap->nr_blk_descs); - read_snap_descs(ifd, snap); - - ret = (*fn)(snap, arg); - free_snap(snap); - if (ret == WALK_STOP) - break; - } -} - static void init(void) { @@ -257,7 +193,7 @@ main(int argc, char *argv[]) init(); ret = 0; - walk_snap(check_snap, &ret); + walk_snap(ifd, &snap_hdr, check_snap, &ret); if (ret != 0) errx(1, "%s or %s is corrupted", SNAPSF, STOREF); term(); diff --git a/dedup.h b/dedup.h @@ -29,6 +29,11 @@ #define COMPR_ALGO_SHIFT 16 #define COMPR_ALGO_MASK 0x7 /* max 8 compression algos */ +enum { + 
WALK_CONTINUE, + WALK_STOP +}; + enum compr_algo { COMPR_NONE, COMPR_LZ4, @@ -210,3 +215,10 @@ void init_blk_hdr(struct blk_hdr *hdr, int compr_algo, int hash_algo); void init_snap_hdr(struct snap_hdr *hdr); void load_blk_hdr(int fd, struct blk_hdr *hdr, int *compr_algo, int *hash_algo); void load_snap_hdr(int fd, struct snap_hdr *hdr); +struct snap *alloc_snap(void); +void free_snap(struct snap *snap); +struct snap *grow_snap(struct snap *snap, uint64_t nr_blk_descs); +void append_snap(int fd, struct snap_hdr *hdr, struct snap *snap); +void hash_snap(struct snap *snap, uint8_t *md, int hash_algo); +void walk_snap(int fd, struct snap_hdr *hdr, + int (*fn)(struct snap *, void *), void *arg); diff --git a/dlist.c b/dlist.c @@ -17,11 +17,6 @@ #define SNAPSF ".snapshots" #define STOREF ".store" -enum { - WALK_CONTINUE, - WALK_STOP -}; - static struct snap_hdr snap_hdr; static struct blk_hdr blk_hdr; static int ifd; @@ -41,42 +36,6 @@ print_md(FILE *fp, uint8_t *md, size_t size) fprintf(fp, "%02x", md[i]); } -static struct snap * -alloc_snap(void) -{ - struct snap *snap; - - snap = calloc(1, sizeof(*snap)); - if (snap == NULL) - err(1, "%s", __func__); - return snap; -} - -static void -free_snap(struct snap *snap) -{ - free(snap); -} - -static struct snap * -grow_snap(struct snap *snap, uint64_t nr_blk_descs) -{ - size_t size; - - if (nr_blk_descs > SIZE_MAX / sizeof(snap->blk_desc[0])) - errx(1, "%s: overflow", __func__); - size = nr_blk_descs * sizeof(snap->blk_desc[0]); - - if (size > SIZE_MAX - sizeof(*snap)) - errx(1, "%s: overflow", __func__); - size += sizeof(*snap); - - snap = realloc(snap, size); - if (snap == NULL) - err(1, "%s", __func__); - return snap; -} - static int list(struct snap *snap, void *arg) { @@ -88,29 +47,6 @@ list(struct snap *snap, void *arg) return WALK_CONTINUE; } -/* Walk through all snapshots and call fn() on each one */ -static void -walk_snap(int (*fn)(struct snap *, void *), void *arg) -{ - uint64_t i; - - xlseek(ifd, SNAP_HDR_SIZE, 
SEEK_SET); - for (i = 0; i < snap_hdr.nr_snaps; i++) { - struct snap *snap; - int ret; - - snap = alloc_snap(); - read_snap(ifd, snap); - snap = grow_snap(snap, snap->nr_blk_descs); - read_snap_descs(ifd, snap); - - ret = (*fn)(snap, arg); - free_snap(snap); - if (ret == WALK_STOP) - break; - } -} - static void init(void) { @@ -175,7 +111,7 @@ main(int argc, char *argv[]) err(1, "chdir: %s", repo); init(); - walk_snap(list, NULL); + walk_snap(ifd, &snap_hdr, list, NULL); term(); return 0; } diff --git a/dpack.c b/dpack.c @@ -17,11 +17,6 @@ #define SNAPSF ".snapshots" #define STOREF ".store" -enum { - WALK_CONTINUE, - WALK_STOP -}; - static struct snap_hdr snap_hdr; static struct blk_hdr blk_hdr; static struct icache *icache; @@ -33,87 +28,6 @@ static int compr_algo = COMPR_LZ4; int verbose; char *argv0; -static struct snap * -alloc_snap(void) -{ - struct snap *snap; - - snap = calloc(1, sizeof(*snap)); - if (snap == NULL) - err(1, "%s", __func__); - return snap; -} - -static void -free_snap(struct snap *snap) -{ - free(snap); -} - -/* - * The snapshot hash is calculated over the - * hash of its block descriptors. 
- */ -static void -hash_snap(struct snap *snap, uint8_t *md) -{ - struct hash_ctx ctx; - uint64_t i; - - if (hash_init(&ctx, hash_algo, MD_SIZE) < 0) - errx(1, "hash_init failed"); - for (i = 0; i < snap->nr_blk_descs; i++) { - struct blk_desc *blk_desc; - - blk_desc = &snap->blk_desc[i]; - hash_update(&ctx, blk_desc->md, sizeof(blk_desc->md)); - } - hash_final(&ctx, md, MD_SIZE); -} - -static struct snap * -grow_snap(struct snap *snap, uint64_t nr_blk_descs) -{ - size_t size; - - if (nr_blk_descs > SIZE_MAX / sizeof(snap->blk_desc[0])) - errx(1, "%s: overflow", __func__); - size = nr_blk_descs * sizeof(snap->blk_desc[0]); - - if (size > SIZE_MAX - sizeof(*snap)) - errx(1, "%s: overflow", __func__); - size += sizeof(*snap); - - snap = realloc(snap, size); - if (snap == NULL) - err(1, "%s", __func__); - return snap; -} - -static void -append_snap(struct snap *snap) -{ - if (snap->nr_blk_descs > UINT64_MAX / BLK_DESC_SIZE) - errx(1, "%s: overflow", __func__); - snap->size = snap->nr_blk_descs * BLK_DESC_SIZE; - - if (snap->size > UINT64_MAX - SNAPSHOT_SIZE) - errx(1, "%s: overflow", __func__); - snap->size += SNAPSHOT_SIZE; - - xlseek(ifd, snap_hdr.size, SEEK_SET); - write_snap(ifd, snap); - write_snap_blk_descs(ifd, snap); - - if (snap_hdr.size > UINT64_MAX - snap->size) - errx(1, "%s: overflow", __func__); - snap_hdr.size += snap->size; - - if (snap_hdr.nr_snaps > UINT64_MAX - 1) - errx(1, "%s: overflow", __func__); - snap_hdr.nr_snaps++; -} - static uint8_t * alloc_buf(size_t size) { @@ -222,8 +136,8 @@ dedup(int fd, char *msg) memcpy(snap->msg, msg, size); snap->msg[size - 1] = '\0'; } - hash_snap(snap, snap->md); - append_snap(snap); + hash_snap(snap, snap->md, hash_algo); + append_snap(ifd, &snap_hdr, snap); } free_chunker(chunker); @@ -251,29 +165,6 @@ build_icache(struct snap *snap, void *arg) return WALK_CONTINUE; } -/* Walk through all snapshots and call fn() on each one */ -static void -walk_snap(int (*fn)(struct snap *, void *), void *arg) -{ - uint64_t 
i; - - xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET); - for (i = 0; i < snap_hdr.nr_snaps; i++) { - struct snap *snap; - int ret; - - snap = alloc_snap(); - read_snap(ifd, snap); - snap = grow_snap(snap, snap->nr_blk_descs); - read_snap_descs(ifd, snap); - - ret = (*fn)(snap, arg); - free_snap(snap); - if (ret == WALK_STOP) - break; - } -} - static void init(void) { @@ -296,7 +187,7 @@ init(void) load_blk_hdr(sfd, &blk_hdr, &compr_algo, &hash_algo); icache = alloc_icache(); - walk_snap(build_icache, NULL); + walk_snap(ifd, &snap_hdr, build_icache, NULL); } static void diff --git a/dunpack.c b/dunpack.c @@ -17,11 +17,6 @@ #define SNAPSF ".snapshots" #define STOREF ".store" -enum { - WALK_CONTINUE, - WALK_STOP -}; - struct extract_args { uint8_t *md; int fd; @@ -38,42 +33,6 @@ static int compr_algo = COMPR_LZ4; int verbose; char *argv0; -static struct snap * -alloc_snap(void) -{ - struct snap *snap; - - snap = calloc(1, sizeof(*snap)); - if (snap == NULL) - err(1, "%s", __func__); - return snap; -} - -static void -free_snap(struct snap *snap) -{ - free(snap); -} - -static struct snap * -grow_snap(struct snap *snap, uint64_t nr_blk_descs) -{ - size_t size; - - if (nr_blk_descs > SIZE_MAX / sizeof(snap->blk_desc[0])) - errx(1, "%s: overflow", __func__); - size = nr_blk_descs * sizeof(snap->blk_desc[0]); - - if (size > SIZE_MAX - sizeof(*snap)) - errx(1, "%s: overflow", __func__); - size += sizeof(*snap); - - snap = realloc(snap, size); - if (snap == NULL) - err(1, "%s", __func__); - return snap; -} - static uint8_t * alloc_buf(size_t size) { @@ -135,29 +94,6 @@ extract(struct snap *snap, void *arg) return WALK_STOP; } -/* Walk through all snapshots and call fn() on each one */ -static void -walk_snap(int (*fn)(struct snap *, void *), void *arg) -{ - uint64_t i; - - xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET); - for (i = 0; i < snap_hdr.nr_snaps; i++) { - struct snap *snap; - int ret; - - snap = alloc_snap(); - read_snap(ifd, snap); - snap = grow_snap(snap, snap->nr_blk_descs); - 
read_snap_descs(ifd, snap); - - ret = (*fn)(snap, arg); - free_snap(snap); - if (ret == WALK_STOP) - break; - } -} - static void init(void) { @@ -229,7 +165,7 @@ main(int argc, char *argv[]) args.md = md; args.fd = STDOUT_FILENO; args.ret = -1; - walk_snap(extract, &args); + walk_snap(ifd, &snap_hdr, extract, &args); if (args.ret != 0) errx(1, "unknown snapshot: %s", id); term(); diff --git a/utils.c b/utils.c @@ -3,6 +3,7 @@ #include <err.h> #include <stdint.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include <unistd.h> @@ -128,3 +129,108 @@ load_snap_hdr(int fd, struct snap_hdr *hdr) read_snap_hdr(fd, hdr); match_ver(hdr->flags); } + +struct snap * +alloc_snap(void) +{ + struct snap *snap; + + snap = calloc(1, sizeof(*snap)); + if (snap == NULL) + err(1, "%s", __func__); + return snap; +} + +void +free_snap(struct snap *snap) +{ + free(snap); +} + +struct snap * +grow_snap(struct snap *snap, uint64_t nr_blk_descs) +{ + size_t size; + + if (nr_blk_descs > SIZE_MAX / sizeof(snap->blk_desc[0])) + errx(1, "%s: overflow", __func__); + size = nr_blk_descs * sizeof(snap->blk_desc[0]); + + if (size > SIZE_MAX - sizeof(*snap)) + errx(1, "%s: overflow", __func__); + size += sizeof(*snap); + + snap = realloc(snap, size); + if (snap == NULL) + err(1, "%s", __func__); + return snap; +} + +void +append_snap(int fd, struct snap_hdr *hdr, struct snap *snap) +{ + if (snap->nr_blk_descs > UINT64_MAX / BLK_DESC_SIZE) + errx(1, "%s: overflow", __func__); + snap->size = snap->nr_blk_descs * BLK_DESC_SIZE; + + if (snap->size > UINT64_MAX - SNAPSHOT_SIZE) + errx(1, "%s: overflow", __func__); + snap->size += SNAPSHOT_SIZE; + + xlseek(fd, hdr->size, SEEK_SET); + write_snap(fd, snap); + write_snap_blk_descs(fd, snap); + + if (hdr->size > UINT64_MAX - snap->size) + errx(1, "%s: overflow", __func__); + hdr->size += snap->size; + + if (hdr->nr_snaps > UINT64_MAX - 1) + errx(1, "%s: overflow", __func__); + hdr->nr_snaps++; +} + +/* + * The snapshot hash is calculated over 
the + * hash of its block descriptors. + */ +void +hash_snap(struct snap *snap, uint8_t *md, int hash_algo) +{ + struct hash_ctx ctx; + uint64_t i; + + if (hash_init(&ctx, hash_algo, MD_SIZE) < 0) + errx(1, "hash_init failed"); + for (i = 0; i < snap->nr_blk_descs; i++) { + struct blk_desc *blk_desc; + + blk_desc = &snap->blk_desc[i]; + hash_update(&ctx, blk_desc->md, sizeof(blk_desc->md)); + } + hash_final(&ctx, md, MD_SIZE); +} + +/* Walk through all snapshots and call fn() on each one */ +void +walk_snap(int fd, struct snap_hdr *hdr, + int (*fn)(struct snap *, void *), void *arg) +{ + uint64_t i; + + xlseek(fd, SNAP_HDR_SIZE, SEEK_SET); + for (i = 0; i < hdr->nr_snaps; i++) { + struct snap *snap; + int ret; + + snap = alloc_snap(); + read_snap(fd, snap); + snap = grow_snap(snap, snap->nr_blk_descs); + read_snap_descs(fd, snap); + + ret = (*fn)(snap, arg); + free_snap(snap); + if (ret == WALK_STOP) + break; + } +}