dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit f16ec686af5b4b85ac6c5959361c2156259bd0e9
parent d60ace395a74a5efe067ee9cd5d85446c7facf43
Author: Dimitris Papastamos <dimitris.papastamos@arm.com>
Date:   Tue, 26 Feb 2019 10:46:36 +0000

Serialize/deserialize types

This should fix any endianness issues as well as avoid padding
problems.

Diffstat:
M Makefile | 30 +++++++++++++++++++++++-------
M TODO | 1 -
M dedup.c | 28 +++++++++++-----------------
M dedup.h | 31 +++++++++++++++++++++++++++----
A types.c | 214 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 275 insertions(+), 29 deletions(-)

diff --git a/Makefile b/Makefile @@ -2,18 +2,34 @@ VERSION = 0.5 PREFIX = /usr/local MANPREFIX = $(PREFIX)/man BIN = dedup -SRC = $(BIN).c cache.c chunker.c pack.c unpack.c utils.c -OBJ = $(BIN).o cache.o chunker.o pack.o unpack.o utils.o +SRC = \ + $(BIN).c \ + arg.h \ + config.h \ + dedup.h \ + tree.h \ + cache.c \ + chunker.c \ + pack.c \ + types.c \ + unpack.c \ + utils.c \ + +OBJ = \ + $(BIN).o \ + cache.o \ + chunker.o \ + pack.o \ + types.o \ + unpack.o \ + utils.o + DISTFILES = \ $(SRC) \ + $(BIN).1 \ LICENSE \ Makefile \ README \ - arg.h \ - config.h \ - $(BIN).1 \ - dedup.h \ - tree.h \ CFLAGS = -g -Wall CPPFLAGS = -I/usr/local/include -D_FILE_OFFSET_BITS=64 diff --git a/TODO b/TODO @@ -1,2 +1 @@ -endianness agnostic overflow checks diff --git a/dedup.c b/dedup.c @@ -107,13 +107,12 @@ append_snap(struct snapshot *snap) /* Update snapshot header */ snaphdr.nr_snapshots++; xlseek(ifd, 0, SEEK_SET); - xwrite(ifd, &snaphdr, sizeof(snaphdr)); + write_snaphdr(ifd, &snaphdr); /* Append snapshot */ xlseek(ifd, 0, SEEK_END); - snap->size = sizeof(*snap); - snap->size += snap->nr_blk_descs * sizeof(snap->blk_desc[0]); - xwrite(ifd, snap, snap->size); + write_snapshot(ifd, snap); + write_snapshot_blk_descs(ifd, snap); } static struct snapshot * @@ -400,19 +399,15 @@ walk_snap(int (*fn)(struct snapshot *, void *), void *arg) { uint64_t i; - xlseek(ifd, sizeof(snaphdr), SEEK_SET); + xlseek(ifd, SNAPHDR_LEN, SEEK_SET); for (i = 0; i < snaphdr.nr_snapshots; i++) { struct snapshot *snap; int ret; snap = alloc_snap(); - if (xread(ifd, snap, sizeof(*snap)) == 0) - errx(1, "read: unexpected EOF"); - + read_snapshot(ifd, snap); snap = grow_snap(snap, snap->nr_blk_descs); - if (xread(ifd, snap->blk_desc, - snap->nr_blk_descs * sizeof(snap->blk_desc[0])) == 0) - errx(1, "read: unexpected EOF"); + read_snapshot_descs(ifd, snap); ret = (*fn)(snap, arg); free(snap); @@ -424,7 +419,7 @@ walk_snap(int (*fn)(struct snapshot *, void *), void *arg) static int flush_cache(struct 
cache_entry *cache_entry) { - xwrite(cfd, cache_entry, sizeof(*cache_entry)); + write_cache_entry(cfd, cache_entry); return 0; } @@ -435,7 +430,7 @@ cache_nr_entries(void) if (fstat(cfd, &sb) < 0) err(1, "fstat"); - return sb.st_size / sizeof(struct cache_entry); + return sb.st_size / CACHE_ENTRY_LEN; } static void @@ -449,8 +444,7 @@ load_cache(void) for (i = 0; i < nr_entries; i++) { struct cache_entry cache_entry; - if (xread(cfd, &cache_entry, sizeof(cache_entry)) == 0) - errx(1, "read: unexpected EOF"); + read_cache_entry(cfd, &cache_entry); add_cache_entry(cache, &cache_entry); } } @@ -482,7 +476,7 @@ init(void) if (sb.st_size != 0) { uint8_t maj, min; - xread(ifd, &snaphdr, sizeof(snaphdr)); + read_snaphdr(ifd, &snaphdr); min = snaphdr.flags & 0xff; maj = (snaphdr.flags >> 8) & 0xff; @@ -492,7 +486,7 @@ init(void) } else { snaphdr.flags = (VER_MAJ << 8) | VER_MIN; snaphdr.st.min_blk_size = comp_size(BLKSIZE_MAX); - xwrite(ifd, &snaphdr, sizeof(snaphdr)); + write_snaphdr(ifd, &snaphdr); } cache = alloc_cache(); diff --git a/dedup.h b/dedup.h @@ -1,11 +1,22 @@ #include "config.h" -#define MSGSIZE 256 -#define MDSIZE 32 +/* + * These are the actual sizes of the structs in the + * file format itself. The types are serialized/deserialized + * using the helpers from types.c. Any modification made to + * the structs below will need to be reflected here and in types.c. 
+ */ +#define SNAPHDR_LEN 152 +#define BLKDESC_LEN 48 +#define SNAPSHOT_LEN 304 +#define CACHE_ENTRY_LEN 48 + +#define MSGSIZE 256 +#define MDSIZE 32 /* snashot file format version */ -#define VER_MIN 1 -#define VER_MAJ 0 +#define VER_MIN 1 +#define VER_MAJ 0 struct cache; struct chunker; @@ -68,6 +79,18 @@ int pack(unsigned char *dst, char *fmt, ...); /* unpack.c */ int unpack(unsigned char *src, char *fmt, ...); +/* types.c */ +void read_snaphdr(int fd, struct snapshot_hdr *hdr); +void write_snaphdr(int fd, struct snapshot_hdr *hdr); +void write_snaphdr(int fd, struct snapshot_hdr *hdr); +void write_blk_desc(int fd, struct blk_desc *desc); +void read_snapshot(int fd, struct snapshot *snap); +void read_snapshot_descs(int fd, struct snapshot *snap); +void write_snapshot(int fd, struct snapshot *snap); +void write_snapshot_blk_descs(int fd, struct snapshot *snap); +void read_cache_entry(int fd, struct cache_entry *cache_entry); +void write_cache_entry(int fd, struct cache_entry *cache_entry); + /* utils.c */ void str2bin(char *s, uint8_t *d); off_t xlseek(int fd, off_t offset, int whence); diff --git a/types.c b/types.c @@ -0,0 +1,214 @@ +#include <assert.h> +#include <err.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> + +#include "dedup.h" + +void +read_snaphdr(int fd, struct snapshot_hdr *hdr) +{ + uint8_t buf[SNAPHDR_LEN]; + int n; + + if (xread(fd, buf, sizeof(buf)) == 0) + errx(1, "read_snaphdr: unexpected EOF"); + + n = unpack(buf, "qqq", + &hdr->flags, + &hdr->nr_snapshots, + &hdr->store_size); + + n += unpack(&buf[n], "qqqq", + &hdr->reserved[0], + &hdr->reserved[1], + &hdr->reserved[2], + &hdr->reserved[3]); + + n += unpack(&buf[n], "qqqqqq", + &hdr->st.orig_size, + &hdr->st.comp_size, + &hdr->st.dedup_size, + &hdr->st.min_blk_size, + &hdr->st.max_blk_size, + &hdr->st.nr_blks); + + n += unpack(&buf[n], "qqqqqq", + &hdr->st.reserved[0], + &hdr->st.reserved[1], + &hdr->st.reserved[2], + &hdr->st.reserved[3], + &hdr->st.reserved[4], + 
&hdr->st.reserved[5]); + + assert(n == SNAPHDR_LEN); +} + +void +write_snaphdr(int fd, struct snapshot_hdr *hdr) +{ + uint8_t buf[SNAPHDR_LEN]; + int n; + + n = pack(buf, "qqq", + hdr->flags, + hdr->nr_snapshots, + hdr->store_size); + + n += pack(&buf[n], "qqqq", + hdr->reserved[0], + hdr->reserved[1], + hdr->reserved[2], + hdr->reserved[3]); + + n += pack(&buf[n], "qqqqqq", + hdr->st.orig_size, + hdr->st.comp_size, + hdr->st.dedup_size, + hdr->st.min_blk_size, + hdr->st.max_blk_size, + hdr->st.nr_blks); + + n += pack(&buf[n], "qqqqqq", + hdr->st.reserved[0], + hdr->st.reserved[1], + hdr->st.reserved[2], + hdr->st.reserved[3], + hdr->st.reserved[4], + hdr->st.reserved[5]); + + assert(n == SNAPHDR_LEN); + xwrite(fd, buf, n); +} + +void +read_blk_desc(int fd, struct blk_desc *desc) +{ + uint8_t buf[BLKDESC_LEN]; + char fmt[BUFSIZ]; + int n; + + if (xread(fd, buf, sizeof(buf)) == 0) + errx(1, "read_blk_desc: unexpected EOF"); + + snprintf(fmt, sizeof(fmt), "'%dqq", MDSIZE); + n = unpack(buf, fmt, + desc->md, + &desc->offset, + &desc->size); + + assert(n == BLKDESC_LEN); +} + +void +write_blk_desc(int fd, struct blk_desc *desc) +{ + uint8_t buf[BLKDESC_LEN]; + char fmt[BUFSIZ]; + int n; + + snprintf(fmt, sizeof(fmt), "'%dqq", MDSIZE); + n = pack(buf, fmt, + desc->md, + desc->offset, + desc->size); + + assert(n == BLKDESC_LEN); + xwrite(fd, buf, n); +} + +void +read_snapshot(int fd, struct snapshot *snap) +{ + uint8_t buf[SNAPSHOT_LEN]; + char fmt[BUFSIZ]; + int n; + + if (xread(fd, buf, sizeof(buf)) == 0) + errx(1, "read_blk_desc: unexpected EOF"); + + snprintf(fmt, sizeof(fmt), "q'%d'%dq", MSGSIZE, MDSIZE); + n = unpack(buf, fmt, + &snap->size, + snap->msg, + snap->md, + &snap->nr_blk_descs); + + assert(n == SNAPSHOT_LEN); +}; + +void +read_snapshot_descs(int fd, struct snapshot *snap) +{ + uint64_t i; + + for (i = 0; i < snap->nr_blk_descs; i++) + read_blk_desc(fd, &snap->blk_desc[i]); +} + +void +write_snapshot(int fd, struct snapshot *snap) +{ + uint8_t 
buf[SNAPSHOT_LEN]; + char fmt[BUFSIZ]; + int n; + + snprintf(fmt, sizeof(fmt), "q'%d'%dq", MSGSIZE, MDSIZE); + + snap->size = SNAPHDR_LEN; + snap->size += snap->nr_blk_descs * BLKDESC_LEN; + n = pack(buf, fmt, + snap->size, + snap->msg, + snap->md, + snap->nr_blk_descs); + + assert(n == SNAPSHOT_LEN); + xwrite(fd, buf, n); +} + +void +write_snapshot_blk_descs(int fd, struct snapshot *snap) +{ + uint64_t i; + + for (i = 0; i < snap->nr_blk_descs; i++) + write_blk_desc(fd, &snap->blk_desc[i]); +} + +void +read_cache_entry(int fd, struct cache_entry *cache_entry) +{ + uint8_t buf[CACHE_ENTRY_LEN]; + char fmt[BUFSIZ]; + int n; + + if (xread(fd, buf, sizeof(buf)) == 0) + errx(1, "read_blk_desc: unexpected EOF"); + + snprintf(fmt, sizeof(fmt), "'%dqq", MDSIZE); + n = unpack(buf, fmt, + cache_entry->md, + &cache_entry->offset, + &cache_entry->size); + + assert(n == CACHE_ENTRY_LEN); +} + +void +write_cache_entry(int fd, struct cache_entry *cache_entry) +{ + uint8_t buf[CACHE_ENTRY_LEN]; + char fmt[BUFSIZ]; + int n; + + snprintf(fmt, sizeof(fmt), "'%dqq", MDSIZE); + n = pack(buf, fmt, + cache_entry->md, + cache_entry->offset, + cache_entry->size); + + assert(n == CACHE_ENTRY_LEN); + xwrite(fd, buf, n); +}