dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

dedup.h (7438B)


      1 #include "config.h"
      2 
      3 #define SNAPSF ".snapshots"	/* NOTE(review): presumably the name of the snapshots file -- confirm against dedup.c */
      4 #define STOREF ".store"	/* NOTE(review): presumably the name of the store file -- confirm against dedup.c */
      5 
      6 /*
      7  * These are the actual sizes of the structs in the
      8  * file format itself.  The types are serialized/deserialized
      9  * using the helpers from types.c.  Any modification made to
     10  * the structs below will need to be reflected here and in types.c.
         *
         * Each size below equals the plain sum of the member sizes of the
         * corresponding struct (uint64_t members are 8 bytes each).
     11  */
     12 #define MSG_SIZE	256	/* length of msg[] in struct snap */
     13 #define MD_SIZE		32	/* length of md[] in struct blk_desc and struct snap */
     14 
     15 #define SNAP_HDR_SIZE	104	/* 3 * 8 (flags, size, nr_snaps) + 10 * 8 (struct stats) */
     16 #define BLK_HDR_SIZE	16	/* 2 * 8 (flags, size) */
     17 #define BLK_DESC_SIZE	(MD_SIZE + 16)	/* md[] + offset + size */
     18 #define SNAPSHOT_SIZE	(8 + MSG_SIZE + MD_SIZE + 8)	/* size + msg[] + md[] + nr_blk_descs; excludes blk_desc[] */
     19 
     20 /* file format version */
     21 #define VER_MIN	2	/* NOTE(review): presumably the minor version number -- confirm */
     22 #define VER_MAJ	0	/* NOTE(review): presumably the major version number -- confirm */
     23 
     24 /*
         * snapshot header and block header flags
         *
         * NOTE(review): presumably applied as (flags >> SHIFT) & MASK, which
         * would place VER_MIN in bits 0-7 and VER_MAJ in bits 8-15 of the
         * flags field -- confirm against the code that loads the headers.
         */
     25 #define VER_MIN_MASK	0xff
     26 #define VER_MAJ_SHIFT	8
     27 #define VER_MAJ_MASK	0xff
     28 
     29 /*
         * block header flags
         *
         * NOTE(review): presumably applied as (flags >> SHIFT) & MASK, which
         * would place the compression algorithm in bits 16-18 and the hash
         * algorithm in bits 19-21 of the flags field -- confirm against the
         * code that loads the block header.
         */
     30 #define HASH_ALGO_SHIFT		19
     31 #define HASH_ALGO_MASK		0x7	/* max 8 hash algos */
     32 #define COMPR_ALGO_SHIFT	16
     33 #define COMPR_ALGO_MASK		0x7	/* max 8 compression algos */
     34 
     35 enum {
        	/* NOTE(review): presumably the values returned by walk_snap() callbacks -- confirm */
     36 	WALK_CONTINUE,	/* = 0 */
     37 	WALK_STOP	/* = 1 */
     38 };
     39 
     40 enum compr_algo {
        	/*
        	 * Compression algorithm identifiers, numbered from 0.
        	 * NOTE(review): presumably the value kept under COMPR_ALGO_MASK in
        	 * the block header flags; if so, reordering the entries changes the
        	 * file format -- confirm.
        	 */
     41 	COMPR_NONE,	/* = 0 */
     42 	COMPR_LZ4,	/* = 1 */
     43 	COMPR_SNAPPY,	/* = 2 */
     44 	NR_COMPRS,	/* = 3, the number of entries above; keep last */
     45 };
     46 
     47 enum hash_algo {
        	/*
        	 * Hash algorithm identifiers, numbered from 0.
        	 * NOTE(review): presumably the value kept under HASH_ALGO_MASK in
        	 * the block header flags; if so, reordering the entries changes the
        	 * file format -- confirm.
        	 */
     48 	HASH_BLAKE2B,	/* = 0 */
     49 	HASH_BLAKE2BP,	/* = 1 */
     50 	HASH_BLAKE2S,	/* = 2 */
     51 	HASH_BLAKE2SP,	/* = 3 */
     52 	NR_HASHES,	/* = 4, the number of entries above; keep last */
     53 };
     54 
     55 struct chunker;	/* forward declaration; only used through pointers in this header */
     56 struct icache;	/* forward declaration; only used through pointers in this header */
     57 
     58 struct stats {
        	/*
        	 * Store statistics, embedded in struct snap_hdr as `st`.
        	 * Ten uint64_t members, i.e. 80 of the SNAP_HDR_SIZE bytes.
        	 */
     59 	uint64_t orig_size;	/* original store size */
     60 	uint64_t compr_size;	/* compressed store size */
     61 	uint64_t dedup_size;	/* deduplicated store size */
     62 	uint64_t min_blk_size;	/* NOTE(review): presumably the smallest block size seen -- confirm */
     63 	uint64_t max_blk_size;	/* NOTE(review): presumably the largest block size seen -- confirm */
     64 	uint64_t nr_blks;	/* number of unique blocks */
     65 	uint64_t reserved[4];	/* NOTE(review): presumably spare room for future fields -- confirm */
     66 };
     67 
     68 struct snap_hdr {
        	/* Header of the snapshots file; SNAP_HDR_SIZE bytes in the file format. */
     69 	uint64_t flags;		/* see the VER_* macros; NOTE(review): presumably carries the format version -- confirm */
     70 	uint64_t size;		/* size of snapshots file */
     71 	uint64_t nr_snaps;	/* NOTE(review): presumably the number of snapshots in the file -- confirm */
     72 	struct stats st;	/* store statistics, see struct stats */
     73 };
     74 
     75 struct blk_hdr {
        	/* Header of the store file; BLK_HDR_SIZE bytes in the file format. */
     76 	uint64_t flags;		/* see the VER_*, HASH_ALGO_* and COMPR_ALGO_* macros */
     77 	uint64_t size;		/* size of store file */
     78 };
     79 
     80 struct blk_desc {
        	/* Descriptor of a single block; BLK_DESC_SIZE bytes in the file format. */
     81 	uint8_t md[MD_SIZE];	/* hash of block */
     82 	uint64_t offset;	/* offset into store file */
     83 	uint64_t size;		/* size of block */
     84 };
     85 
     86 struct snap {
        	/*
        	 * One snapshot: a fixed part (SNAPSHOT_SIZE bytes in the file
        	 * format) followed by a flexible array of block descriptors.
        	 */
     87 	uint64_t size;		/* size of snapshot (including block descriptors) */
     88 	uint8_t msg[MSG_SIZE];	/* arbitrary message attached to snapshot */
     89 	uint8_t md[MD_SIZE];	/* hash of snapshot (hash of all block descriptor hashes) */
     90 	uint64_t nr_blk_descs;	/* NOTE(review): presumably the number of entries in blk_desc[] -- confirm */
     91 	struct blk_desc blk_desc[];	/* flexible array member; must remain the last member */
     92 };
     93 
     94 struct compr_ctx {
        	/* Compression context; first argument of the init/size/compr/decompr/final functions declared below. */
     95 	struct compr_ops *ops;	/* struct compr_ops is not defined in this header */
     96 };
     97 
     98 struct hash_ctx {
        	/* Hash context; first argument of hash_init/hash_update/hash_final and the blake2* functions declared below. */
     99 	union {
        		/* one BLAKE2 state per enum hash_algo entry; the members share storage */
    100 		blake2b_state blake2b_ctx;
    101 		blake2bp_state blake2bp_ctx;
    102 		blake2s_state blake2s_ctx;
    103 		blake2sp_state blake2sp_ctx;
    104 	} u;
    105 	struct hash_ops *ops;	/* struct hash_ops is not defined in this header */
    106 };
    107 
    108 /* dedup.c */
    109 extern int verbose;	/* declaration only; no definition in this header */
    110 
    111 /*
         * chunker.c
         *
         * struct chunker is only forward-declared in this header.
         * NOTE(review): the names suggest an alloc -> fill -> get_chunk ->
         * drain -> free life cycle; confirm the exact contract (return values
         * of fill_chunker(), ownership of the get_chunk() pointer) in chunker.c.
         */
    112 struct chunker *alloc_chunker(int fd, size_t min_size, size_t max_size,
    113                               size_t mask, size_t win_size);
    114 void free_chunker(struct chunker *chunker);
    115 ssize_t fill_chunker(struct chunker *chunker);
    116 uint8_t *get_chunk(struct chunker *chunker, size_t *chunk_size);
    117 void drain_chunker(struct chunker *chunker);
    118 
    119 /*
         * compress-none.c
         *
         * Five entry points (init, size, compr, decompr, final) with the same
         * signatures as the lz4_* and snappy_* declarations in this header.
         * NOTE(review): presumably the backend for COMPR_NONE -- confirm.
         */
    120 int none_init(struct compr_ctx *ctx);
    121 size_t none_size(struct compr_ctx *ctx, size_t n);
    122 size_t none_compr(struct compr_ctx *ctx, const void *in, void *out,
    123                   size_t insize, size_t outsize);
    124 size_t none_decompr(struct compr_ctx *ctx, const void *in, void *out,
    125                     size_t insize, size_t outsize);
    126 int none_final(struct compr_ctx *ctx);
    127 
    128 /*
         * compress-lz4.c
         *
         * Five entry points (init, size, compr, decompr, final) with the same
         * signatures as the none_* and snappy_* declarations in this header.
         * NOTE(review): presumably the backend for COMPR_LZ4 -- confirm.
         */
    129 int lz4_init(struct compr_ctx *ctx);
    130 size_t lz4_size(struct compr_ctx *ctx, size_t n);
    131 size_t lz4_compr(struct compr_ctx *ctx, const void *in, void *out,
    132                  size_t insize, size_t outsize);
    133 size_t lz4_decompr(struct compr_ctx *ctx, const void *in, void *out,
    134                    size_t insize, size_t outsize);
    135 int lz4_final(struct compr_ctx *ctx);
    136 
    137 /*
         * compress-snappy.c
         *
         * Five entry points (init, size, compr, decompr, final) with the same
         * signatures as the none_* and lz4_* declarations in this header.
         * NOTE(review): presumably the backend for COMPR_SNAPPY -- confirm.
         */
    138 int snappy_init(struct compr_ctx *ctx);
    139 size_t snappy_size(struct compr_ctx *ctx, size_t n);
    140 size_t snappy_compr(struct compr_ctx *ctx, const void *in, void *out,
    141                     size_t insize, size_t outsize);
    142 size_t snappy_decompr(struct compr_ctx *ctx, const void *in, void *out,
    143                       size_t insize, size_t outsize);
    144 int snappy_final(struct compr_ctx *ctx);
    145 
    146 /*
         * compress.c
         *
         * NOTE(review): compr_size() is declared to return int, whereas
         * none_size(), lz4_size() and snappy_size() all return size_t --
         * confirm whether the narrower signed return type is intended.
         * NOTE(review): the `type` arguments and the compr_name2type() result
         * are presumably enum compr_algo values -- confirm against compress.c.
         */
    147 int compr_init(struct compr_ctx *ctx, int type);
    148 int compr_size(struct compr_ctx *ctx, size_t n);
    149 size_t compr(struct compr_ctx *ctx, const void *in, void *out,
    150              size_t insize, size_t outsize);
    151 size_t decompr(struct compr_ctx *ctx, const void *in, void *out,
    152                size_t insize, size_t outsize);
    153 int compr_final(struct compr_ctx *ctx);
    154 int compr_name2type(char *name);
    155 char *compr_type2name(int type);
    156 void compr_list(int fd);
    157 
    158 /*
         * hash-blake2b.c
         *
         * Parameter lists match hash_init/hash_update/hash_final without the
         * `type` argument. NOTE(review): the i/u/f suffixes presumably stand
         * for init/update/final on the blake2b_ctx union member -- confirm.
         */
    159 int blake2bi(struct hash_ctx *ctx, size_t n);
    160 int blake2bu(struct hash_ctx *ctx, const void *buf, size_t n);
    161 int blake2bf(struct hash_ctx *ctx, void *buf, size_t n);
    162 
    163 /*
         * hash-blake2bp.c
         *
         * Parameter lists match hash_init/hash_update/hash_final without the
         * `type` argument. NOTE(review): the i/u/f suffixes presumably stand
         * for init/update/final on the blake2bp_ctx union member -- confirm.
         */
    164 int blake2bpi(struct hash_ctx *ctx, size_t n);
    165 int blake2bpu(struct hash_ctx *ctx, const void *buf, size_t n);
    166 int blake2bpf(struct hash_ctx *ctx, void *buf, size_t n);
    167 
    168 /*
         * hash-blake2s.c
         *
         * Parameter lists match hash_init/hash_update/hash_final without the
         * `type` argument. NOTE(review): the i/u/f suffixes presumably stand
         * for init/update/final on the blake2s_ctx union member -- confirm.
         */
    169 int blake2si(struct hash_ctx *ctx, size_t n);
    170 int blake2su(struct hash_ctx *ctx, const void *buf, size_t n);
    171 int blake2sf(struct hash_ctx *ctx, void *buf, size_t n);
    172 
    173 /*
         * hash-blake2sp.c
         *
         * Parameter lists match hash_init/hash_update/hash_final without the
         * `type` argument. NOTE(review): the i/u/f suffixes presumably stand
         * for init/update/final on the blake2sp_ctx union member -- confirm.
         */
    174 int blake2spi(struct hash_ctx *ctx, size_t n);
    175 int blake2spu(struct hash_ctx *ctx, const void *buf, size_t n);
    176 int blake2spf(struct hash_ctx *ctx, void *buf, size_t n);
    177 
    178 /*
         * hash.c
         *
         * NOTE(review): the `type` arguments and the hash_name2type() result
         * are presumably enum hash_algo values, and `n` a length in bytes --
         * confirm against hash.c. The meaning of the int return values is not
         * visible in this header.
         */
    179 int hash_init(struct hash_ctx *ctx, int type, size_t n);
    180 int hash_update(struct hash_ctx *ctx, const void *buf, size_t n);
    181 int hash_final(struct hash_ctx *ctx, void *buf, size_t n);
    182 int hash_name2type(char *name);
    183 char *hash_type2name(int type);
    184 void hash_list(int fd);
    185 
    186 /*
         * icache.c
         *
         * struct icache is only forward-declared in this header.
         * NOTE(review): icache_stats() presumably reports its counters through
         * the `hits` and `misses` pointers, and the int returned by
         * lookup_icache() presumably signals whether `desc` was found -- the
         * conventions are not visible here; confirm against icache.c.
         */
    187 struct icache *alloc_icache(void);
    188 void free_icache(struct icache *icache);
    189 void insert_icache(struct icache *icache, struct blk_desc *desc);
    190 int lookup_icache(struct icache *icache, struct blk_desc *desc);
    191 void icache_stats(struct icache *icache, unsigned long long *hits,
    192                   unsigned long long *misses);
    193 
    194 /*
         * pack.c
         *
         * NOTE(review): presumably serializes the variadic arguments into dst
         * as directed by fmt; the format syntax and the meaning of the int
         * return value are not visible in this header -- confirm in pack.c.
         */
    195 int pack(unsigned char *dst, char *fmt, ...);
    196 
    197 /*
         * unpack.c
         *
         * NOTE(review): presumably deserializes from src into the variadic
         * arguments as directed by fmt; the format syntax and the meaning of
         * the int return value are not visible in this header -- confirm in
         * unpack.c.
         */
    198 int unpack(unsigned char *src, char *fmt, ...);
    199 
    200 /*
         * types.c
         *
         * Serialization helpers for the file-format structs (see the size
         * macros near the top of this header). All of them return void, so
         * failures are not reported through a return value.
         * NOTE(review): read_snap_descs() and write_snap_blk_descs() are named
         * asymmetrically; presumably they form a read/write pair -- confirm.
         */
    201 void read_snap_hdr(int fd, struct snap_hdr *hdr);
    202 void write_snap_hdr(int fd, struct snap_hdr *hdr);
    203 void read_blk_hdr(int fd, struct blk_hdr *hdr);
    204 void write_blk_hdr(int fd, struct blk_hdr *hdr);
    205 void read_blk_desc(int fd, struct blk_desc *desc);
    206 void write_blk_desc(int fd, struct blk_desc *desc);
    207 void read_snap(int fd, struct snap *snap);
    208 void read_snap_descs(int fd, struct snap *snap);
    209 void write_snap(int fd, struct snap *snap);
    210 void write_snap_blk_descs(int fd, struct snap *snap);
    211 
    212 /*
         * utils.c
         *
         * NOTE(review): xlseek/xread/xwrite have the same parameter lists as
         * lseek/read/write and are presumably checked wrappers around them --
         * confirm.
         * NOTE(review): ownership rules are not visible in this header;
         * presumably alloc_snap pairs with free_snap and alloc_buf with
         * free_buf, and grow_snap() returns the snapshot to use afterwards
         * (possibly at a new address) -- confirm against utils.c.
         * NOTE(review): the compr_algo/hash_algo arguments are presumably
         * enum compr_algo / enum hash_algo values -- confirm.
         */
    213 void str2bin(char *s, uint8_t *d);
    214 off_t xlseek(int fd, off_t offset, int whence);
    215 ssize_t xread(int fd, void *buf, size_t nbytes);
    216 ssize_t xwrite(int fd, const void *buf, size_t nbytes);
    217 void init_blk_hdr(struct blk_hdr *hdr, int compr_algo, int hash_algo);
    218 void init_snap_hdr(struct snap_hdr *hdr);
    219 void load_blk_hdr(int fd, struct blk_hdr *hdr, int *compr_algo, int *hash_algo);
    220 void load_snap_hdr(int fd, struct snap_hdr *hdr);
    221 struct snap *alloc_snap(void);
    222 void free_snap(struct snap *snap);
    223 struct snap *grow_snap(struct snap *snap, uint64_t nr_blk_descs);
    224 void append_snap(int fd, struct snap_hdr *hdr, struct snap *snap);
    225 void hash_snap(struct snap *snap, uint8_t *md, int hash_algo);
    226 void walk_snap(int fd, struct snap_hdr *hdr,
    227                int (*fn)(struct snap *, void *), void *arg);
    228 uint8_t *alloc_buf(size_t size);
    229 void free_buf(uint8_t *buf);
    230 void read_blk(int fd, uint8_t *buf, struct blk_desc *blk_desc);
    231 void append_blk(int fd, struct blk_hdr *hdr, uint8_t *buf,
    232                 struct blk_desc *blk_desc);
    233 void hash_blk(uint8_t *buf, size_t size, uint8_t *md, int hash_algo);
    233 void hash_blk(uint8_t *buf, size_t size, uint8_t *md, int hash_algo);