commit c0597760335a6dfcfeb37a023f6362567079ba7f
parent 367c26efe780933f7fc2201384d8e1c0feb8a77a
Author: sin <sin@2f30.org>
Date: Fri, 15 Feb 2019 16:54:09 +0000
Preparation for variable length dedup support
Diffstat:
M | LICENSE | | | 3 | ++- |
M | Makefile | | | 10 | +++++----- |
M | TODO | | | 3 | ++- |
M | dedup.1 | | | 1 | + |
M | dedup.c | | | 586 | +++++++++++++++++++++++++++++++++++++++++++------------------------------------ |
D | sha256.c | | | 261 | ------------------------------------------------------------------------------- |
D | sha256.h | | | 24 | ------------------------ |
7 files changed, 331 insertions(+), 557 deletions(-)
diff --git a/LICENSE b/LICENSE
@@ -1,4 +1,5 @@
-© 2018 Dimitris Papastamos <sin@2f30.org>
+© 2019 Dimitris Papastamos <sin@2f30.org>
+© 2019 z3bra <contactatz3bradotorg>
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
diff --git a/Makefile b/Makefile
@@ -1,18 +1,18 @@
VERSION = 0.0
PREFIX = /usr/local
MANPREFIX = $(PREFIX)/man
-SRC = dedup.c sha256.c
-OBJ = dedup.o sha256.o
+SRC = dedup.c
+OBJ = dedup.o
BIN = dedup
DISTFILES = $(SRC) LICENSE Makefile README arg.h dedup.1 tree.h
CFLAGS = -g -Wall
-CPPFLAGS = -I/usr/local/include
+CPPFLAGS = -I/usr/local/include -D_FILE_OFFSET_BITS=64
+LDLIBS = -lcrypto
all: $(BIN)
-dedup.o: arg.h sha256.h tree.h
-sha256.o: sha256.h
+dedup.o: arg.h tree.h
clean:
rm -f $(OBJ) $(BIN) $(BIN)-$(VERSION).tar.gz
diff --git a/TODO b/TODO
@@ -1,4 +1,5 @@
endianness agnostic
version field in entry header
-lseek64 support
look into variable-length dedup
+file locking
+overflow checks
diff --git a/dedup.1 b/dedup.1
@@ -31,3 +31,4 @@ files will be created.
.El
.Sh AUTHORS
.An Dimitris Papastamos Aq Mt sin@2f30.org ,
+.An z3bra Aq Mt contactatz3bradotorg .
diff --git a/dedup.c b/dedup.c
@@ -7,47 +7,59 @@
#include <string.h>
#include <unistd.h>
+#include <openssl/sha.h>
+
#include "arg.h"
-#include "sha256.h"
#include "tree.h"
#define INDEXF ".index"
#define STOREF ".store"
#define CACHEF ".cache"
-#define BLKSIZ 4096
+#define BLKSIZ 65536
+#define WINSIZ 4095
+#define MDSIZ SHA256_DIGEST_LENGTH
+
+/*
+ * Rotate the 32-bit value x left by y bits. Both shift counts are masked
+ * so that y == 0 (or any multiple of 32) never shifts by the full width
+ * of the type, which is undefined behaviour in C.
+ */
+#define ROTL(x, y) (((x) << ((y) & 31)) | ((x) >> ((32 - ((y) & 31)) & 31)))
+
+enum {
+ WALK_CONTINUE,
+ WALK_STOP
+};
+/* index file header */
struct enthdr {
uint64_t flags;
uint64_t nents;
-} __attribute__((packed));
+};
+
+/* block descriptor */
+struct bdescr {
+ uint8_t md[MDSIZ];
+ uint64_t offset;
+ uint64_t size;
+};
+/* index file entry */
struct ent {
uint64_t size;
- uint8_t reserved[7];
- uint8_t md[32];
+ uint8_t md[MDSIZ]; /* hash of file */
uint64_t nblks;
- uint64_t blks[];
-} __attribute__((packed));
+ struct bdescr bdescr[];
+};
-struct blk {
- uint8_t md[32];
- uint64_t size;
- uint8_t data[BLKSIZ];
-} __attribute__((packed));
-
-struct cache_data {
- uint8_t md[32];
- uint64_t blkidx;
-} __attribute__((packed));
-
-struct cache_ent {
- struct cache_data data;
- int dirty;
- RB_ENTRY(cache_ent) e;
+/* cache entry */
+struct cent {
+ struct bdescr bdescr;
+ RB_ENTRY(cent) e;
+};
+
+struct extract_args {
+ uint8_t *md;
+ int fd;
};
-RB_HEAD(cache, cache_ent) cache_head;
+RB_HEAD(cache, cent) cache_head;
struct enthdr enthdr;
int ifd;
int sfd;
@@ -55,58 +67,101 @@ int cfd;
int verbose;
char *argv0;
-void
-dump_md(const uint8_t *md, size_t len)
+/*
+ * Static table for use in buzhash algorithm.
+ * 256 * 32 bits randomly generated unique integers
+ */
+uint32_t buz[] = {
+ 0xbc9fa594,0x30a8f827,0xced627a7,0xdb46a745,0xcfa4a9e8,0x77cccb59,0xddb66276,0x3adc532f,
+ 0xfe8b67d3,0x8155b59e,0x0c893666,0x1d757009,0x17394ee4,0x85d94c07,0xcacd52da,0x076c6f79,
+ 0xead0a798,0x6c7ccb4a,0x2639a1b8,0x3aa5ae32,0x3e6218d2,0xb290d980,0xa5149521,0x4b426119,
+ 0xd3230fc7,0x677c1cc4,0x2b64603c,0x01fe92a8,0xbe358296,0xa7e7fac7,0xf509bf41,0x04b017ad,
+ 0xf900344c,0x8e14e202,0xb2a6e9b4,0x3db3c311,0x960286a8,0xf6bf0468,0xed54ec94,0xf358070c,
+ 0x6a4795dd,0x3f7b925c,0x5e13a060,0xfaecbafe,0x03c8bb55,0x8a56ba88,0x633e3b49,0xe036bbbe,
+ 0x1ed3dbb5,0x76e8ad74,0x79d346ab,0x44b4ccc4,0x71eb22d3,0xa1aa3f24,0x50e05b81,0xa3b450d3,
+ 0x7f5caffb,0xa1990650,0x54c44800,0xda134b65,0x72362eea,0xbd12b8e6,0xf7c99fdc,0x020d48c7,
+ 0x9d9c3d46,0x32b75615,0xe61923cf,0xadc09d8f,0xab11376b,0xd66fe4cd,0xb3b086b6,0xb8345b9f,
+ 0x59029667,0xae0e937c,0xcbd4d4ba,0x720bb3fb,0x5f7d2ca3,0xec24ba15,0x6b40109b,0xf0a54587,
+ 0x3acf9420,0x466e981d,0xc66dc124,0x150ef7b4,0xc3ce718e,0x136774f5,0x46684ab4,0xb4b490f0,
+ 0x26508a8b,0xf12febc8,0x4b99171b,0xfc373c84,0x339b5677,0x41703ff3,0x7cadbbd7,0x15ea24e2,
+ 0x7a2f9783,0xed6a383a,0x649eb072,0x79970941,0x2abd28ad,0x4375e00c,0x9df084f7,0x6fdeec6c,
+ 0x6619ac6d,0x7d256f4d,0x9b8e658a,0x3d7627e9,0xd5a98d45,0x15f84223,0x9b6acef5,0xf876be67,
+ 0xe3ae7089,0x84e2b64a,0x6818a969,0x86e9ba4e,0xa24a5b57,0x61570cf1,0xa5f8fc91,0x879d8383,
+ 0x91b13866,0x75e87961,0x16db8138,0x5a2ff6b8,0x8f664e9b,0x894e1496,0x88235c5b,0xcdb3b580,
+ 0xa2e80109,0xb0f88a82,0xd12cd340,0x93fbc37d,0xf4d1eb82,0xce42f309,0x16ffd2c2,0xb4dfef2b,
+ 0xb8b1a33e,0x4708a5e6,0xba66dd88,0xa9ec0da6,0x6f8ee2c9,0xad8b9993,0x1d6a25a8,0x1f3d08ce,
+ 0x149c04e7,0x5cd1fa51,0xb84c89c7,0xeced6f8c,0xe328b30f,0x084fa836,0x6d1bb1b7,0x94c78ea5,
+ 0x14973034,0xf1a1bcef,0x48b798d2,0xded9ca9e,0x5fd965d0,0x92544eb1,0x5e80f189,0xcbbf5e15,
+ 0x4d8121f0,0x5dd3b92f,0xd9ea98fb,0x2dbf5644,0x0fbcb9b7,0x20a1db53,0x7c3fcc98,0x36744fbd,
+ 0xced08954,0x8e7c5efe,0x3c5f6733,0x657477be,0x3630a02d,0x38bcbda0,0xb7702575,0x4a7f4bce,
+ 0x0e7660fe,0x4dcb91b5,0x4fd7ffd3,0x041821c1,0xa846a181,0xc8048e9e,0xd4b05072,0x986e0509,
+ 0xa00aaeeb,0x02e3526a,0x2fac4843,0xfa98e805,0x923ecd8d,0x395d9546,0x8674c3cd,0xae5a8a71,
+ 0x966dfe45,0x5c9ceba5,0x0830a1cf,0xa1750981,0x8f604480,0x28ea0c9a,0x0da12413,0x98b0b3c5,
+ 0xa21d473a,0x96ce4308,0xe9a1001b,0x8bbacb44,0x18bad3f4,0xe3121acb,0x46a9b45f,0x92cd9704,
+ 0xc1a7c619,0x3281e361,0x462e8c79,0x9e572f93,0x7239e5f0,0x67d8e6ba,0x13747ce3,0xf01ee64a,
+ 0xe7d0ae12,0xeea04088,0xe5b36767,0x17558eae,0x678ffbe6,0xe0bbc866,0x0c24adec,0xa9cbb869,
+ 0x3fd44ee1,0x9ca4ca06,0x04c0ef00,0x04589a21,0x9cf9c819,0x976f6ca1,0x8a30e66a,0x004d6f7e,
+ 0x384c8851,0x5bc97eb8,0xc6c49339,0x5aa386c7,0x74bdf8af,0x9b713750,0x4112f8c2,0x2895dae1,
+ 0xf576d905,0x9de98bce,0xb2b26bcd,0xd46707a0,0x147fbb46,0xa52c6e50,0xe43128fc,0x374ad964,
+ 0x8dfd4d53,0xc4d0c087,0x31dfb5ca,0xa44589b5,0x6b637e2e,0x663f6b45,0xd2d8baa0,0x1dac7e4c
+};
+
+/* Buzhash: https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial */
+/*
+ * Compute the initial buzhash fingerprint of the first `size` bytes of buf.
+ * Every byte but the last contributes its table value rotated left by its
+ * distance from the end of the window (modulo 32); the last byte
+ * contributes its table value unrotated.
+ * NOTE(review): size is presumably > 0; with size == 0 the loop counter
+ * would wrap around -- confirm against callers.
+ */
+uint32_t
+buzh_init(uint8_t *buf, size_t size)
{
size_t i;
+ uint32_t fp = 0;
- for (i = 0; i < len; i++)
- fprintf(stderr, "%02x", md[i]);
+ for (i = size - 1; i > 0; i--, buf++)
+ fp ^= ROTL(buz[*buf], i % 32);
+
+ return fp ^ buz[*buf];
}
-void
-dump_enthdr(struct enthdr *hdr)
+/*
+ * Slide a buzhash window of `size` bytes forward by one byte.
+ * fp is the fingerprint of the current window, `out` is the byte leaving
+ * the window (its table value is rotated by size % 32 so that it cancels
+ * its earlier contribution) and `in` is the byte entering the window.
+ * Returns the fingerprint of the shifted window.
+ */
+uint32_t
+buzh_update(uint32_t fp, uint8_t in, uint8_t out, size_t size)
{
- fprintf(stderr, "hdr->flags = %llx\n",
- (unsigned long long)hdr->flags);
- fprintf(stderr, "hdr->nents = %llx\n",
- (unsigned long long)hdr->nents);
+ return ROTL(fp, 1) ^ ROTL(buz[out], size % 32) ^ buz[in];
}
-void
-dump_ent(struct ent *ent)
+/*
+ * Find a content-defined chunk boundary in buf[0..size).
+ * Returns the length of the first chunk, or size when no boundary is found
+ * or when the buffer is not larger than one hashing window.
+ */
+uint64_t
+chunk_blk(uint8_t *buf, size_t size)
{
- uint64_t i;
-
- fprintf(stderr, "ent->size: %llu\n", (unsigned long long)ent->size);
- fprintf(stderr, "ent->md: ");
- dump_md(ent->md, sizeof(ent->md));
- fputc('\n', stderr);
- if (verbose) {
- fprintf(stderr, "ent->nblks: %llu\n",
- (unsigned long long)ent->nblks);
- for (i = 0; i < ent->nblks; i++)
- fprintf(stderr, "ent->blks[%llu]: %llu\n",
- (unsigned long long)i,
- (unsigned long long)ent->blks[i]);
+ size_t i;
+ uint32_t fp;
+
+ /*
+ * Chunking blocks is decided using a rolling hash + binary pattern.
+ * The buzhash algorithm is used to "fingerprint" a fixed size window.
+ * Once the lower 13 bits of this fingerprint are all zeros,
+ * the block is chunked.
+ * If the pattern can't be matched, then we return the buffer size.
+ */
+
+ /*
+ * A buffer that does not exceed the window cannot be split; bailing
+ * out here also keeps `size - WINSIZ` from wrapping around and keeps
+ * buzh_init() from reading past the end of buf.
+ */
+ if (size <= WINSIZ)
+ return size;
+
+ fp = buzh_init(buf, WINSIZ);
+ for (i = 1; i < size - WINSIZ; i++) {
+ /*
+ * The window now covers buf[i .. i + WINSIZ - 1]:
+ * buf[i + WINSIZ - 1] enters it and buf[i - 1] leaves it.
+ */
+ fp = buzh_update(fp, buf[i + WINSIZ - 1], buf[i - 1], WINSIZ);
+ if ((fp & 0x00001fff) == 0)
+ return i + WINSIZ;
}
+ return size;
}
void
-dump_blk(struct blk *blk)
+print_md(const uint8_t *md, size_t size)
{
- fprintf(stderr, "blk->md: ");
- dump_md(blk->md, sizeof(blk->md));
- putchar('\n');
- fprintf(stderr, "blk->size: %llu\n", (unsigned long long)blk->size);
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ fprintf(stderr, "%02x", md[i]);
}
void
str2bin(char *s, uint8_t *d)
{
- size_t i, len = strlen(s) / 2;
+ size_t i, size = strlen(s) / 2;
- for (i = 0; i < len; i++, s += 2)
+ for (i = 0; i < size; i++, s += 2)
sscanf(s, "%2hhx", &d[i]);
}
@@ -151,64 +206,68 @@ xwrite(int fd, const void *buf, size_t nbytes)
}
int
-cache_ent_cmp(struct cache_ent *e1, struct cache_ent *e2)
+cent_cmp(struct cent *e1, struct cent *e2)
{
int r;
- r = memcmp(e1->data.md, e2->data.md, sizeof(e1->data.md));
+ r = memcmp(e1->bdescr.md, e2->bdescr.md, sizeof(e1->bdescr.md));
if (r > 0)
return 1;
else if (r < 0)
return -1;
return 0;
}
-RB_PROTOTYPE(cache, cache_ent, e, cache_ent_cmp);
-RB_GENERATE(cache, cache_ent, e, cache_ent_cmp);
+RB_PROTOTYPE(cache, cent, e, cent_cmp);
+RB_GENERATE(cache, cent, e, cent_cmp);
-struct cache_ent *
-alloc_cache_ent(uint8_t *md, uint64_t blkidx)
+struct cent *
+alloc_cent(void)
{
- struct cache_ent *ent;
+ struct cent *ent;
ent = calloc(1, sizeof(*ent));
if (ent == NULL)
- err(1, "malloc");
- memcpy(&ent->data.md, md, sizeof(ent->data.md));
- ent->data.blkidx = blkidx;
+ err(1, "calloc");
return ent;
}
void
-add_cache_ent(struct cache_ent *ent)
+add_cent(struct cent *cent)
{
- RB_INSERT(cache, &cache_head, ent);
+ RB_INSERT(cache, &cache_head, cent);
}
+/*
+ * Write every cached block descriptor to the cache file, starting at
+ * offset 0 and in the iteration order of the red-black tree.
+ */
void
flush_cache(void)
{
- struct cache_ent *ent;
+ struct cent *cent;
- RB_FOREACH(ent, cache, &cache_head) {
- if (!ent->dirty)
- continue;
- lseek(cfd, ent->data.blkidx * sizeof(ent->data), SEEK_SET);
- xwrite(cfd, &ent->data, sizeof(ent->data));
- ent->dirty = 0;
- }
+ lseek(cfd, 0, SEEK_SET);
+ RB_FOREACH(cent, cache, &cache_head)
+ xwrite(cfd, &cent->bdescr, sizeof(cent->bdescr));
}
void
free_cache(void)
{
- struct cache_ent *ent, *tmp;
+ struct cent *cent, *tmp;
- RB_FOREACH_SAFE(ent, cache, &cache_head, tmp) {
- RB_REMOVE(cache, &cache_head, ent);
- free(ent);
+ RB_FOREACH_SAFE(cent, cache, &cache_head, tmp) {
+ RB_REMOVE(cache, &cache_head, cent);
+ free(cent);
}
}
+/*
+ * Return the number of block descriptors stored in the cache file,
+ * derived from the file size reported by fstat(). Exits via err() if
+ * fstat() fails.
+ */
+uint64_t
+cache_nents(void)
+{
+ struct stat sb;
+
+ if (fstat(cfd, &sb) < 0)
+ err(1, "fstat");
+ return sb.st_size / sizeof(struct bdescr);
+}
+
void
append_ent(struct ent *ent)
{
@@ -220,7 +279,7 @@ append_ent(struct ent *ent)
/* Append entry */
lseek(ifd, 0, SEEK_END);
ent->size = sizeof(*ent);
- ent->size += ent->nblks * sizeof(ent->blks[0]);
+ ent->size += ent->nblks * sizeof(ent->bdescr[0]);
xwrite(ifd, ent, ent->size);
}
@@ -231,7 +290,7 @@ alloc_ent(void)
ent = calloc(1, sizeof(*ent));
if (ent == NULL)
- err(1, "malloc");
+ err(1, "calloc");
return ent;
}
@@ -241,271 +300,265 @@ grow_ent(struct ent *ent, uint64_t nblks)
size_t size;
size = sizeof(*ent);
- size += nblks * sizeof(ent->blks[0]);
+ size += nblks * sizeof(ent->bdescr[0]);
ent = realloc(ent, size);
if (ent == NULL)
err(1, "realloc");
return ent;
}
-uint64_t
-storefile_nblks(void)
+uint8_t *
+alloc_buf(size_t size)
{
- struct stat sb;
+ void *p;
- if (fstat(sfd, &sb) < 0)
- err(1, "fstat");
- return sb.st_size / sizeof(struct blk);
+ p = calloc(1, size);
+ if (p == NULL)
+ err(1, "calloc");
+ return p;
}
-uint64_t
-cachefile_nblks(void)
+void
+hash_blk(uint8_t *buf, size_t size, uint8_t *md)
{
- struct stat sb;
+ SHA256_CTX ctx;
- if (fstat(cfd, &sb) < 0)
- err(1, "fstat");
- return sb.st_size / sizeof(struct cache_data);
+ SHA256_Init(&ctx);
+ SHA256_Update(&ctx, buf, size);
+ SHA256_Final(md, &ctx);
}
void
-hash_blk(struct blk *blk)
+read_blk(uint8_t *buf, struct bdescr *bdescr)
{
- sha256_context ctx;
-
- sha256_starts(&ctx);
- sha256_update(&ctx, blk->data, blk->size);
- sha256_finish(&ctx, blk->md);
+ lseek(sfd, bdescr->offset, SEEK_SET);
+ if (xread(sfd, buf, bdescr->size) == 0)
+ errx(1, "read: unexpected EOF");
}
void
-read_blk(struct blk *blk, off_t blkidx)
+append_blk(uint8_t *buf, size_t size)
{
- lseek(sfd, blkidx * sizeof(*blk), SEEK_SET);
- if (xread(sfd, blk, sizeof(*blk)) == 0)
- errx(1, "unexpected EOF");
+ lseek(sfd, 0, SEEK_END);
+ xwrite(sfd, buf, size);
}
-void
-append_blk(struct blk *blk)
+off_t
+store_size(void)
{
- lseek(sfd, 0, SEEK_END);
- xwrite(sfd, blk, sizeof(*blk));
+ return lseek(sfd, 0, SEEK_END);
}
int
-lookup_blk(struct blk *blk, uint64_t *blkidx)
+lookup_blk(uint8_t *md, struct bdescr *bdescr)
{
- struct cache_ent *ent, key;
+ struct cent *ent, key;
- memcpy(key.data.md, blk->md, sizeof(key.data.md));
+ memcpy(key.bdescr.md, md, sizeof(key.bdescr.md));
ent = RB_FIND(cache, &cache_head, &key);
if (ent != NULL) {
- *blkidx = ent->data.blkidx;
+ *bdescr = ent->bdescr;
return 0;
}
return -1;
}
void
-extract(char *id, int fd)
-{
- uint8_t md[32];
- uint64_t nblks, i;
-
- str2bin(id, md);
- nblks = storefile_nblks();
- lseek(ifd, sizeof(enthdr), SEEK_SET);
- for (i = 0; i < enthdr.nents; i++) {
- uint64_t j;
- struct ent *ent;
-
- /* Load index entry */
- ent = alloc_ent();
- if (xread(ifd, ent, sizeof(*ent)) == 0)
- errx(1, "unexpected EOF");
-
- /* Check if we've located the right file */
- if (memcmp(ent->md, md, sizeof(ent->md)) != 0) {
- free(ent);
- /* Skip over index entry block table */
- lseek(ifd, ent->nblks * sizeof(ent->blks[0]), SEEK_CUR);
- continue;
- }
-
- /* Load index entry block table */
- ent = grow_ent(ent, ent->nblks);
- if (xread(ifd, ent->blks,
- ent->nblks * sizeof(ent->blks[0])) == 0)
- errx(1, "unexpected EOF");
-
- /* Blast file blocks to file descriptor */
- for (j = 0; j < ent->nblks; j++) {
- struct blk blk;
-
- if (ent->blks[j] > nblks)
- errx(1, "index is corrupted");
- read_blk(&blk, ent->blks[j]);
- xwrite(fd, blk.data, blk.size);
- }
- free(ent);
- break;
- }
- if (i == enthdr.nents)
- errx(1, "%s: unknown hash %s", __func__, id);
-}
-
-void
dedup(int fd)
{
- sha256_context ctx;
- struct blk blk;
+ /*
+ * Read fd in BLKSIZ pieces, store every block whose hash is not in
+ * the cache yet, and append an index entry that lists the block
+ * descriptors and the SHA-256 of the whole input. Nothing is added
+ * to the index when the input is empty.
+ */
+ uint8_t md[MDSIZ];
+ uint8_t *buf;
+ SHA256_CTX ctx;
struct ent *ent;
ssize_t n;
+ buf = alloc_buf(BLKSIZ);
ent = alloc_ent();
- sha256_starts(&ctx);
- while ((n = xread(fd, blk.data, BLKSIZ)) > 0) {
- uint64_t blkidx;
- blk.size = n;
- hash_blk(&blk);
+ SHA256_Init(&ctx);
+ while ((n = xread(fd, buf, BLKSIZ)) > 0) {
+ struct bdescr bdescr;
+
+ hash_blk(buf, n, md);
+
+ /* Calculate file hash one block at a time */
+ SHA256_Update(&ctx, buf, n);
- /* Rolling hash of input stream */
- sha256_update(&ctx, blk.data, blk.size);
- /* Prepare for adding a new block index for this entry */
ent = grow_ent(ent, ent->nblks + 1);
+ if (lookup_blk(md, &bdescr) < 0) {
+ struct cent *cent;
- if (lookup_blk(&blk, &blkidx) < 0) {
- struct cache_ent *cache_ent;
+ /*
+ * Block not found, create new block descriptor.
+ * Only the digest is copied: md holds MDSIZ bytes,
+ * not a whole descriptor.
+ */
+ memcpy(bdescr.md, md, sizeof(bdescr.md));
+ bdescr.offset = store_size();
+ bdescr.size = n;
- blkidx = storefile_nblks();
+ /* Update index entry */
+ ent->bdescr[ent->nblks++] = bdescr;
- /* Create a cache entry for this block */
- cache_ent = alloc_cache_ent(blk.md, blkidx);
- add_cache_ent(cache_ent);
- cache_ent->dirty = 1;
+ /* Store block */
+ append_blk(buf, n);
- ent->blks[ent->nblks++] = blkidx;
- append_blk(&blk);
+ /* Create a cache entry for this block */
+ cent = alloc_cent();
+ cent->bdescr = bdescr;
+ add_cent(cent);
} else {
- ent->blks[ent->nblks++] = blkidx;
+ /* Found block with the same hash, update index entry */
+ ent->bdescr[ent->nblks++] = bdescr;
}
}
if (ent->nblks > 0) {
/* Calculate hash and add this entry to the index */
- sha256_finish(&ctx, ent->md);
+ SHA256_Final(ent->md, &ctx);
append_ent(ent);
}
+
free(ent);
+ free(buf);
+}
- flush_cache();
+/*
+ * walk() callback: if ent is the entry whose file hash equals args->md,
+ * write all of its blocks to args->fd and stop the walk; otherwise ask
+ * the walk to continue. arg points to a struct extract_args.
+ */
+int
+extract(struct ent *ent, void *arg)
+{
+ uint8_t *buf;
+ struct extract_args *args = arg;
+ uint64_t i;
+
+ if (memcmp(ent->md, args->md, sizeof(ent->md)) != 0)
+ return WALK_CONTINUE;
+
+ buf = alloc_buf(BLKSIZ);
+ for (i = 0; i < ent->nblks; i++) {
+ /*
+ * The descriptor comes from the on-disk index; refuse a size
+ * that would overflow the BLKSIZ read buffer.
+ */
+ if (ent->bdescr[i].size > BLKSIZ)
+ errx(1, "index is corrupted");
+ read_blk(buf, &ent->bdescr[i]);
+ xwrite(args->fd, buf, ent->bdescr[i].size);
+ }
+ free(buf);
+ return WALK_STOP;
}
-void
-check(void)
+/*
+ * walk() callback: re-hash every block referenced by ent and report on
+ * stderr each block whose stored data no longer matches the digest kept
+ * in its descriptor. Always asks the walk to continue; arg is unused.
+ */
+int
+check(struct ent *ent, void *arg)
{
- uint64_t nblks, i, j;
+ uint8_t md[MDSIZ];
+ uint8_t *buf;
+ SHA256_CTX ctx;
+ uint64_t i;
- nblks = storefile_nblks();
- lseek(ifd, sizeof(enthdr), SEEK_SET);
- for (i = 0; i < enthdr.nents; i++) {
- uint8_t md[32];
- sha256_context ctx;
- struct ent *ent;
+ buf = alloc_buf(BLKSIZ);
+ /*
+ * Calculate hash for each block and compare
+ * with index entry block descriptor
+ */
+ for (i = 0; i < ent->nblks; i++) {
+ /*
+ * The descriptor comes from the on-disk index; refuse a size
+ * that would overflow the BLKSIZ read buffer.
+ */
+ if (ent->bdescr[i].size > BLKSIZ)
+ errx(1, "index is corrupted");
+ read_blk(buf, &ent->bdescr[i]);
- ent = alloc_ent();
- if (xread(ifd, ent, sizeof(*ent)) == 0)
- errx(1, "unexpected EOF");
- ent = grow_ent(ent, ent->nblks);
- if (xread(ifd, ent->blks,
- ent->nblks * sizeof(ent->blks[0])) == 0)
- errx(1, "unexpected EOF");
-
- sha256_starts(&ctx);
- for (j = 0; j < ent->nblks; j++) {
- struct blk blk;
-
- if (ent->blks[j] > nblks)
- errx(1, "index is corrupted");
- read_blk(&blk, ent->blks[j]);
- sha256_update(&ctx, blk.data, blk.size);
- }
- sha256_finish(&ctx, md);
+ SHA256_Init(&ctx);
+ SHA256_Update(&ctx, buf, ent->bdescr[i].size);
+ SHA256_Final(md, &ctx);
- if (memcmp(ent->md, md, sizeof(ent->md)) != 0)
- errx(1, "hash mismatch");
+ if (memcmp(ent->bdescr[i].md, md,
+ sizeof(ent->bdescr[i].md)) == 0)
+ continue;
- free(ent);
+ fprintf(stderr, "Block hash mismatch\n");
+ fprintf(stderr, " Expected hash: ");
+ /* The expected value is the block digest, not the file digest */
+ print_md(ent->bdescr[i].md, sizeof(ent->bdescr[i].md));
+ fputc('\n', stderr);
+ fprintf(stderr, " Actual hash: ");
+ print_md(md, sizeof(md));
+ fputc('\n', stderr);
+ fprintf(stderr, " Offset: %llu\n",
+ (unsigned long long)ent->bdescr[i].offset);
+ fprintf(stderr, " Size: %llu\n",
+ (unsigned long long)ent->bdescr[i].size);
}
+ free(buf);
+ return WALK_CONTINUE;
}
-void
-list(void)
+/*
+ * walk() callback: print the file hash of ent as one hexadecimal line on
+ * stdout. Always asks the walk to continue; arg is unused.
+ */
+int
+list(struct ent *ent, void *arg)
+{
+ size_t i;
+
+ /*
+ * print_md() writes to stderr; the listing is program output, so the
+ * digest is formatted here to keep it on stdout with its newline.
+ */
+ for (i = 0; i < sizeof(ent->md); i++)
+ printf("%02x", ent->md[i]);
+ putchar('\n');
+ return WALK_CONTINUE;
+}
+
+/*
+ * walk() callback: add the block descriptors of ent to the in-memory
+ * cache. Descriptors are taken from the index as they are; a digest that
+ * is already cached is skipped so that no entry is allocated for it.
+ * Always asks the walk to continue; arg is unused.
+ */
+int
+rebuild_cache(struct ent *ent, void *arg)
{
uint64_t i;
- lseek(ifd, sizeof(enthdr), SEEK_SET);
- for (i = 0; i < enthdr.nents; i++) {
- struct ent ent;
- size_t i;
-
- if (xread(ifd, &ent, sizeof(ent)) == 0)
- errx(1, "unexpected EOF");
-
- for (i = 0; i < sizeof(ent.md); i++)
- printf("%02x", ent.md[i]);
- if (verbose)
- printf(" %llu", (unsigned long long)ent.nblks * BLKSIZ);
- putchar('\n');
- lseek(ifd, ent.nblks * sizeof(ent.blks[0]), SEEK_CUR);
+ for (i = 0; i < ent->nblks; i++) {
+ struct bdescr cached;
+ struct cent *cent;
+
+ /* Blocks shared between entries only need one cache entry */
+ if (lookup_blk(ent->bdescr[i].md, &cached) == 0)
+ continue;
+
+ cent = alloc_cent();
+ cent->bdescr = ent->bdescr[i];
+ add_cent(cent);
}
+ return WALK_CONTINUE;
}
+/*
+ * Walk through all index entries and call fn() on each one.
+ * Each entry is loaded together with its block descriptor table, handed
+ * to fn() along with arg, and released again before the next one is read.
+ * The walk ends early when fn() returns WALK_STOP.
+ */
void
-rebuild_cache(void)
+walk(int (*fn)(struct ent *, void *), void *arg)
{
- uint64_t nblks, i;
+ uint64_t i;
- if (verbose)
- fprintf(stderr, "rebuilding cache...");
- nblks = storefile_nblks();
- lseek(cfd, 0, SEEK_SET);
- for (i = 0; i < nblks; i++) {
- struct cache_ent *ent;
- struct blk blk;
-
- read_blk(&blk, i);
- ent = alloc_cache_ent(blk.md, i);
- add_cache_ent(ent);
- ent->dirty = 1;
+ lseek(ifd, sizeof(enthdr), SEEK_SET);
+ for (i = 0; i < enthdr.nents; i++) {
+ struct ent *ent;
+ int ret;
+
+ ent = alloc_ent();
+ if (xread(ifd, ent, sizeof(*ent)) == 0)
+ errx(1, "read: unexpected EOF");
+
+ ent = grow_ent(ent, ent->nblks);
+ if (xread(ifd, ent->bdescr,
+ ent->nblks * sizeof(ent->bdescr[0])) == 0)
+ errx(1, "read: unexpected EOF");
+
+ /* Release the entry on every iteration, not just the last */
+ ret = (*fn)(ent, arg);
+ free(ent);
+ if (ret == WALK_STOP)
+ break;
}
- flush_cache();
- if (verbose)
- fprintf(stderr, "done\n");
}
+/*
+ * Load every block descriptor stored in the cache file into the
+ * in-memory cache. With verbose set, the offset and size of each loaded
+ * descriptor are printed on stderr.
+ */
void
init_cache(void)
{
- uint64_t nblks, i;
+ uint64_t nents, i;
- if (verbose)
- fprintf(stderr, "initializing cache...");
- nblks = cachefile_nblks();
+ nents = cache_nents();
lseek(cfd, 0, SEEK_SET);
- for (i = 0; i < nblks; i++) {
- struct blk blk;
- struct cache_ent *ent;
-
- ent = alloc_cache_ent(blk.md, i);
- if (xread(cfd, &ent->data, sizeof(ent->data)) == 0)
- errx(1, "unexpected EOF");
- add_cache_ent(ent);
+ for (i = 0; i < nents; i++) {
+ struct cent *cent;
+
+ cent = alloc_cent();
+ if (xread(cfd, &cent->bdescr, sizeof(cent->bdescr)) == 0)
+ errx(1, "read: unexpected EOF");
+ add_cent(cent);
+
+ if (verbose) {
+ fprintf(stderr, "bdescr.offset: %llu bdescr.size: %llu\n",
+ (unsigned long long)cent->bdescr.offset,
+ (unsigned long long)cent->bdescr.size);
+ }
}
- if (verbose)
- fprintf(stderr, "done\n");
}
void
@@ -530,15 +583,16 @@ init(void)
if (sb.st_size != 0)
xread(ifd, &enthdr, sizeof(enthdr));
- if (cachefile_nblks() != storefile_nblks())
- rebuild_cache();
- else
+ if (cache_nents() != 0)
init_cache();
+ else
+ walk(rebuild_cache, NULL);
}
void
term(void)
{
+ flush_cache();
free_cache();
fsync(ifd);
@@ -560,6 +614,7 @@ usage(void)
int
main(int argc, char *argv[])
{
+ uint8_t md[MDSIZ];
char *id = NULL, *root = NULL;
int fd = -1, lflag = 0, cflag = 0;
@@ -611,19 +666,20 @@ main(int argc, char *argv[])
init();
if (cflag) {
- check();
+ walk(check, NULL);
term();
return 0;
}
if (lflag) {
- list();
+ walk(list, NULL);
term();
return 0;
}
if (id) {
- extract(id, fd);
+ str2bin(id, md);
+ walk(extract, &(struct extract_args){ .md = md, .fd = fd });
} else {
dedup(fd);
}
diff --git a/sha256.c b/sha256.c
@@ -1,261 +0,0 @@
-/*
- * FIPS-180-2 compliant SHA-256 implementation
- *
- * Copyright (C) 2001-2003 Christophe Devine
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <string.h>
-
-#include "sha256.h"
-
-#define GET_UINT32(n,b,i) \
-{ \
- (n) = ( (uint32) (b)[(i) ] << 24 ) \
- | ( (uint32) (b)[(i) + 1] << 16 ) \
- | ( (uint32) (b)[(i) + 2] << 8 ) \
- | ( (uint32) (b)[(i) + 3] ); \
-}
-
-#define PUT_UINT32(n,b,i) \
-{ \
- (b)[(i) ] = (uint8) ( (n) >> 24 ); \
- (b)[(i) + 1] = (uint8) ( (n) >> 16 ); \
- (b)[(i) + 2] = (uint8) ( (n) >> 8 ); \
- (b)[(i) + 3] = (uint8) ( (n) ); \
-}
-
-void sha256_starts( sha256_context *ctx )
-{
- ctx->total[0] = 0;
- ctx->total[1] = 0;
-
- ctx->state[0] = 0x6A09E667;
- ctx->state[1] = 0xBB67AE85;
- ctx->state[2] = 0x3C6EF372;
- ctx->state[3] = 0xA54FF53A;
- ctx->state[4] = 0x510E527F;
- ctx->state[5] = 0x9B05688C;
- ctx->state[6] = 0x1F83D9AB;
- ctx->state[7] = 0x5BE0CD19;
-}
-
-void sha256_process( sha256_context *ctx, uint8 data[64] )
-{
- uint32 temp1, temp2, W[64];
- uint32 A, B, C, D, E, F, G, H;
-
- GET_UINT32( W[0], data, 0 );
- GET_UINT32( W[1], data, 4 );
- GET_UINT32( W[2], data, 8 );
- GET_UINT32( W[3], data, 12 );
- GET_UINT32( W[4], data, 16 );
- GET_UINT32( W[5], data, 20 );
- GET_UINT32( W[6], data, 24 );
- GET_UINT32( W[7], data, 28 );
- GET_UINT32( W[8], data, 32 );
- GET_UINT32( W[9], data, 36 );
- GET_UINT32( W[10], data, 40 );
- GET_UINT32( W[11], data, 44 );
- GET_UINT32( W[12], data, 48 );
- GET_UINT32( W[13], data, 52 );
- GET_UINT32( W[14], data, 56 );
- GET_UINT32( W[15], data, 60 );
-
-#define SHR(x,n) ((x & 0xFFFFFFFF) >> n)
-#define ROTR(x,n) (SHR(x,n) | (x << (32 - n)))
-
-#define S0(x) (ROTR(x, 7) ^ ROTR(x,18) ^ SHR(x, 3))
-#define S1(x) (ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10))
-
-#define S2(x) (ROTR(x, 2) ^ ROTR(x,13) ^ ROTR(x,22))
-#define S3(x) (ROTR(x, 6) ^ ROTR(x,11) ^ ROTR(x,25))
-
-#define F0(x,y,z) ((x & y) | (z & (x | y)))
-#define F1(x,y,z) (z ^ (x & (y ^ z)))
-
-#define R(t) \
-( \
- W[t] = S1(W[t - 2]) + W[t - 7] + \
- S0(W[t - 15]) + W[t - 16] \
-)
-
-#define P(a,b,c,d,e,f,g,h,x,K) \
-{ \
- temp1 = h + S3(e) + F1(e,f,g) + K + x; \
- temp2 = S2(a) + F0(a,b,c); \
- d += temp1; h = temp1 + temp2; \
-}
-
- A = ctx->state[0];
- B = ctx->state[1];
- C = ctx->state[2];
- D = ctx->state[3];
- E = ctx->state[4];
- F = ctx->state[5];
- G = ctx->state[6];
- H = ctx->state[7];
-
- P( A, B, C, D, E, F, G, H, W[ 0], 0x428A2F98 );
- P( H, A, B, C, D, E, F, G, W[ 1], 0x71374491 );
- P( G, H, A, B, C, D, E, F, W[ 2], 0xB5C0FBCF );
- P( F, G, H, A, B, C, D, E, W[ 3], 0xE9B5DBA5 );
- P( E, F, G, H, A, B, C, D, W[ 4], 0x3956C25B );
- P( D, E, F, G, H, A, B, C, W[ 5], 0x59F111F1 );
- P( C, D, E, F, G, H, A, B, W[ 6], 0x923F82A4 );
- P( B, C, D, E, F, G, H, A, W[ 7], 0xAB1C5ED5 );
- P( A, B, C, D, E, F, G, H, W[ 8], 0xD807AA98 );
- P( H, A, B, C, D, E, F, G, W[ 9], 0x12835B01 );
- P( G, H, A, B, C, D, E, F, W[10], 0x243185BE );
- P( F, G, H, A, B, C, D, E, W[11], 0x550C7DC3 );
- P( E, F, G, H, A, B, C, D, W[12], 0x72BE5D74 );
- P( D, E, F, G, H, A, B, C, W[13], 0x80DEB1FE );
- P( C, D, E, F, G, H, A, B, W[14], 0x9BDC06A7 );
- P( B, C, D, E, F, G, H, A, W[15], 0xC19BF174 );
- P( A, B, C, D, E, F, G, H, R(16), 0xE49B69C1 );
- P( H, A, B, C, D, E, F, G, R(17), 0xEFBE4786 );
- P( G, H, A, B, C, D, E, F, R(18), 0x0FC19DC6 );
- P( F, G, H, A, B, C, D, E, R(19), 0x240CA1CC );
- P( E, F, G, H, A, B, C, D, R(20), 0x2DE92C6F );
- P( D, E, F, G, H, A, B, C, R(21), 0x4A7484AA );
- P( C, D, E, F, G, H, A, B, R(22), 0x5CB0A9DC );
- P( B, C, D, E, F, G, H, A, R(23), 0x76F988DA );
- P( A, B, C, D, E, F, G, H, R(24), 0x983E5152 );
- P( H, A, B, C, D, E, F, G, R(25), 0xA831C66D );
- P( G, H, A, B, C, D, E, F, R(26), 0xB00327C8 );
- P( F, G, H, A, B, C, D, E, R(27), 0xBF597FC7 );
- P( E, F, G, H, A, B, C, D, R(28), 0xC6E00BF3 );
- P( D, E, F, G, H, A, B, C, R(29), 0xD5A79147 );
- P( C, D, E, F, G, H, A, B, R(30), 0x06CA6351 );
- P( B, C, D, E, F, G, H, A, R(31), 0x14292967 );
- P( A, B, C, D, E, F, G, H, R(32), 0x27B70A85 );
- P( H, A, B, C, D, E, F, G, R(33), 0x2E1B2138 );
- P( G, H, A, B, C, D, E, F, R(34), 0x4D2C6DFC );
- P( F, G, H, A, B, C, D, E, R(35), 0x53380D13 );
- P( E, F, G, H, A, B, C, D, R(36), 0x650A7354 );
- P( D, E, F, G, H, A, B, C, R(37), 0x766A0ABB );
- P( C, D, E, F, G, H, A, B, R(38), 0x81C2C92E );
- P( B, C, D, E, F, G, H, A, R(39), 0x92722C85 );
- P( A, B, C, D, E, F, G, H, R(40), 0xA2BFE8A1 );
- P( H, A, B, C, D, E, F, G, R(41), 0xA81A664B );
- P( G, H, A, B, C, D, E, F, R(42), 0xC24B8B70 );
- P( F, G, H, A, B, C, D, E, R(43), 0xC76C51A3 );
- P( E, F, G, H, A, B, C, D, R(44), 0xD192E819 );
- P( D, E, F, G, H, A, B, C, R(45), 0xD6990624 );
- P( C, D, E, F, G, H, A, B, R(46), 0xF40E3585 );
- P( B, C, D, E, F, G, H, A, R(47), 0x106AA070 );
- P( A, B, C, D, E, F, G, H, R(48), 0x19A4C116 );
- P( H, A, B, C, D, E, F, G, R(49), 0x1E376C08 );
- P( G, H, A, B, C, D, E, F, R(50), 0x2748774C );
- P( F, G, H, A, B, C, D, E, R(51), 0x34B0BCB5 );
- P( E, F, G, H, A, B, C, D, R(52), 0x391C0CB3 );
- P( D, E, F, G, H, A, B, C, R(53), 0x4ED8AA4A );
- P( C, D, E, F, G, H, A, B, R(54), 0x5B9CCA4F );
- P( B, C, D, E, F, G, H, A, R(55), 0x682E6FF3 );
- P( A, B, C, D, E, F, G, H, R(56), 0x748F82EE );
- P( H, A, B, C, D, E, F, G, R(57), 0x78A5636F );
- P( G, H, A, B, C, D, E, F, R(58), 0x84C87814 );
- P( F, G, H, A, B, C, D, E, R(59), 0x8CC70208 );
- P( E, F, G, H, A, B, C, D, R(60), 0x90BEFFFA );
- P( D, E, F, G, H, A, B, C, R(61), 0xA4506CEB );
- P( C, D, E, F, G, H, A, B, R(62), 0xBEF9A3F7 );
- P( B, C, D, E, F, G, H, A, R(63), 0xC67178F2 );
-
- ctx->state[0] += A;
- ctx->state[1] += B;
- ctx->state[2] += C;
- ctx->state[3] += D;
- ctx->state[4] += E;
- ctx->state[5] += F;
- ctx->state[6] += G;
- ctx->state[7] += H;
-}
-
-void sha256_update( sha256_context *ctx, uint8 *input, uint32 length )
-{
- uint32 left, fill;
-
- if( ! length ) return;
-
- left = ctx->total[0] & 0x3F;
- fill = 64 - left;
-
- ctx->total[0] += length;
- ctx->total[0] &= 0xFFFFFFFF;
-
- if( ctx->total[0] < length )
- ctx->total[1]++;
-
- if( left && length >= fill )
- {
- memcpy( (void *) (ctx->buffer + left),
- (void *) input, fill );
- sha256_process( ctx, ctx->buffer );
- length -= fill;
- input += fill;
- left = 0;
- }
-
- while( length >= 64 )
- {
- sha256_process( ctx, input );
- length -= 64;
- input += 64;
- }
-
- if( length )
- {
- memcpy( (void *) (ctx->buffer + left),
- (void *) input, length );
- }
-}
-
-static uint8 sha256_padding[64] =
-{
- 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-void sha256_finish( sha256_context *ctx, uint8 digest[32] )
-{
- uint32 last, padn;
- uint32 high, low;
- uint8 msglen[8];
-
- high = ( ctx->total[0] >> 29 )
- | ( ctx->total[1] << 3 );
- low = ( ctx->total[0] << 3 );
-
- PUT_UINT32( high, msglen, 0 );
- PUT_UINT32( low, msglen, 4 );
-
- last = ctx->total[0] & 0x3F;
- padn = ( last < 56 ) ? ( 56 - last ) : ( 120 - last );
-
- sha256_update( ctx, sha256_padding, padn );
- sha256_update( ctx, msglen, 8 );
-
- PUT_UINT32( ctx->state[0], digest, 0 );
- PUT_UINT32( ctx->state[1], digest, 4 );
- PUT_UINT32( ctx->state[2], digest, 8 );
- PUT_UINT32( ctx->state[3], digest, 12 );
- PUT_UINT32( ctx->state[4], digest, 16 );
- PUT_UINT32( ctx->state[5], digest, 20 );
- PUT_UINT32( ctx->state[6], digest, 24 );
- PUT_UINT32( ctx->state[7], digest, 28 );
-}
diff --git a/sha256.h b/sha256.h
@@ -1,24 +0,0 @@
-#ifndef _SHA256_H
-#define _SHA256_H
-
-#ifndef uint8
-#define uint8 unsigned char
-#endif
-
-#ifndef uint32
-#define uint32 unsigned long int
-#endif
-
-typedef struct
-{
- uint32 total[2];
- uint32 state[8];
- uint8 buffer[64];
-}
-sha256_context;
-
-void sha256_starts( sha256_context *ctx );
-void sha256_update( sha256_context *ctx, uint8 *input, uint32 length );
-void sha256_finish( sha256_context *ctx, uint8 digest[32] );
-
-#endif /* sha256.h */