diff options
| author | Marc Vertes <mvertes@free.fr> | 2024-10-03 22:31:22 +0200 |
|---|---|---|
| committer | Marc Vertes <mvertes@free.fr> | 2024-10-03 22:31:22 +0200 |
| commit | a5f74f1b1618863b8489bd6fede8222cb9e6d400 (patch) | |
| tree | e19d599f8eb8cf9398934228d7eda47efa08e63e | |
| parent | 282149e530d1d19fc9903b0a688de5b794540f48 (diff) | |
add unflatenc and unchunkify
| -rw-r--r-- | README.md | 37 | ||||
| -rw-r--r-- | main.go | 17 |
2 files changed, 47 insertions, 7 deletions
@@ -2,7 +2,7 @@ Incremental encrypted backup system -## Current design +## design v0 1. cksum original (sha256) 2. compress (gzip) @@ -17,17 +17,45 @@ Good: - chunks are named from their compressed/crypted hmac. Problems: -- the salt (or iv in aes) must be set to 0. Weak encryption. +- the salt (or iv in aes) must be static, to make the encryption + idempotent, otherwise no dedup. Weak encryption. - dedup occurs only for append only files. The same chunk content will lead to a different hmac if located at a different offset. -To fix: +## design v1 + - chunk before compression -- name chunks from cksum of uncompressed/unencrypted data. +- name chunks from checksum of uncompressed/unencrypted data (invariant => allow dedup). - then compress and encrypt (in this order). Chunk encryption can use randomized cipher, but a hmac must be added at end of file (before encrypt) to check integrity without having to decrypt/decompress. +This is achieved through aes-gcm. + +Problems: +- possible collisions of chunks with same name (same content) but encrypted + with a foreign key (different user), which would lead a user to download a block + which they could not decrypt. + +## design v2 + +Each user has a fixed unique id: random 96 bits (12 bytes). This id is added +to the content of each block / file prior to computing the invariant checksum +but is not transmitted (no storage overhead). + +It avoids collisions between identical original content blocks of different +users. Dedup should only happen in the same user space, as one cannot decrypt +a block from another user. + +Problems: +- in this design, and all previous ones, there is no way to discard data in an + archive. For example, tarsnap does not allow data to be suppressed. + +## Roadmap + +discarded: +- encode checksums in base64 instead of hex. Wrong idea: incompatible with case + insensitive filesystems (macos). 
## What tarsnap is doing @@ -37,7 +65,6 @@ file (before encrypt) to check integrity without having to decrypt/decompress. 4. compress chunk (deflate) 5. encrypt chunk (rsa2048) + HMAC - ## References - tarsnap: https://www.tarsnap.com https://github.com/tarsnap/tarsnap @@ -215,6 +215,19 @@ func chunkify(root, name string, data, key []byte) error { return flatenc(root, name, index, key) } +func unchunkify(root, name string, single bool, key []byte) ([]byte, error) { + d, err := unflatenc(root, name, key) + if single || err != nil { + return d, err + } + sums := split(d, 32) + raw := []byte{} + for _, sum := range sums { + log.Printf("sum %x\n", sum) + } + return raw, nil +} + func flatenc(root, name string, data, key []byte) error { // Flatten data. var buf bytes.Buffer @@ -246,7 +259,7 @@ func flatenc(root, name string, data, key []byte) error { return writeCksumFile(filepath.Join(root, ".bb", "chunks"), name, append(iv, enc...)) } -func unflatenc(root, name string, key []byte) (raw []byte, err error) { +func unflatenc(root, name string, key []byte) ([]byte, error) { enc, err := os.ReadFile(filepath.Join(root, ".bb", "chunks", name[:2], name[2:])) if err != nil { return nil, err @@ -302,7 +315,7 @@ func main() { if err != nil { log.Fatal(err) } - d, err := unflatenc(wd, *rfile, key) + d, err := unchunkify(wd, *rfile, false, key) log.Println("d", err, string(d)) // log.Printf("d: %d %v %x\n", len(d), err, d) return |
