summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarc Vertes <mvertes@free.fr>2024-10-02 22:43:42 +0200
committerMarc Vertes <mvertes@free.fr>2024-10-02 22:43:42 +0200
commit282149e530d1d19fc9903b0a688de5b794540f48 (patch)
tree4074b7d6bff00f9f92b271c7bfa03f79bad7e65c
parent9ec19922addee0137f083c66b458c6aefe59191a (diff)
fix chunkify
-rw-r--r--README.md46
-rw-r--r--main.go256
2 files changed, 277 insertions, 25 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..50604d0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,46 @@
+# bb: basic backup
+
+Incremental encrypted backup system
+
+## Current design
+
+1. cksum original (sha256)
+2. compress (gzip)
+3. encrypt (aes256)
+4. split in cksumed chunks. chunks are named from the hmac of
+   encrypted+compressed
+5. build index of chunks
+6. compress (gzip) and encrypt (aes) index
+7. return index cksum
+
+Good:
+- chunks are named from their compressed/crypted hmac.
+
+Problems:
+- the salt (or IV in AES) must be set to 0. Weak encryption.
+- dedup occurs only for append only files. The same chunk content will lead to
+ a different hmac if located at a different offset.
+
+To fix:
+- chunk before compression
+- name chunks from cksum of uncompressed/unencrypted data.
+- then compress and encrypt (in this order).
+
+Chunk encryption can use a randomized cipher, but an HMAC must be added at the end of
+the file (before encryption) to check integrity without having to decrypt/decompress.
+
+## What tarsnap is doing
+
+1. cksum original (sha256)
+2. build chunks of variable size
+3. cksum uncompressed unencrypted chunks
+4. compress chunk (deflate)
+5. encrypt chunk (rsa2048) + HMAC
+
+
+## References
+
+- tarsnap: https://www.tarsnap.com https://github.com/tarsnap/tarsnap
+- chunker: https://github.com/karinushka/chunker
+- borg: https://borgbackup.org
+- rclone: https://rclone.org
diff --git a/main.go b/main.go
index 9cb32f2..ddc9782 100644
--- a/main.go
+++ b/main.go
@@ -2,27 +2,57 @@ package main
import (
"bufio"
+ "bytes"
+ "compress/flate"
+ "crypto/aes"
+ "crypto/cipher"
+ "crypto/rand"
"crypto/sha256"
+ "errors"
+ "flag"
"fmt"
+ "io"
"io/fs"
"log"
"net/url"
"os"
+ "path"
"path/filepath"
"time"
)
type metadata struct {
size, mtime, mode int64
- // sum [sha256.Size]byte
- sum []byte
+ sum [sha256.Size]byte
}
type metamap map[string]metadata
+const chunkMax = 1 << 17 // 131072
+
func getIndex(root string, mm metamap) (index string, err error) {
- filesystem := os.DirFS(root)
- err = fs.WalkDir(filesystem, ".", func(path string, d fs.DirEntry, err error) error {
+ rfs := os.DirFS(root)
+
+ // Get the stored private encryption key.
+ key, err := fs.ReadFile(rfs, filepath.Join(".bb", "key"))
+ if err != nil {
+ return "", err
+ }
+ log.Printf("key: %x\n", key)
+
+ // Get the exclude file.
+ xc, err := getExclude(root)
+ if err != nil {
+ return "", err
+ }
+ xc = append(xc, ".bb")
+ log.Println("xc", xc)
+
+ // Walk the file tree to perform:
+ // - identification of changed files since previous backups
+	// - chunking of changed files into fixed-size encrypted blocks
+ // - construction of backup index
+ err = fs.WalkDir(rfs, ".", func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
@@ -30,23 +60,30 @@ func getIndex(root string, mm metamap) (index string, err error) {
if err != nil {
return err
}
+ if isExcluded(path, d.Name(), xc) {
+ return fs.SkipDir
+ }
if info.IsDir() {
- if d.Name() == ".bb" {
- return fs.SkipDir
- }
return nil
}
- b, err := os.ReadFile(path)
- if err != nil {
- return err
- }
-
- // md := metadata{info.Size(), info.ModTime().Unix(), int64(info.Mode()), sha256.Sum256(b)}
- a := sha256.Sum256(b)
- md := metadata{info.Size(), info.ModTime().Unix(), int64(info.Mode()), a[:]}
+ md := metadata{size: info.Size(), mtime: info.ModTime().Unix(), mode: int64(info.Mode())}
- log.Println(path, md, mm[path])
+ // Recompute file checksum only if file size, mode or mtime has changed.
+ if o, ok := mm[path]; ok && o.size == md.size && o.mtime == md.mtime && o.mode == md.mode {
+ md.sum = o.sum
+ } else {
+ b, err := os.ReadFile(path)
+ if err != nil {
+ return err
+ }
+ md.sum = sha256.Sum256(b)
+ log.Printf("archive %s %x\n", path, md.sum)
+ // chunk here, could be done in goroutine.
+ if err := chunkify(root, fmt.Sprintf("%x", md.sum), b, key); err != nil {
+ return err
+ }
+ }
index += fmt.Sprintf("%s %d %d %o %x\n", url.PathEscape(path), md.size, md.mtime, md.mode, md.sum)
return nil
@@ -54,6 +91,38 @@ func getIndex(root string, mm metamap) (index string, err error) {
return index, err
}
+func getExclude(root string) (str []string, err error) {
+ f, err := os.Open(filepath.Join(root, ".bb", "exclude"))
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return str, err
+ }
+ defer f.Close()
+ scan := bufio.NewScanner(f)
+ for scan.Scan() {
+ if s := scan.Text(); len(s) > 0 {
+ str = append(str, s)
+ }
+ }
+ return str, scan.Err()
+}
+
+func isExcluded(path, base string, excludes []string) bool {
+ for _, x := range excludes {
+ if match(base, x) || match(path, x) {
+ return true
+ }
+ }
+ return false
+}
+
+func match(pattern, name string) bool {
+ if matched, err := path.Match(pattern, name); err != nil {
+ panic(err)
+ } else {
+ return matched
+ }
+}
+
func readIndex(path string) (md metamap, err error) {
f, err := os.Open(path)
if err != nil {
@@ -63,32 +132,49 @@ func readIndex(path string) (md metamap, err error) {
md = metamap{}
scan := bufio.NewScanner(f)
for scan.Scan() {
- var p string
- var d metadata
- n, err := fmt.Sscanf(scan.Text(), "%s %d %d %o %64x", &p, &d.size, &d.mtime, &d.mode, &d.sum)
+ var (
+ p string
+ d metadata
+ s []byte
+ )
+ n, err := fmt.Sscanf(scan.Text(), "%s %d %d %o %64x", &p, &d.size, &d.mtime, &d.mode, &s)
if err != nil || n != 5 {
return md, err
}
+ copy(d.sum[:], s)
path, err := url.PathUnescape(p)
if err != nil {
return md, err
}
md[path] = d
}
- if err := scan.Err(); err != nil {
- return md, err
- }
- return md, nil
+ return md, scan.Err()
}
func initBB(root string) (current, previous string, err error) {
- if err = os.MkdirAll(filepath.Join(root, ".bb", "data"), 0o750); err != nil {
+ if err = os.MkdirAll(filepath.Join(root, ".bb"), 0o750); err != nil {
return "", "", err
}
- prevs, _ := fs.Glob(os.DirFS(root), filepath.Join(".bb", "index-*"))
+ rfs := os.DirFS(root)
+
+	// Create a private encryption key if it doesn't already exist.
+ if _, err := fs.Stat(rfs, filepath.Join(".bb", "key")); errors.Is(err, fs.ErrNotExist) {
+ buf := make([]byte, 32)
+ if _, err := rand.Read(buf); err != nil {
+ return "", "", err
+ }
+ if err := os.WriteFile(filepath.Join(root, ".bb", "key"), buf, 0o600); err != nil {
+ return "", "", err
+ }
+ }
+
+ // Retrieve the most recent backup index name.
+ prevs, _ := fs.Glob(rfs, filepath.Join(".bb", "index-*"))
if len(prevs) > 0 {
previous = prevs[len(prevs)-1]
}
+
+	// Build the name of the current (timestamped) backup index.
now := time.Now()
y, m, d := now.Date()
h, mn, s := now.Clock()
@@ -96,12 +182,132 @@ func initBB(root string) (current, previous string, err error) {
return current, previous, nil
}
+// chunkify reads data and writes fixed size encrypted compressed blocks.
+func chunkify(root, name string, data, key []byte) error {
+ // Steps:
+	// 1. checksum source file (done by caller), this will be the file index name
+	// 2. split in chunks. For each chunk, do:
+	//    1. checksum the chunk, before compression/encryption. This will be the chunk name.
+	//    2. compress the chunk (deflate)
+	//    3. encrypt and authenticate the result (aes-gcm)
+	//    4. write the chunk to a file named as above
+	//    5. add the chunk name to the file index
+	// 3. compress and encrypt the file index as above.
+
+ if len(data) <= chunkMax {
+ return flatenc(root, name, data, key)
+ }
+
+ // Split data in fixed size chunks.
+ chunks := split(data, chunkMax)
+ index := []byte{}
+ log.Println("chunkify", name)
+ log.Println("nchunks:", len(chunks), len(chunks[0]))
+
+ for i, c := range chunks {
+ sum := sha256.Sum256(c)
+ index = append(index, sum[:]...)
+ if err := flatenc(root, fmt.Sprintf("%x", sum), c, key); err != nil {
+ return fmt.Errorf("chunkify %s block %d: %w", name, i, err)
+ }
+ }
+ log.Println("file index:", name)
+ return flatenc(root, name, index, key)
+}
+
+func flatenc(root, name string, data, key []byte) error {
+ // Flatten data.
+ var buf bytes.Buffer
+ zw, _ := flate.NewWriter(&buf, flate.DefaultCompression)
+ if _, err := zw.Write(data); err != nil {
+ return fmt.Errorf("flatenc flatten write: %w", err)
+ }
+ if err := zw.Close(); err != nil {
+ return fmt.Errorf("flatenc flatten close: %w", err)
+ }
+
+	// Encrypt and authenticate the flattened data.
+ cb, err := aes.NewCipher(key)
+ if err != nil {
+ return fmt.Errorf("flatenc cipher: %w", err)
+ }
+ aesgcm, err := cipher.NewGCM(cb)
+ if err != nil {
+ return fmt.Errorf("flatenc gcm: %w", err)
+ }
+ iv := make([]byte, aesgcm.NonceSize())
+ if _, err := rand.Read(iv); err != nil {
+ return fmt.Errorf("flatenc iv: %w", err)
+ }
+ log.Printf("iv: %d %x\n", len(iv), iv)
+ enc := aesgcm.Seal(nil, iv, buf.Bytes(), nil)
+
+ // Write the result to a file named from checksum of original content.
+ return writeCksumFile(filepath.Join(root, ".bb", "chunks"), name, append(iv, enc...))
+}
+
+func unflatenc(root, name string, key []byte) (raw []byte, err error) {
+ enc, err := os.ReadFile(filepath.Join(root, ".bb", "chunks", name[:2], name[2:]))
+ if err != nil {
+ return nil, err
+ }
+ cb, err := aes.NewCipher(key)
+ if err != nil {
+ return nil, fmt.Errorf("unflatenc cipher: %w", err)
+ }
+ aesgcm, err := cipher.NewGCM(cb)
+ if err != nil {
+ return nil, fmt.Errorf("unflatenc gcm: %w", err)
+ }
+ l := aesgcm.NonceSize()
+ dec, err := aesgcm.Open(nil, enc[:l], enc[l:], nil)
+ if err != nil {
+ return nil, fmt.Errorf("unflatenc open: %w", err)
+ }
+ return io.ReadAll(flate.NewReader(bytes.NewBuffer(dec)))
+}
+
+func split(data []byte, size int) (chunks [][]byte) {
+ offset := 0
+ for offset+size < len(data) {
+ chunks = append(chunks, data[offset:offset+size])
+ offset += size
+ }
+ chunks = append(chunks, data[offset:])
+ return chunks
+}
+
+func writeCksumFile(prefix, name string, data []byte) error {
+ head, tail := name[:2], name[2:]
+ if err := os.MkdirAll(filepath.Join(prefix, head), 0o750); err != nil {
+ return err
+ }
+ return os.WriteFile(filepath.Join(prefix, head, tail), data, 0o640)
+}
+
func main() {
log.SetFlags(log.Lshortfile)
+
+ rfile := flag.String("read", "", "a cksum file")
+ flag.Parse()
+
wd, err := os.Getwd()
if err != nil {
log.Fatal(err)
}
+
+ if *rfile != "" {
+ log.Println("rfile:", *rfile)
+ key, err := os.ReadFile(filepath.Join(wd, ".bb", "key"))
+ if err != nil {
+ log.Fatal(err)
+ }
+ d, err := unflatenc(wd, *rfile, key)
+ log.Println("d", err, string(d))
+ // log.Printf("d: %d %v %x\n", len(d), err, d)
+ return
+ }
+
index, oldindex, err := initBB(wd)
if err != nil {
log.Fatal(err)