author    Marc Vertes <mvertes@free.fr>    2024-10-07 22:59:29 +0200
committer Marc Vertes <mvertes@free.fr>    2024-10-07 22:59:29 +0200
commit    1121b9e07310ad935dccb36d2678b56e9fab777c (patch)
tree      8ef4765d79847805d74d5a229ef6ec7dbfc7cf8c
parent    d04ff4c23a6c1a64fc0488c5bf5af7a4ceffd4a3 (diff)

write to external repo (main)

-rw-r--r--  README.md   23
-rw-r--r--  main.go    447
2 files changed, 310 insertions, 160 deletions
diff --git a/README.md b/README.md
index 68d9a2b..cad55d3 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
Incremental encrypted backup system
-## design v0
+## Archive v0
1. cksum original (sha256)
2. compress (gzip), sketched below
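
For context, a minimal sketch of these first two steps using only the standard library (the file names are illustrative, not the tool's API):

```go
package main

import (
	"compress/gzip"
	"crypto/sha256"
	"fmt"
	"log"
	"os"
)

func main() {
	data, err := os.ReadFile("file.txt")
	if err != nil {
		log.Fatal(err)
	}
	// 1. cksum original (sha256)
	fmt.Printf("%x\n", sha256.Sum256(data))

	// 2. compress (gzip)
	out, err := os.Create("file.txt.gz")
	if err != nil {
		log.Fatal(err)
	}
	defer out.Close()
	zw := gzip.NewWriter(out)
	if _, err := zw.Write(data); err != nil {
		log.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}
}
```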
@@ -22,7 +22,7 @@ Problems:
- dedup occurs only for append-only files. The same chunk content will lead to
a different HMAC if located at a different offset.
-## design v1
+## Archive v1
- chunk before compression
- name chunks from the checksum of uncompressed/unencrypted data (invariant => allows dedup; see the sketch below).
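
A sketch of this invariant naming; because the name is computed before compression and encryption, identical content always maps to the same chunk, wherever it occurs:

```go
package main

import (
	"crypto/sha256"
	"fmt"
)

// chunkName derives a chunk's name from the checksum of its plaintext, so the
// same content yields the same name at any offset and in any file: this is
// what makes deduplication possible.
func chunkName(chunk []byte) string {
	return fmt.Sprintf("%x", sha256.Sum256(chunk))
}

func main() {
	a := chunkName([]byte("same content"))
	b := chunkName([]byte("same content"))
	fmt.Println(a == b) // true: one stored chunk serves both occurrences
}
```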
@@ -37,7 +37,7 @@ Problems:
with a foreign key (different user), which would allow a user to download a
block which he could not decrypt.
-## design v2
+## Archive v2
Each user has a fixed unique id: random 96 bits (12 bytes). This id is added
to the content of each block / file prior to computing the invariant checksum
@@ -51,8 +51,22 @@ Problems:
- in this design, and all previous ones, there is no way to discard data in an
archive. For example, tarsnap does not allow suppressing data.
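
A sketch of the id salting introduced in v2 (the real tool stores the id alongside the key in the user config):

```go
package main

import (
	"crypto/rand"
	"crypto/sha256"
	"fmt"
	"log"
)

func main() {
	// Fixed unique user id: random 96 bits (12 bytes).
	id := make([]byte, 12)
	if _, err := rand.Read(id); err != nil {
		log.Fatal(err)
	}

	// Mixing the id into the data before hashing keeps the checksum invariant
	// for one user (dedup still works) while making it differ across users,
	// so a block can no longer be fetched by guessing a foreign checksum.
	chunk := []byte("chunk content")
	sum := sha256.Sum256(append(id, chunk...))
	fmt.Printf("%x\n", sum)
}
```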
+## Repository
+
+A repo is associated with a single id/key tuple, ensuring a deduplication space,
+i.e. a unique `chunks` directory.
+
+Each backup is denoted by its archive index $host:$dir:$date.
+
+$dir is the root directory of the backed-up files.
+
+A repo entrypoint is its current index, containing the list of $host:$dir:$date entries.
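
For illustration, using the format produced by currentIndex in main.go below
($dir is URL-escaped, and the second field is the hex checksum naming the
archive index), an entry could look like:

    myhost:%2Fhome%2Fme%2Fdocs:2024-1007-225929 <sha256 of the archive index, in hex>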
+
## Roadmap
+- p2p storage
+- chunker based on a rolling hash instead of fixed-size chunks (see the sketch below)
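
As a sketch of that roadmap item, a Rabin-Karp style rolling hash over a sliding window can declare a chunk boundary wherever the low bits of the hash are zero, so boundaries follow content rather than offsets; the window, mask, and multiplier below are illustrative choices:

```go
package main

import "fmt"

const (
	window = 64        // bytes hashed at any time
	mask   = 1<<12 - 1 // boundary when low 12 bits are zero: ~4 KiB average chunk
	mult   = 31        // hash multiplier
)

// cuts returns the chunk boundary offsets found in data.
func cuts(data []byte) []int {
	var offs []int
	var hash, pow uint64 = 0, 1
	for i := 0; i < window-1; i++ {
		pow *= mult // mult^(window-1), weight of the byte leaving the window
	}
	for i, b := range data {
		if i >= window {
			hash -= pow * uint64(data[i-window]) // slide: drop the oldest byte
		}
		hash = hash*mult + uint64(b)
		if i >= window && hash&mask == 0 {
			offs = append(offs, i+1) // boundary after this byte
		}
	}
	return offs
}

func main() {
	data := make([]byte, 1<<16)
	for i := range data {
		data[i] = byte(uint32(i) * 2654435761 >> 7) // deterministic filler
	}
	// Unlike fixed-size chunks, inserting a byte only shifts nearby cuts.
	fmt.Println(len(cuts(data)), "boundaries")
}
```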
+
discarded:
- encode checksums in base64 instead of hex. Wrong idea: incompatible with
case-insensitive filesystems (macOS).
@@ -68,6 +82,7 @@ disgarded:
## References
- tarsnap: https://www.tarsnap.com https://github.com/tarsnap/tarsnap
-- chunker: https://github.com/karinushka/chunker
+- tarsnap chunker in Go: https://github.com/karinushka/chunker
- borg: https://borgbackup.org
- rclone: https://rclone.org
+- restic: https://restic.readthedocs.io/en/v0.2.0/Design/
diff --git a/main.go b/main.go
index edb32fe..cd6203f 100644
--- a/main.go
+++ b/main.go
@@ -18,41 +18,79 @@ import (
"os"
"path"
"path/filepath"
+ "strings"
"time"
)
-type metadata struct {
+type fileInfo struct {
size, mtime, mode int64
sum [sha256.Size]byte
}
-type metamap map[string]metadata
+type fileMap map[string]fileInfo
const chunkMax = 1 << 17 // 131072
-func getIndex(root string, mm metamap) (index string, err error) {
- rfs := os.DirFS(root)
+type user struct {
+ id, key []byte
+ config, cache, host, repo string
+ ignore []string
+}
- // Get the stored private encryption key.
- key, err := fs.ReadFile(rfs, filepath.Join(".bb", "key"))
+func initUser(repo string) (*user, error) {
+ if repo == "" {
+ return nil, errors.New("repo missing")
+ }
+ config, err := os.UserConfigDir()
if err != nil {
- return "", err
+ return nil, err
}
- log.Printf("key: %x\n", key)
-
- // Get the exclude file.
- xc, err := getExclude(root)
+ cache, err := os.UserCacheDir()
if err != nil {
- return "", err
+ return nil, err
+ }
+ host, err := os.Hostname()
+ if err != nil {
+ return nil, err
+ }
+ abs, err := filepath.Abs(repo)
+ if err != nil {
+ return nil, err
+ }
+ u := user{
+ config: filepath.Join(config, "bb"),
+ cache: filepath.Join(cache, "bb"),
+ host: host,
+ repo: abs,
+ }
+ u.ignore = linesFile(filepath.Join(u.config, "ignore"))
+ k, err := os.ReadFile(filepath.Join(u.config, "key"))
+ if err == nil {
+ if len(k) != 12+32 {
+ return nil, fmt.Errorf("invalid key file length %d", len(k))
+ }
+ u.id, u.key = k[:12], k[12:] // 12-byte user id, 32-byte AES-256 key
+ return &u, nil
+ }
+
+ // Create a key and save it.
+ buf := make([]byte, 12+32)
+ if _, err := rand.Read(buf); err != nil {
+ return nil, err
+ }
+ u.id, u.key = buf[:12], buf[12:]
+ if err := os.MkdirAll(u.config, 0o755); err != nil {
+ return nil, err
}
- xc = append(xc, ".bb")
- log.Println("xc", xc)
+ return &u, os.WriteFile(filepath.Join(u.config, "key"), buf, 0o600)
+}
- // Walk the file tree to perform:
- // - identification of changed files since previous backups
- // - blockification of changed files
- // - construction of backup index
- err = fs.WalkDir(rfs, ".", func(path string, d fs.DirEntry, err error) error {
+func getIndex(u *user, root string, mm fileMap) (string, error) {
+ rfs := os.DirFS(root)
+
+ // Walk the fs tree to perform:
+ // - Identification of changed files since the previous backup (if any)
+ // - Chunkification of changed files
+ // - Generation of backup index
+ var index string
+ if err := fs.WalkDir(rfs, ".", func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
@@ -60,17 +98,17 @@ func getIndex(root string, mm metamap) (index string, err error) {
if err != nil {
return err
}
- if isExcluded(path, d.Name(), xc) {
+ if isExcluded(path, d.Name(), u.ignore) {
return fs.SkipDir
}
if info.IsDir() {
return nil
}
- md := metadata{size: info.Size(), mtime: info.ModTime().Unix(), mode: int64(info.Mode())}
+ md := fileInfo{size: info.Size(), mtime: info.ModTime().Unix(), mode: int64(info.Mode())}
- // Recompute file checksum only if file size, mode or mtime has changed.
- if o, ok := mm[path]; ok && o.size == md.size && o.mtime == md.mtime && o.mode == md.mode {
+ // Recompute file checksum only if file size or mtime has changed.
+ if o, ok := mm[path]; ok && o.size == md.size && o.mtime == md.mtime {
md.sum = o.sum
} else {
b, err := fs.ReadFile(rfs, path) // read via rfs: path is relative to root, not the cwd
@@ -79,31 +117,19 @@ func getIndex(root string, mm metamap) (index string, err error) {
}
md.sum = sha256.Sum256(b)
log.Printf("archive %s %x\n", path, md.sum)
- // chunk here, could be done in goroutine.
- if err := chunkify(root, fmt.Sprintf("%x", md.sum), b, key); err != nil {
+ if err := chunkify(u, root, fmt.Sprintf("%x", md.sum), b); err != nil {
return err
}
}
index += fmt.Sprintf("%s %d %d %o %x\n", url.PathEscape(path), md.size, md.mtime, md.mode, md.sum)
return nil
- })
- return index, err
-}
-
-func getExclude(root string) (str []string, err error) {
- f, err := os.Open(filepath.Join(root, ".bb", "exclude"))
- if err != nil && !errors.Is(err, os.ErrNotExist) {
- return str, err
- }
- defer f.Close()
- scan := bufio.NewScanner(f)
- for scan.Scan() {
- if s := scan.Text(); len(s) > 0 {
- str = append(str, s)
- }
+ }); err != nil {
+ return "", err
}
- return str, scan.Err()
+ sum := sha256.Sum256([]byte(index))
+ name := fmt.Sprintf("%x", sum)
+ return name, flatenc(u, name, []byte(index))
}
func isExcluded(path, base string, excludes []string) bool {
@@ -116,74 +142,14 @@ func isExcluded(path, base string, excludes []string) bool {
}
func match(pattern, name string) bool {
- if matched, err := path.Match(pattern, name); err != nil {
- panic(err)
- } else {
- return matched
- }
-}
-
-func readIndex(path string) (md metamap, err error) {
- f, err := os.Open(path)
- if err != nil {
- return md, err
- }
- defer f.Close()
- md = metamap{}
- scan := bufio.NewScanner(f)
- for scan.Scan() {
- var (
- p string
- d metadata
- s []byte
- )
- n, err := fmt.Sscanf(scan.Text(), "%s %d %d %o %64x", &p, &d.size, &d.mtime, &d.mode, &s)
- if err != nil || n != 5 {
- return md, err
- }
- copy(d.sum[:], s)
- path, err := url.PathUnescape(p)
- if err != nil {
- return md, err
- }
- md[path] = d
- }
- return md, scan.Err()
-}
-
-func initBB(root string) (current, previous string, err error) {
- if err = os.MkdirAll(filepath.Join(root, ".bb"), 0o750); err != nil {
- return "", "", err
- }
- rfs := os.DirFS(root)
-
- // Create a private encryption if it doesn't already exists.
- if _, err := fs.Stat(rfs, filepath.Join(".bb", "key")); errors.Is(err, fs.ErrNotExist) {
- buf := make([]byte, 32)
- if _, err := rand.Read(buf); err != nil {
- return "", "", err
- }
- if err := os.WriteFile(filepath.Join(root, ".bb", "key"), buf, 0o600); err != nil {
- return "", "", err
- }
- }
-
- // Retrieve the most recent backup index name.
- prevs, _ := fs.Glob(rfs, filepath.Join(".bb", "index-*"))
- if len(prevs) > 0 {
- previous = prevs[len(prevs)-1]
+ if m, err := path.Match(pattern, name); err == nil && m {
+ return true
}
-
- // Create a current backup index
- now := time.Now()
- y, m, d := now.Date()
- h, mn, s := now.Clock()
- current = filepath.Join(root, ".bb", fmt.Sprintf("index-%d-%02d%02d-%02d%02d%02d", y, m, d, h, mn, s))
- return current, previous, nil
+ return false
}
-// chunkify reads data and writes fixed size encrypted compressed blocks.
-func chunkify(root, name string, data, key []byte) error {
+// chunkify reads data and writes encrypted compressed chunks.
+func chunkify(u *user, root, name string, data []byte) error {
// Steps:
// 1. checksum source file (done by caller), this will be the file index name
// 2. split in chunks. For each chunk, do:
@@ -195,7 +161,7 @@ func chunkify(root, name string, data, key []byte) error {
// 3. compress and encrypt file index as above.
if len(data) <= chunkMax {
- return flatenc(root, name, data, key)
+ return flatenc(u, name, data)
}
// Split data in fixed size chunks.
@@ -207,23 +173,23 @@ func chunkify(root, name string, data, key []byte) error {
for i, c := range chunks {
sum := sha256.Sum256(c)
index = append(index, sum[:]...)
- if err := flatenc(root, fmt.Sprintf("%x", sum), c, key); err != nil {
+ if err := flatenc(u, fmt.Sprintf("%x", sum), c); err != nil {
return fmt.Errorf("chunkify %s block %d: %w", name, i, err)
}
}
log.Println("file index:", name)
- return flatenc(root, name, index, key)
+ return flatenc(u, name, index)
}
-func unchunkify(root, name string, single bool, key []byte) ([]byte, error) {
- d, err := unflatenc(root, name, key)
+func unchunkify(u *user, name string, single bool) ([]byte, error) {
+ d, err := unflatenc(u, name)
if single || err != nil {
return d, err
}
sums := split(d, 32)
raw := []byte{}
for i, sum := range sums {
- d, err := unflatenc(root, fmt.Sprintf("%x", sum), key)
+ d, err := unflatenc(u, fmt.Sprintf("%x", sum))
if err != nil {
return d, err
}
@@ -235,7 +201,7 @@ func unchunkify(root, name string, single bool, key []byte) ([]byte, error) {
return nil, nil
}
-func flatenc(root, name string, data, key []byte) error {
+func flatenc(u *user, name string, data []byte) error {
// Flatten data.
var buf bytes.Buffer
zw, _ := flate.NewWriter(&buf, flate.DefaultCompression)
@@ -247,7 +213,7 @@ func flatenc(root, name string, data, key []byte) error {
}
// Encrypt and authentify.
- cb, err := aes.NewCipher(key)
+ cb, err := aes.NewCipher(u.key)
if err != nil {
return fmt.Errorf("flatenc cipher: %w", err)
}
@@ -259,45 +225,55 @@ func flatenc(root, name string, data, key []byte) error {
if _, err := rand.Read(iv); err != nil {
return fmt.Errorf("flatenc iv: %w", err)
}
- log.Printf("iv: %d %x\n", len(iv), iv)
enc := aesgcm.Seal(nil, iv, buf.Bytes(), nil)
- // Write to a file named with original checksum.
- return writeCksumFile(filepath.Join(root, ".bb", "chunks"), name, append(iv, enc...))
+ if name == "index" {
+ return os.WriteFile(filepath.Join(u.repo, "index"), append(iv, enc...), 0o644)
+ }
+ return writeChunk(filepath.Join(u.repo, "chunks"), name, append(iv, enc...))
}
-func unflatenc(root, name string, key []byte) ([]byte, error) {
- enc, err := os.ReadFile(filepath.Join(root, ".bb", "chunks", name[:2], name[2:]))
+func unflatenc(u *user, name string) ([]byte, error) {
+ var path string
+ if name == "index" {
+ path = filepath.Join(u.repo, "index")
+ } else {
+ path = filepath.Join(u.repo, "chunks", name[:2], name[2:])
+ }
+ enc, err := os.ReadFile(path)
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("unflatenc %s open %w", path, err)
}
- cb, err := aes.NewCipher(key)
+ cb, err := aes.NewCipher(u.key)
if err != nil {
- return nil, fmt.Errorf("unflatenc cipher: %w", err)
+ return nil, fmt.Errorf("unflatenc %s cipher: %w", path, err)
}
- aesgcm, err := cipher.NewGCM(cb)
+ gcm, err := cipher.NewGCM(cb)
if err != nil {
- return nil, fmt.Errorf("unflatenc gcm: %w", err)
+ return nil, fmt.Errorf("unflatenc %s gcm: %w", path, err)
}
- l := aesgcm.NonceSize()
- dec, err := aesgcm.Open(nil, enc[:l], enc[l:], nil)
+ l := gcm.NonceSize()
+ dec, err := gcm.Open(nil, enc[:l], enc[l:], nil)
if err != nil {
- return nil, fmt.Errorf("unflatenc open: %w", err)
+ return nil, fmt.Errorf("unflatenc %s gcm open: %w", path, err)
}
return io.ReadAll(flate.NewReader(bytes.NewBuffer(dec)))
}
+// split divides a data block and returns a slice of fixed size blocks.
func split(data []byte, size int) (chunks [][]byte) {
offset := 0
for offset+size < len(data) {
chunks = append(chunks, data[offset:offset+size])
offset += size
}
- chunks = append(chunks, data[offset:])
- return chunks
+ if offset == len(data) {
+ return chunks
+ }
+ return append(chunks, data[offset:])
}
-func writeCksumFile(prefix, name string, data []byte) error {
+func writeChunk(prefix, name string, data []byte) error {
// Shard chunks across 256 subdirectories named by the first checksum byte.
head, tail := name[:2], name[2:]
if err := os.MkdirAll(filepath.Join(prefix, head), 0o750); err != nil {
return err
@@ -305,46 +281,205 @@ func writeCksumFile(prefix, name string, data []byte) error {
return os.WriteFile(filepath.Join(prefix, head, tail), data, 0o640)
}
-func main() {
- log.SetFlags(log.Lshortfile)
-
- rfile := flag.String("read", "", "a cksum file")
- flag.Parse()
+func lastCacheEntry(name string) string {
+ cache, err := os.UserCacheDir()
+ if err != nil {
+ return ""
+ }
+ if lines := linesFile(filepath.Join(cache, "bb", name)); len(lines) > 0 {
+ return lines[len(lines)-1]
+ }
+ return ""
+}
- wd, err := os.Getwd()
+// linesFile returns the lines of the named file, or nil if it cannot be read.
+func linesFile(name string) (lines []string) {
+ f, err := os.Open(name)
if err != nil {
- log.Fatal(err)
+ return nil
+ }
+ scan := bufio.NewScanner(f)
+ for scan.Scan() {
+ lines = append(lines, scan.Text())
}
+ f.Close()
+ return lines
+}
- if *rfile != "" {
- log.Println("rfile:", *rfile)
- key, err := os.ReadFile(filepath.Join(wd, ".bb", "key"))
- if err != nil {
- log.Fatal(err)
+// userUpdate is a stub: writing the updated repo to external storage is not
+// implemented yet.
+func userUpdate(u *user) error {
+ return nil
+}
+
+func previousIndex(repo, host, root string) string {
+ indexes := linesFile(filepath.Join(repo, "index"))
+ for _, idx := range indexes {
+ if !strings.HasPrefix(idx, host+":"+url.PathEscape(root)+":") {
+ continue
+ }
+ if words := strings.Fields(idx); len(words) > 1 {
+ return words[1]
}
- d, err := unchunkify(wd, *rfile, false, key)
- log.Println("d", err, string(d))
- // log.Printf("d: %d %v %x\n", len(d), err, d)
- return
}
+ return ""
+}
+
+func currentIndex(host, root string) string {
+ now := time.Now()
+ y, m, d := now.Date()
+ h, mn, s := now.Clock()
+ return fmt.Sprintf("%s:%s:%d-%02d%02d-%02d%02d%02d", host, url.PathEscape(root), y, m, d, h, mn, s)
+}
- index, oldindex, err := initBB(wd)
+// parent returns the parent index of archive for incremental update.
+func parent(u *user, root, index string) (fileMap, error) {
+ var psum string
+ for _, line := range strings.Split(index, "\n") {
+ if words := strings.Fields(line); len(words) == 2 && strings.HasPrefix(words[0], u.host+":"+url.PathEscape(root)+":") {
+ psum = words[1]
+ }
+ }
+ if psum == "" {
+ return nil, nil
+ }
+ d, err := unflatenc(u, psum)
if err != nil {
- log.Fatal(err)
+ return nil, nil // parent index unreadable: fall back to a full backup
}
-
- md := metamap{}
- if oldindex != "" {
- if md, err = readIndex(oldindex); err != nil {
- log.Fatal(err)
+ md := fileMap{}
+ for _, line := range strings.Split(string(d), "\n") {
+ if line == "" {
+ continue
+ }
+ var (
+ p string
+ d fileInfo
+ s []byte
+ )
+ n, err := fmt.Sscanf(line, "%s %d %d %o %64x", &p, &d.size, &d.mtime, &d.mode, &s)
+ if err != nil || n != 5 {
+ return md, err
+ }
+ copy(d.sum[:], s)
+ path, err := url.PathUnescape(p)
+ if err != nil {
+ return md, err
}
+ md[path] = d
+ }
+ return md, nil
+}
+
+func create(args []string) (err error) {
+ flg := flag.NewFlagSet("create", flag.ContinueOnError)
+ flg.Usage = func() { fmt.Println("Usage: bb create [src [dest]]") }
+ if err = flg.Parse(args); err != nil {
+ return err
+ }
+
+ repo := lastCacheEntry("repo")
+ root := lastCacheEntry("root")
+ switch len(flg.Args()) {
+ case 2:
+ root, repo = flg.Arg(0), flg.Arg(1)
+ case 1:
+ root = flg.Arg(0)
+ }
+ if root == "" {
+ return errors.New("root missing")
}
- data, err := getIndex(wd, md)
+ u, err := initUser(repo)
if err != nil {
- log.Fatal(err)
+ return err
+ }
+ abs, err := filepath.Abs(root)
+ if err != nil {
+ return err
+ }
+ p, err := unflatenc(u, "index")
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+ index := string(p)
+ md, err := parent(u, abs, index)
+ if err != nil {
+ return err
+ }
+ data, err := getIndex(u, root, md)
+ if err != nil {
+ return err
+ }
+ index += fmt.Sprintf("%s %s\n", currentIndex(u.host, abs), string(data))
+ if err := flatenc(u, "index", []byte(index)); err != nil {
+ return err
+ }
+ return userUpdate(u)
+}
+
+func extract(args []string) (err error) {
+ flg := flag.NewFlagSet("extract", flag.ContinueOnError)
+ flg.Usage = func() { fmt.Println("Usage: bb extract [repo] [src] [dest]") }
+ if err = flg.Parse(args); err != nil {
+ return err
+ }
+ // TODO: extraction is not implemented yet.
+ return nil
+}
+
+func list(args []string) (err error) {
+ flg := flag.NewFlagSet("list", flag.ContinueOnError)
+ flg.Usage = func() { fmt.Println("Usage: bb list [repo]") }
+ if err = flg.Parse(args); err != nil {
+ return err
+ }
+ repo := lastCacheEntry("repo")
+ arc := "index"
+ switch flg.NArg() {
+ case 1:
+ repo = flg.Arg(0)
+ case 2:
+ repo = flg.Arg(0)
+ arc = flg.Arg(1)
}
- err = os.WriteFile(index, []byte(data), 0o644)
+ u, err := initUser(repo)
if err != nil {
+ return err
+ }
+ d, err := unflatenc(u, arc)
+ if err != nil {
+ return err
+ }
+ log.Println("index:", string(d))
+ return nil
+}
+
+const usage = `bb is a backup tool.
+
+Usage: bb [commands] [arguments]
+
+Commands:
+ create Create a backup archive of a directory
+ extract Extract files from a backup archive
+ list List elements in a backup or a repository
+
+Use "bb <command> -help" for more information about a command.
+`
+
+func main() {
+ var cmd string
+ var err error
+ log.SetFlags(log.Lshortfile)
+
+ if len(os.Args) > 1 {
+ cmd = os.Args[1]
+ }
+ switch cmd {
+ case "create":
+ err = create(os.Args[2:])
+ case "extract":
+ err = extract(os.Args[2:])
+ case "list":
+ err = list(os.Args[2:])
+ default:
+ fmt.Print(usage)
+ }
+ if err != nil && !errors.Is(err, flag.ErrHelp) {
+ log.Fatal(err) // log.Fatal prints the error and exits with status 1
+ }
+ os.Exit(0)
}