// Command bb is a small incremental backup tool. It walks the current
// directory, stores the content of new or changed files as compressed and
// encrypted chunks under .bb/chunks, and writes a textual index describing the
// tree under .bb/index-<timestamp>.
package main

import (
	"bufio"
	"bytes"
	"compress/flate"
	"crypto/aes"
	"crypto/cipher"
	"crypto/rand"
	"crypto/sha256"
	"errors"
	"flag"
	"fmt"
	"io"
	"io/fs"
	"log"
	"net/url"
	"os"
	"path"
	"path/filepath"
	"strings"
	"time"
)

// metadata is what the index records for one file.
type metadata struct {
	// size is the file length in bytes, mtime its modification time in Unix
	// seconds and mode its fs.FileMode bits.
	size, mtime, mode int64
	// sum is the SHA-256 checksum of the file content.
	sum [sha256.Size]byte
}

// metamap maps a slash-separated path, relative to the backup root, to the
// metadata recorded for it.
type metamap map[string]metadata

// chunkMax is the maximum size in bytes of a chunk before compression.
const chunkMax = 1 << 17 // 131072

// getIndex walks the tree rooted at root and returns the text of a new backup
// index, one line per file:
//
//	<url-escaped path> <size> <mtime> <mode, octal> <sha256, hex>
//
// mm holds the entries of the previous index. A file whose size, mtime and
// mode match its entry in mm is assumed unchanged and its checksum is reused;
// any other file is read, checksummed and archived with chunkify.
//
// Entries matching a pattern of .bb/exclude, and the .bb directory itself, are
// left out. On error the returned index is empty.
func getIndex(root string, mm metamap) (index string, err error) {
	rfs := os.DirFS(root)

	// io/fs paths are always slash-separated, hence path.Join and not
	// filepath.Join. The key is secret and is deliberately never logged.
	key, err := fs.ReadFile(rfs, path.Join(".bb", "key"))
	if err != nil {
		return "", err
	}

	xc, err := getExclude(root)
	if err != nil {
		return "", err
	}
	xc = append(xc, ".bb")
	log.Println("xc", xc)

	var sb strings.Builder
	err = fs.WalkDir(rfs, ".", func(name string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		// The walk root is never excluded: a pattern such as ".*" matches "."
		// and would otherwise empty the whole backup.
		if name != "." && isExcluded(name, d.Name(), xc) {
			// SkipDir returned for a regular file would make WalkDir skip the
			// remaining entries of the parent directory, so only return it
			// for directories.
			if d.IsDir() {
				return fs.SkipDir
			}
			return nil
		}
		if d.IsDir() {
			return nil
		}
		info, err := d.Info()
		if err != nil {
			return err
		}
		md := metadata{size: info.Size(), mtime: info.ModTime().Unix(), mode: int64(info.Mode())}

		// Recompute the checksum only if size, mode or mtime has changed.
		if o, ok := mm[name]; ok && o.size == md.size && o.mtime == md.mtime && o.mode == md.mode {
			md.sum = o.sum
		} else {
			// name is relative to root, not to the working directory, so the
			// file is read through rfs.
			b, err := fs.ReadFile(rfs, name)
			if err != nil {
				return err
			}
			md.sum = sha256.Sum256(b)
			log.Printf("archive %s %x\n", name, md.sum)
			// chunk here, could be done in goroutine.
			if err := chunkify(root, fmt.Sprintf("%x", md.sum), b, key); err != nil {
				return err
			}
		}
		fmt.Fprintf(&sb, "%s %d %d %o %x\n", url.PathEscape(name), md.size, md.mtime, md.mode, md.sum)
		return nil
	})
	if err != nil {
		return "", err
	}
	return sb.String(), nil
}

// getExclude returns the non-empty lines of root/.bb/exclude, each one a
// path.Match pattern. A missing exclude file is not an error and yields an
// empty list. A malformed pattern is reported as an error here so that
// matching can never fail later.
func getExclude(root string) (str []string, err error) {
	f, err := os.Open(filepath.Join(root, ".bb", "exclude"))
	if errors.Is(err, os.ErrNotExist) {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	defer f.Close()

	scan := bufio.NewScanner(f)
	for scan.Scan() {
		s := scan.Text()
		if s == "" {
			continue
		}
		// path.Match reports ErrBadPattern even when the name does not match,
		// so matching against the empty name is a pure syntax check.
		if _, err := path.Match(s, ""); err != nil {
			return nil, fmt.Errorf("exclude pattern %q: %w", s, err)
		}
		str = append(str, s)
	}
	if err := scan.Err(); err != nil {
		return nil, err
	}
	return str, nil
}

// isExcluded reports whether any pattern of excludes matches either base (the
// last element of the path) or path (the slash-separated path relative to the
// backup root).
func isExcluded(path, base string, excludes []string) bool {
	for _, x := range excludes {
		if match(x, base) || match(x, path) {
			return true
		}
	}
	return false
}

// match reports whether name matches the path.Match pattern. It panics on a
// malformed pattern; patterns loaded by getExclude are validated beforehand.
func match(pattern, name string) bool {
	matched, err := path.Match(pattern, name)
	if err != nil {
		panic(err)
	}
	return matched
}

// readIndex parses the index file at path, in the format produced by getIndex,
// and returns its entries keyed by unescaped file path. A malformed line is
// reported as an error naming the file and line number.
func readIndex(path string) (md metamap, err error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	md = metamap{}
	scan := bufio.NewScanner(f)
	for line := 1; scan.Scan(); line++ {
		var (
			p string
			d metadata
			s []byte
		)
		n, err := fmt.Sscanf(scan.Text(), "%s %d %d %o %64x", &p, &d.size, &d.mtime, &d.mode, &s)
		if err != nil {
			return nil, fmt.Errorf("%s:%d: %w", path, line, err)
		}
		// A short checksum would otherwise be silently zero-padded by copy.
		if n != 5 || len(s) != sha256.Size {
			return nil, fmt.Errorf("%s:%d: malformed index entry", path, line)
		}
		copy(d.sum[:], s)
		name, err := url.PathUnescape(p)
		if err != nil {
			return nil, fmt.Errorf("%s:%d: %w", path, line, err)
		}
		md[name] = d
	}
	if err := scan.Err(); err != nil {
		return nil, err
	}
	return md, nil
}

// initBB prepares the root/.bb directory: it creates the directory and a
// random 32-byte encryption key if they do not exist yet.
//
// It returns the path of the index file to write for this run (current) and
// the path of the most recent existing index (previous), which is empty when
// there is none. Both paths include root.
func initBB(root string) (current, previous string, err error) {
	bbDir := filepath.Join(root, ".bb")
	if err = os.MkdirAll(bbDir, 0o750); err != nil {
		return "", "", err
	}

	// Create a private encryption key if it doesn't already exist.
	keyPath := filepath.Join(bbDir, "key")
	_, err = os.Stat(keyPath)
	switch {
	case errors.Is(err, fs.ErrNotExist):
		buf := make([]byte, 32)
		if _, err := rand.Read(buf); err != nil {
			return "", "", err
		}
		if err := os.WriteFile(keyPath, buf, 0o600); err != nil {
			return "", "", err
		}
	case err != nil:
		return "", "", err
	}

	// Retrieve the most recent backup index name. Index names embed a
	// zero-padded timestamp, so the last name in sorted order is the newest.
	prevs, err := fs.Glob(os.DirFS(root), path.Join(".bb", "index-*"))
	if err != nil {
		return "", "", err
	}
	if len(prevs) > 0 {
		// Glob results are relative to root; make them usable with os.Open.
		previous = filepath.Join(root, filepath.FromSlash(prevs[len(prevs)-1]))
	}

	// Name the current backup index after the local time, as
	// index-YYYY-MMDD-hhmmss.
	current = filepath.Join(bbDir, "index-"+time.Now().Format("2006-0102-150405"))
	return current, previous, nil
}

// chunkify archives data under name in root/.bb/chunks.
//
// Data of at most chunkMax bytes is stored as a single compressed and
// encrypted file called name. Larger data is split in chunks of chunkMax
// bytes; each chunk is stored under the hex SHA-256 of its plain content, and
// the concatenation of the raw chunk checksums is stored under name as the
// file index.
func chunkify(root, name string, data, key []byte) error {
	// Steps:
	// 1. checksum source file (done by caller), this will be the file index name
	// 2. split in chunks. For each chunk, do:
	//    1. checksum the chunk, before compression/encryption. This will be the chunk name.
	//    2. compress the chunk (deflate)
	//    3. encrypt and authenticate the result (aes-gcm)
	//    4. write in chunk name (see above)
	//    5. add chunk name to file index
	// 3. compress and encrypt file index as above.
	if len(data) <= chunkMax {
		return flatenc(root, name, data, key)
	}

	// Split data in fixed size chunks.
	chunks := split(data, chunkMax)
	index := make([]byte, 0, len(chunks)*sha256.Size)
	log.Println("chunkify", name)
	log.Println("nchunks:", len(chunks), len(chunks[0]))
	for i, c := range chunks {
		sum := sha256.Sum256(c)
		index = append(index, sum[:]...)
		if err := flatenc(root, fmt.Sprintf("%x", sum), c, key); err != nil {
			return fmt.Errorf("chunkify %s block %d: %w", name, i, err)
		}
	}
	log.Println("file index:", name)
	return flatenc(root, name, index, key)
}

// unchunkify reads back what chunkify stored under name and returns the
// original data.
//
// If single is true, name is a single-chunk entry and its decoded content is
// returned directly. Otherwise name is a file index: a sequence of SHA-256
// checksums, each naming a chunk, and the decoded chunks are returned
// concatenated. The stored data does not say which of the two forms it is, so
// the caller must know.
func unchunkify(root, name string, single bool, key []byte) ([]byte, error) {
	d, err := unflatenc(root, name, key)
	if single || err != nil {
		return d, err
	}
	if len(d) == 0 || len(d)%sha256.Size != 0 {
		return nil, fmt.Errorf("unchunkify %s: %d bytes is not a valid chunk index", name, len(d))
	}

	sums := split(d, sha256.Size)
	var raw []byte
	for i, sum := range sums {
		c, err := unflatenc(root, fmt.Sprintf("%x", sum), key)
		if err != nil {
			return nil, fmt.Errorf("unchunkify %s block %d: %w", name, i, err)
		}
		log.Printf("chunk %d %d %x\n", i, len(c), sum)
		raw = append(raw, c...)
	}
	sum := sha256.Sum256(raw)
	log.Printf("raw %d %x\n", len(raw), sum)
	return raw, nil
}

// flatenc compresses data with deflate, encrypts and authenticates the result
// with AES-GCM using key, and writes the random nonce followed by the
// ciphertext to root/.bb/chunks/<name[:2]>/<name[2:]>.
func flatenc(root, name string, data, key []byte) error {
	// Flatten data.
	var buf bytes.Buffer
	zw, err := flate.NewWriter(&buf, flate.DefaultCompression)
	if err != nil {
		return fmt.Errorf("flatenc flatten: %w", err)
	}
	if _, err := zw.Write(data); err != nil {
		return fmt.Errorf("flatenc flatten write: %w", err)
	}
	if err := zw.Close(); err != nil {
		return fmt.Errorf("flatenc flatten close: %w", err)
	}

	// Encrypt and authenticate.
	cb, err := aes.NewCipher(key)
	if err != nil {
		return fmt.Errorf("flatenc cipher: %w", err)
	}
	aesgcm, err := cipher.NewGCM(cb)
	if err != nil {
		return fmt.Errorf("flatenc gcm: %w", err)
	}
	iv := make([]byte, aesgcm.NonceSize())
	if _, err := rand.Read(iv); err != nil {
		return fmt.Errorf("flatenc iv: %w", err)
	}
	log.Printf("iv: %d %x\n", len(iv), iv)
	// Seal appends the ciphertext to its first argument, so the result is the
	// nonce immediately followed by the ciphertext.
	enc := aesgcm.Seal(iv, iv, buf.Bytes(), nil)

	// Write to a file named with original checksum.
	return writeCksumFile(filepath.Join(root, ".bb", "chunks"), name, enc)
}

// unflatenc reverses flatenc: it reads root/.bb/chunks/<name[:2]>/<name[2:]>,
// verifies and decrypts it with key, and returns the inflated content. A name
// shorter than two characters or a file too short to hold a nonce is reported
// as an error.
func unflatenc(root, name string, key []byte) ([]byte, error) {
	if len(name) < 2 {
		return nil, fmt.Errorf("unflatenc: invalid name %q", name)
	}
	enc, err := os.ReadFile(filepath.Join(root, ".bb", "chunks", name[:2], name[2:]))
	if err != nil {
		return nil, err
	}
	cb, err := aes.NewCipher(key)
	if err != nil {
		return nil, fmt.Errorf("unflatenc cipher: %w", err)
	}
	aesgcm, err := cipher.NewGCM(cb)
	if err != nil {
		return nil, fmt.Errorf("unflatenc gcm: %w", err)
	}
	l := aesgcm.NonceSize()
	if len(enc) < l {
		return nil, fmt.Errorf("unflatenc %s: truncated chunk (%d bytes)", name, len(enc))
	}
	dec, err := aesgcm.Open(nil, enc[:l], enc[l:], nil)
	if err != nil {
		return nil, fmt.Errorf("unflatenc open: %w", err)
	}
	zr := flate.NewReader(bytes.NewReader(dec))
	defer zr.Close()
	return io.ReadAll(zr)
}

// split cuts data in consecutive chunks of size bytes; the last chunk holds
// the remainder and may be shorter. It always returns at least one chunk, so
// empty data yields a single empty chunk. The chunks alias data. A size that
// is not positive yields data as a single chunk.
func split(data []byte, size int) (chunks [][]byte) {
	if size <= 0 {
		return [][]byte{data}
	}
	chunks = make([][]byte, 0, len(data)/size+1)
	offset := 0
	for offset+size < len(data) {
		chunks = append(chunks, data[offset:offset+size])
		offset += size
	}
	return append(chunks, data[offset:])
}

// writeCksumFile writes data to prefix/<name[:2]>/<name[2:]>, creating the
// directory if needed. Fanning out on the first two characters of name keeps
// any single directory from growing too large. name must be at least two
// characters long.
func writeCksumFile(prefix, name string, data []byte) error {
	if len(name) < 2 {
		return fmt.Errorf("writeCksumFile: invalid name %q", name)
	}
	head, tail := name[:2], name[2:]
	if err := os.MkdirAll(filepath.Join(prefix, head), 0o750); err != nil {
		return err
	}
	return os.WriteFile(filepath.Join(prefix, head, tail), data, 0o640)
}

// main backs up the current working directory. With -read NAME it instead
// decodes the multi-chunk entry NAME from .bb/chunks and logs its content.
func main() {
	log.SetFlags(log.Lshortfile)
	rfile := flag.String("read", "", "a cksum file")
	flag.Parse()

	wd, err := os.Getwd()
	if err != nil {
		log.Fatal(err)
	}

	if *rfile != "" {
		log.Println("rfile:", *rfile)
		key, err := os.ReadFile(filepath.Join(wd, ".bb", "key"))
		if err != nil {
			log.Fatal(err)
		}
		d, err := unchunkify(wd, *rfile, false, key)
		log.Println("d", err, string(d))
		return
	}

	index, oldindex, err := initBB(wd)
	if err != nil {
		log.Fatal(err)
	}
	md := metamap{}
	if oldindex != "" {
		if md, err = readIndex(oldindex); err != nil {
			log.Fatal(err)
		}
	}
	data, err := getIndex(wd, md)
	if err != nil {
		log.Fatal(err)
	}
	if err := os.WriteFile(index, []byte(data), 0o644); err != nil {
		log.Fatal(err)
	}
}