package main

import (
	"bufio"
	"bytes"
	"compress/flate"
	"crypto/aes"
	"crypto/cipher"
	"crypto/rand"
	"crypto/sha256"
	"errors"
	"flag"
	"fmt"
	"io"
	"io/fs"
	"log"
	"net/url"
	"os"
	"path"
	"path/filepath"
	"strings"
	"time"
)

// fileInfo holds the metadata recorded for one file in a backup index: its
// size, modification time (Unix seconds), mode bits and SHA-256 checksum.
type fileInfo struct {
	size, mtime, mode int64
	sum               [sha256.Size]byte
}

// fileMap maps a file path, relative to the backup root, to its metadata.
type fileMap map[string]fileInfo

// chunkMax is the largest payload stored as a single chunk; larger data is
// split into chunkMax-sized pieces by chunkify.
const chunkMax = 1 << 17 // 131072

// user carries the context of one invocation: the key material (id and AES
// key), the configuration and cache directories, the host name, the absolute
// repository path and the ignore patterns.
type user struct {
	id, key                   []byte
	config, cache, host, repo string
	ignore                    []string
}

// initUser returns the user context for the repository located at repo.
//
// It resolves the configuration and cache directories, the host name and the
// absolute repository path, then loads the ignore patterns from
// <config>/bb/ignore and the key material from <config>/bb/key. The key file
// holds a 12-byte id followed by the AES key. When the key file does not
// exist, a fresh id and a 32-byte key are generated and saved with mode 0600.
//
// An error is returned when repo is empty, when the environment cannot be
// queried, or when the key file is unreadable or too short to hold an id and
// a key. An existing key file is never overwritten.
func initUser(repo string) (*user, error) {
	const idLen, keyLen = 12, 32

	if repo == "" {
		return nil, errors.New("repo missing")
	}
	config, err := os.UserConfigDir()
	if err != nil {
		return nil, err
	}
	cache, err := os.UserCacheDir()
	if err != nil {
		return nil, err
	}
	host, err := os.Hostname()
	if err != nil {
		return nil, err
	}
	abs, err := filepath.Abs(repo)
	if err != nil {
		return nil, err
	}
	u := user{
		config: filepath.Join(config, "bb"),
		cache:  filepath.Join(cache, "bb"),
		host:   host,
		repo:   abs,
	}
	u.ignore = linesFile(filepath.Join(u.config, "ignore"))

	keyFile := filepath.Join(u.config, "key")
	k, err := os.ReadFile(keyFile)
	switch {
	case err == nil:
		if len(k) <= idLen {
			return nil, fmt.Errorf("key file %s: too short (%d bytes)", keyFile, len(k))
		}
		u.id, u.key = k[:idLen], k[idLen:]
		return &u, nil
	case !errors.Is(err, fs.ErrNotExist):
		// Do not regenerate on a read failure: a new key could not decrypt
		// the data already stored with the existing one.
		return nil, fmt.Errorf("key file %s: %w", keyFile, err)
	}

	// Create a key and save it.
	buf := make([]byte, idLen+keyLen)
	if _, err := rand.Read(buf); err != nil {
		return nil, err
	}
	if err := os.MkdirAll(u.config, 0o755); err != nil {
		return nil, err
	}
	if err := os.WriteFile(keyFile, buf, 0o600); err != nil {
		return nil, err
	}
	u.id, u.key = buf[:idLen], buf[idLen:]
	return &u, nil
}

// getIndex walks the tree rooted at root and stores a new backup index.
//
// For every file not excluded by u.ignore it records the path, size, mtime,
// mode and SHA-256 checksum. A file whose size and mtime match its entry in
// mm (the previous backup, possibly nil) reuses the recorded checksum; any
// other file is read, checksummed and archived with chunkify. The index text
// is then stored with flatenc under the hex SHA-256 of its content, and that
// name is returned.
func getIndex(u *user, root string, mm fileMap) (string, error) {
	rfs := os.DirFS(root)

	var index bytes.Buffer
	walk := func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if isExcluded(p, d.Name(), u.ignore) {
			// SkipDir returned for a plain file would also drop the
			// remaining entries of its directory, so only prune when the
			// excluded entry is itself a directory.
			if d.IsDir() {
				return fs.SkipDir
			}
			return nil
		}
		if d.IsDir() {
			return nil
		}
		info, err := d.Info()
		if err != nil {
			return err
		}
		md := fileInfo{size: info.Size(), mtime: info.ModTime().Unix(), mode: int64(info.Mode())}
		// Recompute file checksum only if file size or mtime has changed.
		if o, ok := mm[p]; ok && o.size == md.size && o.mtime == md.mtime {
			md.sum = o.sum
		} else {
			// p is relative to root: read it through rfs rather than
			// relative to the working directory.
			b, err := fs.ReadFile(rfs, p)
			if err != nil {
				return err
			}
			md.sum = sha256.Sum256(b)
			log.Printf("archive %s %x\n", p, md.sum)
			if err := chunkify(u, root, fmt.Sprintf("%x", md.sum), b); err != nil {
				return err
			}
		}
		fmt.Fprintf(&index, "%s %d %d %o %x\n", url.PathEscape(p), md.size, md.mtime, md.mode, md.sum)
		return nil
	}
	if err := fs.WalkDir(rfs, ".", walk); err != nil {
		return "", err
	}

	name := fmt.Sprintf("%x", sha256.Sum256(index.Bytes()))
	if err := flatenc(u, name, index.Bytes()); err != nil {
		return "", err
	}
	return name, nil
}

// isExcluded reports whether any pattern in excludes matches either the base
// name or the full path of an entry.
func isExcluded(path, base string, excludes []string) bool {
	for _, x := range excludes {
		// match takes the pattern first.
		if match(x, base) || match(x, path) {
			return true
		}
	}
	return false
}

// match reports whether name matches the shell pattern, as defined by
// path.Match. A malformed pattern never matches.
func match(pattern, name string) bool {
	m, err := path.Match(pattern, name)
	return err == nil && m
}

// chunkify stores data in the repository under name as encrypted compressed
// chunks.
//
// Steps:
//  1. Checksum the source file (done by the caller); this is the file index
//     name.
//  2. Split data in chunks. For each chunk:
//     - checksum the chunk before compression/encryption; this is the chunk
//     name,
//     - compress the chunk (deflate),
//     - encrypt and authenticate the result (aes-gcm),
//     - write it under the chunk name,
//     - add the chunk checksum to the file index.
//  3. Compress and encrypt the file index as above.
//
// Data no larger than chunkMax is stored directly under name, without a file
// index. The root parameter is currently unused.
func chunkify(u *user, root, name string, data []byte) error {
	if len(data) <= chunkMax {
		return flatenc(u, name, data)
	}

	// Split data in fixed size chunks.
	chunks := split(data, chunkMax)
	index := make([]byte, 0, len(chunks)*sha256.Size)
	log.Println("chunkify", name)
	log.Println("nchunks:", len(chunks), len(chunks[0]))
	for i, c := range chunks {
		sum := sha256.Sum256(c)
		index = append(index, sum[:]...)
		if err := flatenc(u, fmt.Sprintf("%x", sum), c); err != nil {
			return fmt.Errorf("chunkify %s block %d: %w", name, i, err)
		}
	}
	log.Println("file index:", name)
	return flatenc(u, name, index)
}

// unchunkify reads back the data stored under name by chunkify.
//
// When single is true, name holds the data itself and it is returned as is.
// Otherwise name holds a file index made of concatenated SHA-256 chunk
// checksums; every chunk is read in order and the reassembled content is
// returned.
func unchunkify(u *user, name string, single bool) ([]byte, error) {
	d, err := unflatenc(u, name)
	if single || err != nil {
		return d, err
	}
	if len(d)%sha256.Size != 0 {
		return nil, fmt.Errorf("unchunkify %s: malformed file index (%d bytes)", name, len(d))
	}

	var raw []byte
	for i, sum := range split(d, sha256.Size) {
		c, err := unflatenc(u, fmt.Sprintf("%x", sum))
		if err != nil {
			return nil, fmt.Errorf("unchunkify %s block %d: %w", name, i, err)
		}
		log.Printf("chunk %d %d %x\n", i, len(c), sum)
		raw = append(raw, c...)
	}
	log.Printf("raw %d %x\n", len(raw), sha256.Sum256(raw))
	return raw, nil
}

// flatenc compresses data (deflate), encrypts and authenticates it (aes-gcm
// with a random nonce prepended to the ciphertext) and writes the result in
// the repository. The special name "index" is written to <repo>/index; any
// other name is written as a chunk under <repo>/chunks.
func flatenc(u *user, name string, data []byte) error {
	// Flatten data.
	var buf bytes.Buffer
	zw, err := flate.NewWriter(&buf, flate.DefaultCompression)
	if err != nil {
		return fmt.Errorf("flatenc flatten init: %w", err)
	}
	if _, err := zw.Write(data); err != nil {
		return fmt.Errorf("flatenc flatten write: %w", err)
	}
	if err := zw.Close(); err != nil {
		return fmt.Errorf("flatenc flatten close: %w", err)
	}

	// Encrypt and authenticate.
	cb, err := aes.NewCipher(u.key)
	if err != nil {
		return fmt.Errorf("flatenc cipher: %w", err)
	}
	aesgcm, err := cipher.NewGCM(cb)
	if err != nil {
		return fmt.Errorf("flatenc gcm: %w", err)
	}
	iv := make([]byte, aesgcm.NonceSize())
	if _, err := rand.Read(iv); err != nil {
		return fmt.Errorf("flatenc iv: %w", err)
	}
	// Seal appends the ciphertext to the nonce, giving nonce||ciphertext.
	out := aesgcm.Seal(iv, iv, buf.Bytes(), nil)

	if name == "index" {
		return os.WriteFile(filepath.Join(u.repo, "index"), out, 0o644)
	}
	return writeChunk(filepath.Join(u.repo, "chunks"), name, out)
}

// unflatenc is the reverse of flatenc: it reads the repository entry called
// name, authenticates and decrypts it, and returns the decompressed content.
// It fails, without panicking, on names too short to map to a chunk path and
// on files too short to hold a nonce.
func unflatenc(u *user, name string) ([]byte, error) {
	var path string
	switch {
	case name == "index":
		path = filepath.Join(u.repo, "index")
	case len(name) < 3:
		return nil, fmt.Errorf("unflatenc %q: name too short", name)
	default:
		path = filepath.Join(u.repo, "chunks", name[:2], name[2:])
	}
	enc, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("unflatenc %s open %w", path, err)
	}
	cb, err := aes.NewCipher(u.key)
	if err != nil {
		return nil, fmt.Errorf("unflatenc %s cipher: %w", path, err)
	}
	gcm, err := cipher.NewGCM(cb)
	if err != nil {
		return nil, fmt.Errorf("unflatenc %s gcm: %w", path, err)
	}
	l := gcm.NonceSize()
	if len(enc) < l {
		return nil, fmt.Errorf("unflatenc %s: truncated file (%d bytes)", path, len(enc))
	}
	dec, err := gcm.Open(nil, enc[:l], enc[l:], nil)
	if err != nil {
		return nil, fmt.Errorf("unflatenc %s gcm open: %w", path, err)
	}
	zr := flate.NewReader(bytes.NewReader(dec))
	defer zr.Close()
	return io.ReadAll(zr)
}

// split divides a data block and returns a slice of fixed size blocks.
func split(data []byte, size int) (chunks [][]byte) { offset := 0 for offset+size < len(data) { chunks = append(chunks, data[offset:offset+size]) offset += size } if offset == len(data) { return chunks } return append(chunks, data[offset:]) } func writeChunk(prefix, name string, data []byte) error { head, tail := name[:2], name[2:] if err := os.MkdirAll(filepath.Join(prefix, head), 0o750); err != nil { return err } return os.WriteFile(filepath.Join(prefix, head, tail), data, 0o640) } func lastCacheEntry(name string) string { cache, err := os.UserCacheDir() if err != nil { return "" } if lines := linesFile(filepath.Join(cache, "bb", name)); len(lines) > 0 { return lines[len(lines)-1] } return "" } func linesFile(name string) (lines []string) { f, err := os.Open(name) if err != nil { return nil } scan := bufio.NewScanner(f) for scan.Scan() { lines = append(lines, scan.Text()) } f.Close() return lines } func userUpdate(u *user) error { return nil } func previousIndex(repo, host, root string) string { indexes := linesFile(filepath.Join(repo, "index")) for _, idx := range indexes { if !strings.HasPrefix(idx, host+":"+url.PathEscape(root)+":") { continue } if words := strings.Fields(idx); len(words) > 1 { return words[1] } } return "" } func currentIndex(host, root string) string { now := time.Now() y, m, d := now.Date() h, mn, s := now.Clock() return fmt.Sprintf("%s:%s:%d-%02d%02d-%02d%02d%02d", host, url.PathEscape(root), y, m, d, h, mn, s) } // parent returns the parent index of archive for incremental update. 
func parent(u *user, root, index string) (fileMap, error) { var psum string for _, line := range strings.Split(index, "\n") { if words := strings.Fields(line); len(words) == 2 && strings.HasPrefix(words[0], u.host+":"+url.PathEscape(root)+":") { psum = words[1] } } if psum == "" { return nil, nil } d, err := unflatenc(u, psum) if err != nil { return nil, nil } md := fileMap{} for _, line := range strings.Split(string(d), "\n") { if line == "" { continue } var ( p string d fileInfo s []byte ) n, err := fmt.Sscanf(line, "%s %d %d %o %64x", &p, &d.size, &d.mtime, &d.mode, &s) if err != nil || n != 5 { return md, err } copy(d.sum[:], s) path, err := url.PathUnescape(p) if err != nil { return md, err } md[path] = d } return md, nil } func create(args []string) (err error) { flg := flag.NewFlagSet("create", flag.ContinueOnError) flg.Usage = func() { fmt.Println("Usage: bb create [src [dest]]") } if err = flg.Parse(args); err != nil { return err } repo := lastCacheEntry("repo") root := lastCacheEntry("root") switch len(flg.Args()) { case 2: root, repo = flg.Arg(0), flg.Arg(1) case 1: root = flg.Arg(0) } if root == "" { return errors.New("root missing") } u, err := initUser(repo) if err != nil { return err } abs, err := filepath.Abs(root) if err != nil { return err } p, err := unflatenc(u, "index") if err != nil && !errors.Is(err, os.ErrNotExist) { return err } index := string(p) md, err := parent(u, abs, index) if err != nil { return err } data, err := getIndex(u, root, md) if err != nil { return err } index += fmt.Sprintf("%s %s\n", currentIndex(u.host, abs), string(data)) if err := flatenc(u, "index", []byte(index)); err != nil { return err } return userUpdate(u) } func extract(args []string) (err error) { flg := flag.NewFlagSet("extract", flag.ContinueOnError) flg.Usage = func() { fmt.Println("Usage: bb extract [repo] [src] [dest]") } if err = flg.Parse(args); err != nil { return err } return nil } func list(args []string) (err error) { flg := flag.NewFlagSet("list", 
flag.ContinueOnError) flg.Usage = func() { fmt.Println("Usage: bb list [repo]") } if err = flg.Parse(args); err != nil { return err } repo := lastCacheEntry("repo") arc := "index" switch flg.NArg() { case 1: repo = flg.Arg(0) case 2: repo = flg.Arg(0) arc = flg.Arg(1) } u, err := initUser(repo) if err != nil { return err } d, err := unflatenc(u, arc) log.Println("index:", string(d), err) return nil } const usage = `bb is a backup tool. Usage: bb [commands] [arguments] Commands: create Create a backup archive of a directory extract Extract files from a backup archive list List elements in a backup or a repository Use "bb -help" for more information about a command. ` func main() { var cmd string var err error log.SetFlags(log.Lshortfile) if len(os.Args) > 1 { cmd = os.Args[1] } switch cmd { case "create": err = create(os.Args[2:]) case "extract": err = extract(os.Args[2:]) case "list": err = list(os.Args[2:]) default: fmt.Print(usage) } if err != nil && !errors.Is(err, flag.ErrHelp) { log.Fatal(err) os.Exit(1) } os.Exit(0) }