From df9d4b455d0154589b2d317d7aa3eac4a350dd34 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 6 Apr 2024 19:17:28 +0200 Subject: [PATCH] prune: prepare for moving code to repository package --- cmd/restic/cmd_prune.go | 212 ++++++++++++++++++++-------------------- 1 file changed, 107 insertions(+), 105 deletions(-) diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index 833e72ae7..eef66cc8e 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -194,7 +194,9 @@ func runPruneWithRepo(ctx context.Context, opts PruneOptions, gopts GlobalOption return err } - plan, stats, err := planPrune(ctx, opts, repo, ignoreSnapshots, printer) + plan, stats, err := PlanPrune(ctx, opts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error) { + return getUsedBlobs(ctx, repo, ignoreSnapshots, printer) + }, printer) if err != nil { return err } @@ -211,40 +213,40 @@ func runPruneWithRepo(ctx context.Context, opts PruneOptions, gopts GlobalOption // Trigger GC to reset garbage collection threshold runtime.GC() - return doPrune(ctx, opts, repo, plan, printer) + return DoPrune(ctx, opts, repo, plan, printer) } -type pruneStats struct { - blobs struct { - used uint - duplicate uint - unused uint - remove uint - repack uint - repackrm uint +type PruneStats struct { + Blobs struct { + Used uint + Duplicate uint + Unused uint + Remove uint + Repack uint + Repackrm uint } - size struct { - used uint64 - duplicate uint64 - unused uint64 - remove uint64 - repack uint64 - repackrm uint64 - unref uint64 - uncompressed uint64 + Size struct { + Used uint64 + Duplicate uint64 + Unused uint64 + Remove uint64 + Repack uint64 + Repackrm uint64 + Unref uint64 + Uncompressed uint64 } - packs struct { - used uint - unused uint - partlyUsed uint - unref uint - keep uint - repack uint - remove uint + Packs struct { + Used uint + Unused uint + PartlyUsed uint + Unref uint + Keep uint + Repack uint + Remove uint } } -type prunePlan struct { +type PrunePlan struct { removePacksFirst restic.IDSet // packs to remove first (unreferenced packs) repackPacks restic.IDSet // packs to repack keepBlobs restic.CountedBlobSet // blobs to keep during repacking @@ -267,26 +269,26 @@ type packInfoWithID struct { mustCompress bool } -// planPrune selects which files to rewrite and which to delete and which blobs to keep. +// PlanPrune selects which files to rewrite and which to delete and which blobs to keep. // Also some summary statistics are returned. -func planPrune(ctx context.Context, opts PruneOptions, repo restic.Repository, ignoreSnapshots restic.IDSet, printer progress.Printer) (prunePlan, pruneStats, error) { - var stats pruneStats +func PlanPrune(ctx context.Context, opts PruneOptions, repo restic.Repository, getUsedBlobs func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error), printer progress.Printer) (PrunePlan, PruneStats, error) { + var stats PruneStats - usedBlobs, err := getUsedBlobs(ctx, repo, ignoreSnapshots, printer) + usedBlobs, err := getUsedBlobs(ctx, repo) if err != nil { - return prunePlan{}, stats, err + return PrunePlan{}, stats, err } printer.P("searching used packs...\n") keepBlobs, indexPack, err := packInfoFromIndex(ctx, repo.Index(), usedBlobs, &stats, printer) if err != nil { - return prunePlan{}, stats, err + return PrunePlan{}, stats, err } printer.P("collecting packs for deletion and repacking\n") plan, err := decidePackAction(ctx, opts, repo, indexPack, &stats, printer) if err != nil { - return prunePlan{}, stats, err + return PrunePlan{}, stats, err } if len(plan.repackPacks) != 0 { @@ -313,7 +315,7 @@ func planPrune(ctx context.Context, opts PruneOptions, repo restic.Repository, i return plan, stats, nil } -func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs restic.CountedBlobSet, stats *pruneStats, printer progress.Printer) (restic.CountedBlobSet, map[restic.ID]packInfo, error) { +func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs restic.CountedBlobSet, stats *PruneStats, printer progress.Printer) (restic.CountedBlobSet, map[restic.ID]packInfo, error) { // iterate over all blobs in index to find out which blobs are duplicates // The counter in usedBlobs describes how many instances of the blob exist in the repository index // Thus 0 == blob is missing, 1 == blob exists once, >= 2 == duplicates exist @@ -384,20 +386,20 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re ip.unusedBlobs++ // count as duplicate, will later on change one copy to be counted as used - stats.size.duplicate += size - stats.blobs.duplicate++ + stats.Size.Duplicate += size + stats.Blobs.Duplicate++ case dupCount == 1: // used blob, not duplicate ip.usedSize += size ip.usedBlobs++ - stats.size.used += size - stats.blobs.used++ + stats.Size.Used += size + stats.Blobs.Used++ default: // unused blob ip.unusedSize += size ip.unusedBlobs++ - stats.size.unused += size - stats.blobs.unused++ + stats.Size.Unused += size + stats.Blobs.Unused++ } if !blob.IsCompressed() { ip.uncompressed = true @@ -431,10 +433,10 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re ip.unusedSize -= size ip.unusedBlobs-- // same for the global statistics - stats.size.used += size - stats.blobs.used++ - stats.size.duplicate -= size - stats.blobs.duplicate-- + stats.Size.Used += size + stats.Blobs.Used++ + stats.Size.Duplicate -= size + stats.Blobs.Duplicate-- // let other occurrences remain marked as unused usedBlobs[bh] = 1 default: @@ -463,7 +465,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re return usedBlobs, indexPack, nil } -func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Repository, indexPack map[restic.ID]packInfo, stats *pruneStats, printer progress.Printer) (prunePlan, error) { +func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Repository, indexPack map[restic.ID]packInfo, stats *PruneStats, printer progress.Printer) (PrunePlan, error) { removePacksFirst := restic.NewIDSet() removePacks := restic.NewIDSet() repackPacks := restic.NewIDSet() @@ -487,7 +489,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Reposi // Pack was not referenced in index and is not used => immediately remove! printer.V("will remove pack %v as it is unused and not indexed\n", id.Str()) removePacksFirst.Insert(id) - stats.size.unref += uint64(packSize) + stats.Size.Unref += uint64(packSize) return nil } @@ -503,15 +505,15 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Reposi // statistics switch { case p.usedBlobs == 0: - stats.packs.unused++ + stats.Packs.Unused++ case p.unusedBlobs == 0: - stats.packs.used++ + stats.Packs.Used++ default: - stats.packs.partlyUsed++ + stats.Packs.PartlyUsed++ } if p.uncompressed { - stats.size.uncompressed += p.unusedSize + p.usedSize + stats.Size.Uncompressed += p.unusedSize + p.usedSize } mustCompress := false if repoVersion >= 2 { @@ -525,17 +527,17 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Reposi case p.usedBlobs == 0: // All blobs in pack are no longer used => remove pack! removePacks.Insert(id) - stats.blobs.remove += p.unusedBlobs - stats.size.remove += p.unusedSize + stats.Blobs.Remove += p.unusedBlobs + stats.Size.Remove += p.unusedSize case opts.RepackCachableOnly && p.tpe == restic.DataBlob: // if this is a data pack and --repack-cacheable-only is set => keep pack! - stats.packs.keep++ + stats.Packs.Keep++ case p.unusedBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress: if packSize >= int64(targetPackSize) { // All blobs in pack are used and not mixed => keep pack! - stats.packs.keep++ + stats.Packs.Keep++ } else { repackSmallCandidates = append(repackSmallCandidates, packInfoWithID{ID: id, packInfo: p, mustCompress: mustCompress}) } @@ -551,7 +553,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Reposi }) bar.Done() if err != nil { - return prunePlan{}, err + return PrunePlan{}, err } // At this point indexPacks contains only missing packs! @@ -561,8 +563,8 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Reposi for id, p := range indexPack { if p.usedBlobs == 0 { ignorePacks.Insert(id) - stats.blobs.remove += p.unusedBlobs - stats.size.remove += p.unusedSize + stats.Blobs.Remove += p.unusedBlobs + stats.Size.Remove += p.unusedSize delete(indexPack, id) } } @@ -572,7 +574,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Reposi for id := range indexPack { printer.E(" %v\n", id) } - return prunePlan{}, errorPacksMissing + return PrunePlan{}, errorPacksMissing } if len(ignorePacks) != 0 { printer.E("Missing but unneeded pack files are referenced in the index, will be repaired\n") @@ -584,7 +586,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Reposi if len(repackSmallCandidates) < 10 { // too few small files to be worth the trouble, this also prevents endlessly repacking // if there is just a single pack file below the target size - stats.packs.keep += uint(len(repackSmallCandidates)) + stats.Packs.Keep += uint(len(repackSmallCandidates)) } else { repackCandidates = append(repackCandidates, repackSmallCandidates...) } @@ -612,26 +614,26 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Reposi repack := func(id restic.ID, p packInfo) { repackPacks.Insert(id) - stats.blobs.repack += p.unusedBlobs + p.usedBlobs - stats.size.repack += p.unusedSize + p.usedSize - stats.blobs.repackrm += p.unusedBlobs - stats.size.repackrm += p.unusedSize + stats.Blobs.Repack += p.unusedBlobs + p.usedBlobs + stats.Size.Repack += p.unusedSize + p.usedSize + stats.Blobs.Repackrm += p.unusedBlobs + stats.Size.Repackrm += p.unusedSize if p.uncompressed { - stats.size.uncompressed -= p.unusedSize + p.usedSize + stats.Size.Uncompressed -= p.unusedSize + p.usedSize } } // calculate limit for number of unused bytes in the repo after repacking - maxUnusedSizeAfter := opts.maxUnusedBytes(stats.size.used) + maxUnusedSizeAfter := opts.maxUnusedBytes(stats.Size.Used) for _, p := range repackCandidates { - reachedUnusedSizeAfter := (stats.size.unused-stats.size.remove-stats.size.repackrm < maxUnusedSizeAfter) - reachedRepackSize := stats.size.repack+p.unusedSize+p.usedSize >= opts.MaxRepackBytes + reachedUnusedSizeAfter := (stats.Size.Unused-stats.Size.Remove-stats.Size.Repackrm < maxUnusedSizeAfter) + reachedRepackSize := stats.Size.Repack+p.unusedSize+p.usedSize >= opts.MaxRepackBytes packIsLargeEnough := p.unusedSize+p.usedSize >= uint64(targetPackSize) switch { case reachedRepackSize: - stats.packs.keep++ + stats.Packs.Keep++ case p.tpe != restic.DataBlob, p.mustCompress: // repacking non-data packs / uncompressed-trees is only limited by repackSize @@ -639,23 +641,23 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Reposi case reachedUnusedSizeAfter && packIsLargeEnough: // for all other packs stop repacking if tolerated unused size is reached. - stats.packs.keep++ + stats.Packs.Keep++ default: repack(p.ID, p.packInfo) } } - stats.packs.unref = uint(len(removePacksFirst)) - stats.packs.repack = uint(len(repackPacks)) - stats.packs.remove = uint(len(removePacks)) + stats.Packs.Unref = uint(len(removePacksFirst)) + stats.Packs.Repack = uint(len(repackPacks)) + stats.Packs.Remove = uint(len(removePacks)) if repo.Config().Version < 2 { // compression not supported for repository format version 1 - stats.size.uncompressed = 0 + stats.Size.Uncompressed = 0 } - return prunePlan{removePacksFirst: removePacksFirst, + return PrunePlan{removePacksFirst: removePacksFirst, removePacks: removePacks, repackPacks: repackPacks, ignorePacks: ignorePacks, @@ -663,54 +665,54 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Reposi } // printPruneStats prints out the statistics -func printPruneStats(printer progress.Printer, stats pruneStats) error { - printer.V("\nused: %10d blobs / %s\n", stats.blobs.used, ui.FormatBytes(stats.size.used)) - if stats.blobs.duplicate > 0 { - printer.V("duplicates: %10d blobs / %s\n", stats.blobs.duplicate, ui.FormatBytes(stats.size.duplicate)) +func printPruneStats(printer progress.Printer, stats PruneStats) error { + printer.V("\nused: %10d blobs / %s\n", stats.Blobs.Used, ui.FormatBytes(stats.Size.Used)) + if stats.Blobs.Duplicate > 0 { + printer.V("duplicates: %10d blobs / %s\n", stats.Blobs.Duplicate, ui.FormatBytes(stats.Size.Duplicate)) } - printer.V("unused: %10d blobs / %s\n", stats.blobs.unused, ui.FormatBytes(stats.size.unused)) - if stats.size.unref > 0 { - printer.V("unreferenced: %s\n", ui.FormatBytes(stats.size.unref)) + printer.V("unused: %10d blobs / %s\n", stats.Blobs.Unused, ui.FormatBytes(stats.Size.Unused)) + if stats.Size.Unref > 0 { + printer.V("unreferenced: %s\n", ui.FormatBytes(stats.Size.Unref)) } - totalBlobs := stats.blobs.used + stats.blobs.unused + stats.blobs.duplicate - totalSize := stats.size.used + stats.size.duplicate + stats.size.unused + stats.size.unref - unusedSize := stats.size.duplicate + stats.size.unused + totalBlobs := stats.Blobs.Used + stats.Blobs.Unused + stats.Blobs.Duplicate + totalSize := stats.Size.Used + stats.Size.Duplicate + stats.Size.Unused + stats.Size.Unref + unusedSize := stats.Size.Duplicate + stats.Size.Unused printer.V("total: %10d blobs / %s\n", totalBlobs, ui.FormatBytes(totalSize)) printer.V("unused size: %s of total size\n", ui.FormatPercent(unusedSize, totalSize)) - printer.P("\nto repack: %10d blobs / %s\n", stats.blobs.repack, ui.FormatBytes(stats.size.repack)) - printer.P("this removes: %10d blobs / %s\n", stats.blobs.repackrm, ui.FormatBytes(stats.size.repackrm)) - printer.P("to delete: %10d blobs / %s\n", stats.blobs.remove, ui.FormatBytes(stats.size.remove+stats.size.unref)) - totalPruneSize := stats.size.remove + stats.size.repackrm + stats.size.unref - printer.P("total prune: %10d blobs / %s\n", stats.blobs.remove+stats.blobs.repackrm, ui.FormatBytes(totalPruneSize)) - if stats.size.uncompressed > 0 { - printer.P("not yet compressed: %s\n", ui.FormatBytes(stats.size.uncompressed)) + printer.P("\nto repack: %10d blobs / %s\n", stats.Blobs.Repack, ui.FormatBytes(stats.Size.Repack)) + printer.P("this removes: %10d blobs / %s\n", stats.Blobs.Repackrm, ui.FormatBytes(stats.Size.Repackrm)) + printer.P("to delete: %10d blobs / %s\n", stats.Blobs.Remove, ui.FormatBytes(stats.Size.Remove+stats.Size.Unref)) + totalPruneSize := stats.Size.Remove + stats.Size.Repackrm + stats.Size.Unref + printer.P("total prune: %10d blobs / %s\n", stats.Blobs.Remove+stats.Blobs.Repackrm, ui.FormatBytes(totalPruneSize)) + if stats.Size.Uncompressed > 0 { + printer.P("not yet compressed: %s\n", ui.FormatBytes(stats.Size.Uncompressed)) } - printer.P("remaining: %10d blobs / %s\n", totalBlobs-(stats.blobs.remove+stats.blobs.repackrm), ui.FormatBytes(totalSize-totalPruneSize)) - unusedAfter := unusedSize - stats.size.remove - stats.size.repackrm + printer.P("remaining: %10d blobs / %s\n", totalBlobs-(stats.Blobs.Remove+stats.Blobs.Repackrm), ui.FormatBytes(totalSize-totalPruneSize)) + unusedAfter := unusedSize - stats.Size.Remove - stats.Size.Repackrm printer.P("unused size after prune: %s (%s of remaining size)\n", ui.FormatBytes(unusedAfter), ui.FormatPercent(unusedAfter, totalSize-totalPruneSize)) printer.P("\n") - printer.V("totally used packs: %10d\n", stats.packs.used) - printer.V("partly used packs: %10d\n", stats.packs.partlyUsed) - printer.V("unused packs: %10d\n\n", stats.packs.unused) + printer.V("totally used packs: %10d\n", stats.Packs.Used) + printer.V("partly used packs: %10d\n", stats.Packs.PartlyUsed) + printer.V("unused packs: %10d\n\n", stats.Packs.Unused) - printer.V("to keep: %10d packs\n", stats.packs.keep) - printer.V("to repack: %10d packs\n", stats.packs.repack) - printer.V("to delete: %10d packs\n", stats.packs.remove) - if stats.packs.unref > 0 { - printer.V("to delete: %10d unreferenced packs\n\n", stats.packs.unref) + printer.V("to keep: %10d packs\n", stats.Packs.Keep) + printer.V("to repack: %10d packs\n", stats.Packs.Repack) + printer.V("to delete: %10d packs\n", stats.Packs.Remove) + if stats.Packs.Unref > 0 { + printer.V("to delete: %10d unreferenced packs\n\n", stats.Packs.Unref) } return nil } -// doPrune does the actual pruning: +// DoPrune does the actual pruning: // - remove unreferenced packs first // - repack given pack files while keeping the given blobs // - rebuild the index while ignoring all files that will be deleted // - delete the files // plan.removePacks and plan.ignorePacks are modified in this function. -func doPrune(ctx context.Context, opts PruneOptions, repo restic.Repository, plan prunePlan, printer progress.Printer) (err error) { +func DoPrune(ctx context.Context, opts PruneOptions, repo restic.Repository, plan PrunePlan, printer progress.Printer) (err error) { if opts.DryRun { printer.V("Repeated prune dry-runs can report slightly different amounts of data to keep or repack. This is expected behavior.\n\n") if len(plan.removePacksFirst) > 0 {