diff --git a/cmd/entire/cli/checkpoint_group.go b/cmd/entire/cli/checkpoint_group.go index 11643b8205..76416abfbd 100644 --- a/cmd/entire/cli/checkpoint_group.go +++ b/cmd/entire/cli/checkpoint_group.go @@ -39,6 +39,7 @@ Examples: cmd.AddCommand(newExplainCmd()) cmd.AddCommand(newRewindCmd()) cmd.AddCommand(newCheckpointSearchCmd()) + cmd.AddCommand(newCheckpointMigrateRefsCmd()) return cmd } diff --git a/cmd/entire/cli/checkpoint_migrate_refs.go b/cmd/entire/cli/checkpoint_migrate_refs.go new file mode 100644 index 0000000000..a148670778 --- /dev/null +++ b/cmd/entire/cli/checkpoint_migrate_refs.go @@ -0,0 +1,413 @@ +package cli + +import ( + "bufio" + "bytes" + "context" + "errors" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "sync" + "sync/atomic" + + "github.com/entireio/cli/cmd/entire/cli/checkpoint" + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/gitrepo" + "github.com/entireio/cli/cmd/entire/cli/interactive" + "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/strategy" + + "github.com/go-git/go-git/v6" + "github.com/go-git/go-git/v6/plumbing" + "github.com/spf13/cobra" +) + +// treeRefName returns the ref that points at a checkpoint's metadata subtree: +// refs/entire/checkpoints///tree. +func treeRefName(cpID id.CheckpointID) plumbing.ReferenceName { + return plumbing.ReferenceName("refs/entire/checkpoints/" + cpID.Path() + "/tree") +} + +// checkpointEntry pairs a checkpoint ID with its metadata-subtree hash on v1. +type checkpointEntry struct { + ID id.CheckpointID + Tree plumbing.Hash +} + +// buildCheckpointList walks the entire/checkpoints/v1 branch tree and returns +// one entry per checkpoint. A missing v1 branch yields an empty list (no error). +func buildCheckpointList(ctx context.Context, repo *git.Repository) ([]checkpointEntry, error) { + branch := plumbing.NewBranchReferenceName(paths.MetadataBranchName) + ref, err := repo.Reference(branch, true) + if err != nil { + // No v1 branch -> nothing to migrate. + return nil, nil //nolint:nilerr // absent branch is an empty list, not an error + } + commit, err := repo.CommitObject(ref.Hash()) + if err != nil { + return nil, fmt.Errorf("resolve v1 commit: %w", err) + } + tree, err := commit.Tree() + if err != nil { + return nil, fmt.Errorf("resolve v1 tree: %w", err) + } + + var entries []checkpointEntry + walkErr := checkpoint.WalkCheckpointShards(ctx, repo, tree, func(cpID id.CheckpointID, cpTreeHash plumbing.Hash) error { + entries = append(entries, checkpointEntry{ID: cpID, Tree: cpTreeHash}) + return nil + }) + if walkErr != nil { + return nil, fmt.Errorf("walk checkpoint shards: %w", walkErr) + } + return entries, nil +} + +// writeCacheFile writes one "\t" line per entry, creating parent +// directories as needed. +func writeCacheFile(path string, entries []checkpointEntry) error { + if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil { + return fmt.Errorf("create cache dir: %w", err) + } + var buf bytes.Buffer + for _, e := range entries { + fmt.Fprintf(&buf, "%s\t%s\n", e.ID, e.Tree.String()) + } + if err := os.WriteFile(path, buf.Bytes(), 0o644); err != nil { //nolint:gosec // cache file under git common dir + return fmt.Errorf("write cache file: %w", err) + } + return nil +} + +// readCacheFile parses the TSV cache file. Blank lines are skipped; malformed +// lines return an error naming the offending content. +func readCacheFile(path string) ([]checkpointEntry, error) { + f, err := os.Open(path) //nolint:gosec // cache file under git common dir + if err != nil { + return nil, fmt.Errorf("open cache file: %w", err) + } + defer f.Close() + + var entries []checkpointEntry + sc := bufio.NewScanner(f) + sc.Buffer(make([]byte, 0, 64*1024), 1024*1024) + for sc.Scan() { + line := strings.TrimSpace(sc.Text()) + if line == "" { + continue + } + idStr, hashStr, ok := strings.Cut(line, "\t") + if !ok { + return nil, fmt.Errorf("malformed cache line: %q", line) + } + cpID, err := id.NewCheckpointID(idStr) + if err != nil { + return nil, fmt.Errorf("malformed cache line %q: %w", line, err) + } + entries = append(entries, checkpointEntry{ID: cpID, Tree: plumbing.NewHash(hashStr)}) + } + if err := sc.Err(); err != nil { + return nil, fmt.Errorf("read cache file: %w", err) + } + return entries, nil +} + +const ( + treeRefPrefix = "refs/entire/checkpoints/" + treeRefSuffix = "/tree" +) + +// snapshotExistingTreeRefs reads every refs/entire/checkpoints/.../tree ref once +// into a map keyed by full ref name. Reading up front keeps the later existence +// check a concurrency-safe map read (go-git ref reads are not parallelized). +func snapshotExistingTreeRefs(repo *git.Repository) (map[string]plumbing.Hash, error) { + iter, err := repo.References() + if err != nil { + return nil, fmt.Errorf("list references: %w", err) + } + defer iter.Close() + + out := make(map[string]plumbing.Hash) + err = iter.ForEach(func(ref *plumbing.Reference) error { + name := ref.Name().String() + if strings.HasPrefix(name, treeRefPrefix) && strings.HasSuffix(name, treeRefSuffix) { + out[name] = ref.Hash() + } + return nil + }) + if err != nil { + return nil, fmt.Errorf("iterate references: %w", err) + } + return out, nil +} + +// refUpdate is a single ref that must be created or repointed. +type refUpdate struct { + Ref string + Hash plumbing.Hash +} + +// migrateRefsResult summarizes a run. +type migrateRefsResult struct { + Created int + Skipped int + Total int +} + +// processEntries fans the entries across a worker pool. Each worker compares the +// desired ref against the pre-snapshotted existing refs: it skips when present +// and already correct, otherwise emits a refUpdate. Progress (processed/total) +// is written to the progress writer, throttled. Writes are NOT performed here. +func processEntries(ctx context.Context, entries []checkpointEntry, existing map[string]plumbing.Hash, workers int, progress io.Writer) ([]refUpdate, migrateRefsResult, error) { + total := len(entries) + if workers < 1 { + workers = 1 + } + + in := make(chan checkpointEntry) + out := make(chan refUpdate) + var processed int64 + + var wg sync.WaitGroup + for range workers { + wg.Go(func() { + for e := range in { + if ctx.Err() != nil { + return + } + ref := treeRefName(e.ID).String() + if cur, ok := existing[ref]; !ok || cur != e.Tree { + out <- refUpdate{Ref: ref, Hash: e.Tree} + } + n := atomic.AddInt64(&processed, 1) + reportProgress(progress, n, int64(total)) + } + }) + } + + go func() { + defer close(in) + for _, e := range entries { + select { + case in <- e: + case <-ctx.Done(): + return + } + } + }() + + go func() { + wg.Wait() + close(out) + }() + + var updates []refUpdate + for u := range out { + updates = append(updates, u) + } + + flushProgress(progress) + return updates, migrateRefsResult{ + Created: len(updates), + Skipped: total - len(updates), + Total: total, + }, ctx.Err() //nolint:wrapcheck // ctx.Err() is wrapped by the caller (runMigrateTreeRefs) +} + +// reportProgress prints "n/total". On a terminal it rewrites a single line with +// \r; otherwise it prints periodically (every 100 items) to avoid log spam. +func reportProgress(w io.Writer, n, total int64) { + if interactive.IsTerminalWriter(w) { + fmt.Fprintf(w, "\r%d/%d", n, total) + return + } + if n == total || n%100 == 0 { + fmt.Fprintf(w, "%d/%d\n", n, total) + } +} + +// flushProgress ends the single-line terminal progress with a newline. +func flushProgress(w io.Writer) { + if interactive.IsTerminalWriter(w) { + fmt.Fprintln(w) + } +} + +// applyRefUpdates creates/repoints refs in batches via `git update-ref --stdin`. +// Each batch is one atomic git transaction. The "update " line +// form (old value omitted) creates-or-updates regardless of current value; ref +// names are controlled hex + "/tree", so the non-NUL line format is safe. +func applyRefUpdates(ctx context.Context, repoRoot string, updates []refUpdate, batchSize int) error { + if len(updates) == 0 { + return nil + } + if batchSize < 1 { + batchSize = 1000 + } + for start := 0; start < len(updates); start += batchSize { + end := min(start+batchSize, len(updates)) + if err := applyRefUpdateBatch(ctx, repoRoot, updates[start:end]); err != nil { + return err + } + } + return nil +} + +func applyRefUpdateBatch(ctx context.Context, repoRoot string, batch []refUpdate) error { + var stdin bytes.Buffer + for _, u := range batch { + fmt.Fprintf(&stdin, "update %s %s\n", u.Ref, u.Hash.String()) + } + cmd := exec.CommandContext(ctx, "git", "update-ref", "--stdin") + cmd.Dir = repoRoot + cmd.Stdin = &stdin + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("git update-ref --stdin: %s: %w", strings.TrimSpace(string(out)), err) + } + return nil +} + +const refUpdateBatchSize = 1000 + +// migrateRefsOptions configures a migrate-refs run. repoRoot, out, and progress +// are required; the cobra layer fills them from CWD and the command's streams. +type migrateRefsOptions struct { + repoRoot string + cacheFile string + workers int + refresh bool + dryRun bool + out io.Writer + progress io.Writer +} + +// runMigrateTreeRefs runs both phases and returns the summary. In dry-run mode +// the returned Created count reflects the refs that WOULD be written. +func runMigrateTreeRefs(ctx context.Context, opts migrateRefsOptions) (migrateRefsResult, error) { + if opts.workers < 1 { + opts.workers = runtime.NumCPU() + } + + repo, err := gitrepo.OpenPath(opts.repoRoot) + if err != nil { + return migrateRefsResult{}, fmt.Errorf("open repository: %w", err) + } + + // Phase 1: build (or reuse) the checkpoint list. + entries, err := loadOrBuildList(ctx, repo, opts) + if err != nil { + return migrateRefsResult{}, err + } + if len(entries) == 0 { + fmt.Fprintln(opts.out, "No checkpoints found on entire/checkpoints/v1; nothing to do.") + return migrateRefsResult{}, nil + } + + // Phase 2: snapshot existing refs, decide updates, apply. + existing, err := snapshotExistingTreeRefs(repo) + if err != nil { + return migrateRefsResult{}, err + } + updates, result, err := processEntries(ctx, entries, existing, opts.workers, opts.progress) + if err != nil { + return migrateRefsResult{}, fmt.Errorf("process checkpoints: %w", err) + } + + if opts.dryRun { + fmt.Fprintf(opts.out, "[dry-run] would create %d, skip %d (total %d)\n", result.Created, result.Skipped, result.Total) + return result, nil + } + + if err := applyRefUpdates(ctx, opts.repoRoot, updates, refUpdateBatchSize); err != nil { + return migrateRefsResult{}, err + } + fmt.Fprintf(opts.out, "created=%d skipped=%d total=%d\n", result.Created, result.Skipped, result.Total) + return result, nil +} + +// loadOrBuildList returns the cached entry list when a cache file exists and +// --refresh is not set; otherwise it walks v1 and writes the cache file. +func loadOrBuildList(ctx context.Context, repo *git.Repository, opts migrateRefsOptions) ([]checkpointEntry, error) { + if !opts.refresh && opts.cacheFile != "" { + if _, statErr := os.Stat(opts.cacheFile); statErr == nil { + entries, err := readCacheFile(opts.cacheFile) + if err != nil { + return nil, err + } + fmt.Fprintf(opts.out, "Reusing cached list: %d checkpoints (%s)\n", len(entries), opts.cacheFile) + return entries, nil + } + } + + entries, err := buildCheckpointList(ctx, repo) + if err != nil { + return nil, err + } + if opts.cacheFile != "" { + if err := writeCacheFile(opts.cacheFile, entries); err != nil { + return nil, err + } + } + fmt.Fprintf(opts.out, "Found %d checkpoints on entire/checkpoints/v1\n", len(entries)) + return entries, nil +} + +// newCheckpointMigrateRefsCmd builds the hidden `entire checkpoint migrate-refs` +// command that backfills a tree ref per checkpoint. +func newCheckpointMigrateRefsCmd() *cobra.Command { + var ( + workers int + cacheFile string + refresh bool + dryRun bool + ) + + cmd := &cobra.Command{ + Use: "migrate-refs", + Short: "Backfill a tree ref per checkpoint (refs/entire/checkpoints///tree)", + Hidden: true, + Args: cobra.NoArgs, + Long: `Create one git ref per checkpoint pointing at that checkpoint's metadata +subtree on the entire/checkpoints/v1 branch. + +The full checkpoint list is read from entire/checkpoints/v1 and cached to a file, +then processed in parallel with progress. Re-runs reuse the cache file and skip +refs that already point at the correct tree. Refs are created locally only.`, + RunE: func(cmd *cobra.Command, _ []string) error { + ctx := cmd.Context() + repoRoot, err := paths.WorktreeRoot(ctx) + if err != nil { + cmd.SilenceUsage = true + return errors.New("not a git repository") + } + if cacheFile == "" { + commonDir, err := strategy.GetGitCommonDir(ctx) + if err != nil { + return fmt.Errorf("resolve git common dir: %w", err) + } + cacheFile = filepath.Join(commonDir, "entire-migrate-refs", "checkpoints.tsv") + } + _, err = runMigrateTreeRefs(ctx, migrateRefsOptions{ + repoRoot: repoRoot, + cacheFile: cacheFile, + workers: workers, + refresh: refresh, + dryRun: dryRun, + out: cmd.OutOrStdout(), + progress: cmd.ErrOrStderr(), + }) + return err + }, + } + + cmd.Flags().IntVar(&workers, "workers", 0, "Number of parallel workers (0 = number of CPUs)") + cmd.Flags().StringVar(&cacheFile, "cache-file", "", "Path to the checkpoint list cache file (default: /entire-migrate-refs/checkpoints.tsv)") + cmd.Flags().BoolVar(&refresh, "refresh", false, "Ignore an existing cache file and re-walk entire/checkpoints/v1") + cmd.Flags().BoolVar(&dryRun, "dry-run", false, "Compute and report; do not write any refs") + return cmd +} diff --git a/cmd/entire/cli/checkpoint_migrate_refs_bench_test.go b/cmd/entire/cli/checkpoint_migrate_refs_bench_test.go new file mode 100644 index 0000000000..d80147b0b4 --- /dev/null +++ b/cmd/entire/cli/checkpoint_migrate_refs_bench_test.go @@ -0,0 +1,224 @@ +package cli + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/gitrepo" + "github.com/entireio/cli/cmd/entire/cli/paths" + + "github.com/go-git/go-git/v6" + "github.com/go-git/go-git/v6/plumbing" + "github.com/go-git/go-git/v6/plumbing/object" +) + +// These benchmarks compare two ways to resolve a single checkpoint's metadata +// subtree on a real, large checkpoints repo: +// +// - "V1Walk" (previous approach): resolve the entire/checkpoints/v1 branch -> +// commit -> root tree, then navigate to the / subtree. This +// loads the commit, the 256-entry root tree, the bucket tree, and finally +// the checkpoint subtree. +// - "TreeRef" (new approach): read refs/entire/checkpoints///tree +// directly and load that one tree object. +// +// Each is offered in two flavors: +// - warm: a single shared *git.Repository handle (go-git's object cache warms +// up after the first iteration). This isolates the marginal per-lookup cost +// for repeated lookups inside one long-lived process. +// - cold: a fresh gitrepo.OpenPath per op, modelling a one-shot CLI invocation +// (e.g. `entire checkpoint explain `) where nothing is cached yet. +// +// The benchmarks point at a real repo and SKIP when it is absent, so they never +// run in CI. Override the path with ENTIRE_BENCH_CHECKPOINTS_REPO; the default +// is ~/git/entireio/cli-checkpoints. +// +// Run with, e.g.: +// +// go test ./cmd/entire/cli/ -run '^$' -bench 'BenchmarkResolveSubtree' -benchmem + +func benchRepoPath(b *testing.B) string { + b.Helper() + if p := os.Getenv("ENTIRE_BENCH_CHECKPOINTS_REPO"); p != "" { + return p + } + home, err := os.UserHomeDir() + if err != nil { + b.Skipf("cannot resolve home dir: %v", err) + } + return filepath.Join(home, "git", "entireio", "cli-checkpoints") +} + +// benchCheckpointIDs collects the checkpoint IDs that already have tree refs. +// Using the tree refs (rather than walking v1) to enumerate keeps setup cheap +// and guarantees both resolvers can find every sampled ID. +func benchCheckpointIDs(b *testing.B, repo *git.Repository) []id.CheckpointID { + b.Helper() + iter, err := repo.References() + if err != nil { + b.Fatalf("list references: %v", err) + } + defer iter.Close() + + var ids []id.CheckpointID + err = iter.ForEach(func(ref *plumbing.Reference) error { + name := ref.Name().String() + if !strings.HasPrefix(name, treeRefPrefix) || !strings.HasSuffix(name, treeRefSuffix) { + return nil + } + middle := strings.TrimSuffix(strings.TrimPrefix(name, treeRefPrefix), treeRefSuffix) + cpID, err := id.NewCheckpointID(strings.ReplaceAll(middle, "/", "")) + if err != nil { + return nil //nolint:nilerr // skip refs whose path is not a valid checkpoint id + } + ids = append(ids, cpID) + return nil + }) + if err != nil { + b.Fatalf("iterate references: %v", err) + } + if len(ids) == 0 { + b.Skip("no checkpoint tree refs found; run `entire checkpoint migrate-refs` against the repo first") + } + return ids +} + +func openBenchRepo(b *testing.B) *git.Repository { + b.Helper() + dir := benchRepoPath(b) + if info, err := os.Stat(dir); err != nil || !info.IsDir() { + b.Skipf("benchmark repo not found at %s (set ENTIRE_BENCH_CHECKPOINTS_REPO)", dir) + } + repo, err := gitrepo.OpenPath(dir) + if err != nil { + b.Fatalf("open repo %s: %v", dir, err) + } + return repo +} + +// resolveSubtreeViaV1Walk is the previous approach: navigate the v1 branch tree. +func resolveSubtreeViaV1Walk(repo *git.Repository, cpID id.CheckpointID) (*object.Tree, error) { + branch := plumbing.NewBranchReferenceName(paths.MetadataBranchName) + ref, err := repo.Reference(branch, true) + if err != nil { + return nil, err + } + commit, err := repo.CommitObject(ref.Hash()) + if err != nil { + return nil, err + } + root, err := commit.Tree() + if err != nil { + return nil, err + } + sub, err := root.Tree(cpID.Path()) + if err != nil { + return nil, err + } + return sub, nil +} + +// resolveSubtreeViaTreeRef is the new approach: read the per-checkpoint tree ref. +func resolveSubtreeViaTreeRef(repo *git.Repository, cpID id.CheckpointID) (*object.Tree, error) { + ref, err := repo.Reference(treeRefName(cpID), false) + if err != nil { + return nil, err + } + tree, err := repo.TreeObject(ref.Hash()) + if err != nil { + return nil, err + } + return tree, nil +} + +func BenchmarkResolveSubtree_V1Walk_Warm(b *testing.B) { + repo := openBenchRepo(b) + ids := benchCheckpointIDs(b, repo) + + b.ReportAllocs() + i := 0 + for b.Loop() { + tree, err := resolveSubtreeViaV1Walk(repo, ids[i%len(ids)]) + if err != nil { + b.Fatalf("v1 walk: %v", err) + } + if tree.Hash.IsZero() { + b.Fatal("zero tree hash") + } + i++ + } +} + +func BenchmarkResolveSubtree_TreeRef_Warm(b *testing.B) { + repo := openBenchRepo(b) + ids := benchCheckpointIDs(b, repo) + + b.ReportAllocs() + i := 0 + for b.Loop() { + tree, err := resolveSubtreeViaTreeRef(repo, ids[i%len(ids)]) + if err != nil { + b.Fatalf("tree ref: %v", err) + } + if tree.Hash.IsZero() { + b.Fatal("zero tree hash") + } + i++ + } +} + +func BenchmarkResolveSubtree_V1Walk_Cold(b *testing.B) { + dir := benchRepoPath(b) + if info, err := os.Stat(dir); err != nil || !info.IsDir() { + b.Skipf("benchmark repo not found at %s (set ENTIRE_BENCH_CHECKPOINTS_REPO)", dir) + } + // Enumerate IDs once with a throwaway handle; the timed loop opens fresh. + seedRepo := openBenchRepo(b) + ids := benchCheckpointIDs(b, seedRepo) + + b.ReportAllocs() + i := 0 + for b.Loop() { + repo, err := gitrepo.OpenPath(dir) + if err != nil { + b.Fatalf("open repo: %v", err) + } + tree, err := resolveSubtreeViaV1Walk(repo, ids[i%len(ids)]) + if err != nil { + b.Fatalf("v1 walk: %v", err) + } + if tree.Hash.IsZero() { + b.Fatal("zero tree hash") + } + i++ + } +} + +func BenchmarkResolveSubtree_TreeRef_Cold(b *testing.B) { + dir := benchRepoPath(b) + if info, err := os.Stat(dir); err != nil || !info.IsDir() { + b.Skipf("benchmark repo not found at %s (set ENTIRE_BENCH_CHECKPOINTS_REPO)", dir) + } + seedRepo := openBenchRepo(b) + ids := benchCheckpointIDs(b, seedRepo) + + b.ReportAllocs() + i := 0 + for b.Loop() { + repo, err := gitrepo.OpenPath(dir) + if err != nil { + b.Fatalf("open repo: %v", err) + } + tree, err := resolveSubtreeViaTreeRef(repo, ids[i%len(ids)]) + if err != nil { + b.Fatalf("tree ref: %v", err) + } + if tree.Hash.IsZero() { + b.Fatal("zero tree hash") + } + i++ + } +} diff --git a/cmd/entire/cli/checkpoint_migrate_refs_test.go b/cmd/entire/cli/checkpoint_migrate_refs_test.go new file mode 100644 index 0000000000..565caeacc1 --- /dev/null +++ b/cmd/entire/cli/checkpoint_migrate_refs_test.go @@ -0,0 +1,481 @@ +package cli + +import ( + "bytes" + "context" + "errors" + "os" + "path/filepath" + "sort" + "testing" + + "github.com/entireio/cli/cmd/entire/cli/checkpoint" + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/gitrepo" + "github.com/entireio/cli/redact" + + "github.com/go-git/go-git/v6" + "github.com/go-git/go-git/v6/plumbing" + "github.com/go-git/go-git/v6/plumbing/object" +) + +func TestTreeRefName(t *testing.T) { + t.Parallel() + + got := treeRefName(id.MustCheckpointID("ab3c4d5e6f70")) + want := "refs/entire/checkpoints/ab/3c4d5e6f70/tree" + if got.String() != want { + t.Fatalf("treeRefName = %q, want %q", got.String(), want) + } +} + +// seedV1Checkpoints inits a repo, makes an initial commit, and writes the given +// checkpoint IDs onto entire/checkpoints/v1. Returns the repo dir. +func seedV1Checkpoints(t *testing.T, ids ...string) string { + t.Helper() + + dir := t.TempDir() + repo, err := git.PlainInit(dir, false) + if err != nil { + t.Fatalf("PlainInit: %v", err) + } + wt, err := repo.Worktree() + if err != nil { + t.Fatalf("Worktree: %v", err) + } + if err := os.WriteFile(filepath.Join(dir, "README.md"), []byte("# t"), 0o644); err != nil { + t.Fatalf("write README: %v", err) + } + if _, err := wt.Add("README.md"); err != nil { + t.Fatalf("add: %v", err) + } + if _, err := wt.Commit("init", &git.CommitOptions{ + Author: &object.Signature{Name: "T", Email: "t@t.com"}, + }); err != nil { + t.Fatalf("commit: %v", err) + } + + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) + for i, s := range ids { + err := store.WriteCommitted(context.Background(), checkpoint.WriteCommittedOptions{ + CheckpointID: id.MustCheckpointID(s), + SessionID: "session-" + s, + Strategy: "manual-commit", + Transcript: redact.AlreadyRedacted([]byte("line\n")), + Prompts: []string{"prompt"}, + AuthorName: "T", + AuthorEmail: "t@t.com", + CheckpointsCount: i + 1, + }) + if err != nil { + t.Fatalf("WriteCommitted(%s): %v", s, err) + } + } + return dir +} + +func TestBuildCheckpointList(t *testing.T) { + t.Parallel() + + dir := seedV1Checkpoints(t, "ab3c4d5e6f70", "cd1122334455") + repo, err := gitrepo.OpenPath(dir) + if err != nil { + t.Fatalf("OpenPath: %v", err) + } + + entries, err := buildCheckpointList(context.Background(), repo) + if err != nil { + t.Fatalf("buildCheckpointList: %v", err) + } + if len(entries) != 2 { + t.Fatalf("got %d entries, want 2", len(entries)) + } + got := []string{entries[0].ID.String(), entries[1].ID.String()} + sort.Strings(got) + want := []string{"ab3c4d5e6f70", "cd1122334455"} + if got[0] != want[0] || got[1] != want[1] { + t.Fatalf("ids = %v, want %v", got, want) + } + for _, e := range entries { + if e.Tree.IsZero() { + t.Fatalf("entry %s has zero tree hash", e.ID) + } + if _, err := repo.TreeObject(e.Tree); err != nil { + t.Fatalf("tree %s not a tree object: %v", e.Tree, err) + } + } +} + +func TestBuildCheckpointList_NoBranch(t *testing.T) { + t.Parallel() + + dir := seedV1Checkpoints(t) // no checkpoints -> no v1 branch + repo, err := gitrepo.OpenPath(dir) + if err != nil { + t.Fatalf("OpenPath: %v", err) + } + entries, err := buildCheckpointList(context.Background(), repo) + if err != nil { + t.Fatalf("buildCheckpointList: %v", err) + } + if len(entries) != 0 { + t.Fatalf("got %d entries, want 0", len(entries)) + } +} + +func TestCacheFileRoundTrip(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + path := filepath.Join(dir, "nested", "checkpoints.tsv") + in := []checkpointEntry{ + {ID: id.MustCheckpointID("ab3c4d5e6f70"), Tree: plumbing.NewHash("1111111111111111111111111111111111111111")}, + {ID: id.MustCheckpointID("cd1122334455"), Tree: plumbing.NewHash("2222222222222222222222222222222222222222")}, + } + if err := writeCacheFile(path, in); err != nil { + t.Fatalf("writeCacheFile: %v", err) + } + out, err := readCacheFile(path) + if err != nil { + t.Fatalf("readCacheFile: %v", err) + } + if len(out) != 2 || out[0].ID != in[0].ID || out[0].Tree != in[0].Tree { + t.Fatalf("round trip mismatch: %+v", out) + } +} + +func TestReadCacheFile_Malformed(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + path := filepath.Join(dir, "bad.tsv") + if err := os.WriteFile(path, []byte("not-a-valid-line\n"), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + if _, err := readCacheFile(path); err == nil { + t.Fatalf("expected error for malformed line, got nil") + } +} + +func TestSnapshotExistingTreeRefs(t *testing.T) { + t.Parallel() + + dir := seedV1Checkpoints(t, "ab3c4d5e6f70") + repo, err := gitrepo.OpenPath(dir) + if err != nil { + t.Fatalf("OpenPath: %v", err) + } + + // Pre-create one tree ref and an unrelated ref to confirm filtering. + treeHash := plumbing.NewHash("1111111111111111111111111111111111111111") + if err := repo.Storer.SetReference(plumbing.NewHashReference( + "refs/entire/checkpoints/ab/3c4d5e6f70/tree", treeHash)); err != nil { + t.Fatalf("set tree ref: %v", err) + } + if err := repo.Storer.SetReference(plumbing.NewHashReference( + "refs/entire/checkpoints/v1.1", treeHash)); err != nil { + t.Fatalf("set v1.1 ref: %v", err) + } + + got, err := snapshotExistingTreeRefs(repo) + if err != nil { + t.Fatalf("snapshotExistingTreeRefs: %v", err) + } + if len(got) != 1 { + t.Fatalf("got %d refs, want 1: %v", len(got), got) + } + if h, ok := got["refs/entire/checkpoints/ab/3c4d5e6f70/tree"]; !ok || h != treeHash { + t.Fatalf("missing or wrong tree ref: %v", got) + } +} + +func TestProcessEntries_SkipsCorrectAndCollectsMissing(t *testing.T) { + t.Parallel() + + good := plumbing.NewHash("1111111111111111111111111111111111111111") + stale := plumbing.NewHash("2222222222222222222222222222222222222222") + fresh := plumbing.NewHash("3333333333333333333333333333333333333333") + + entries := []checkpointEntry{ + {ID: id.MustCheckpointID("aa0000000001"), Tree: good}, // already correct -> skip + {ID: id.MustCheckpointID("bb0000000002"), Tree: fresh}, // stale ref -> update + {ID: id.MustCheckpointID("cc0000000003"), Tree: fresh}, // missing -> create + } + existing := map[string]plumbing.Hash{ + "refs/entire/checkpoints/aa/0000000001/tree": good, + "refs/entire/checkpoints/bb/0000000002/tree": stale, + } + + var progress bytes.Buffer + updates, result, err := processEntries(context.Background(), entries, existing, 4, &progress) + if err != nil { + t.Fatalf("processEntries: %v", err) + } + + if result.Total != 3 || result.Skipped != 1 || result.Created != 2 { + t.Fatalf("result = %+v, want total=3 skipped=1 created=2", result) + } + if len(updates) != 2 { + t.Fatalf("got %d updates, want 2", len(updates)) + } + gotRefs := map[string]plumbing.Hash{} + for _, u := range updates { + gotRefs[u.Ref] = u.Hash + } + if gotRefs["refs/entire/checkpoints/bb/0000000002/tree"] != fresh { + t.Fatalf("bb not updated to fresh: %v", gotRefs) + } + if gotRefs["refs/entire/checkpoints/cc/0000000003/tree"] != fresh { + t.Fatalf("cc not created with fresh: %v", gotRefs) + } + if progress.Len() == 0 { + t.Fatalf("expected progress output, got none") + } +} + +func TestProcessEntries_ContextCancelled(t *testing.T) { + t.Parallel() + + entries := []checkpointEntry{ + {ID: id.MustCheckpointID("aa0000000001"), Tree: plumbing.NewHash("1111111111111111111111111111111111111111")}, + {ID: id.MustCheckpointID("bb0000000002"), Tree: plumbing.NewHash("2222222222222222222222222222222222222222")}, + {ID: id.MustCheckpointID("cc0000000003"), Tree: plumbing.NewHash("3333333333333333333333333333333333333333")}, + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + _, _, err := processEntries(ctx, entries, map[string]plumbing.Hash{}, 4, &bytes.Buffer{}) + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected context.Canceled, got %v", err) + } +} + +func TestApplyRefUpdates_MultipleBatches(t *testing.T) { + t.Parallel() + + dir := seedV1Checkpoints(t, "ab3c4d5e6f70", "cd1122334455") + repo, err := gitrepo.OpenPath(dir) + if err != nil { + t.Fatalf("OpenPath: %v", err) + } + entries, err := buildCheckpointList(context.Background(), repo) + if err != nil || len(entries) != 2 { + t.Fatalf("buildCheckpointList: %v len=%d", err, len(entries)) + } + + updates := make([]refUpdate, 0, len(entries)) + for _, e := range entries { + updates = append(updates, refUpdate{Ref: treeRefName(e.ID).String(), Hash: e.Tree}) + } + + // batchSize=1 forces one batch per update (two batches here). + if err := applyRefUpdates(context.Background(), dir, updates, 1); err != nil { + t.Fatalf("applyRefUpdates: %v", err) + } + + for _, e := range entries { + ref, err := repo.Reference(treeRefName(e.ID), false) + if err != nil { + t.Fatalf("ref for %s missing: %v", e.ID, err) + } + if ref.Hash() != e.Tree { + t.Fatalf("ref %s -> %s, want %s", e.ID, ref.Hash(), e.Tree) + } + } +} + +func TestApplyRefUpdates(t *testing.T) { + t.Parallel() + + dir := seedV1Checkpoints(t, "ab3c4d5e6f70") + repo, err := gitrepo.OpenPath(dir) + if err != nil { + t.Fatalf("OpenPath: %v", err) + } + // Use the real subtree hash so update-ref's object existence check passes. + entries, err := buildCheckpointList(context.Background(), repo) + if err != nil || len(entries) != 1 { + t.Fatalf("buildCheckpointList: %v len=%d", err, len(entries)) + } + want := entries[0].Tree + + updates := []refUpdate{ + {Ref: "refs/entire/checkpoints/ab/3c4d5e6f70/tree", Hash: want}, + } + if err := applyRefUpdates(context.Background(), dir, updates, 1000); err != nil { + t.Fatalf("applyRefUpdates: %v", err) + } + + ref, err := repo.Reference("refs/entire/checkpoints/ab/3c4d5e6f70/tree", false) + if err != nil { + t.Fatalf("reference not created: %v", err) + } + if ref.Hash() != want { + t.Fatalf("ref points at %s, want %s", ref.Hash(), want) + } +} + +func TestApplyRefUpdates_Empty(t *testing.T) { + t.Parallel() + + dir := seedV1Checkpoints(t, "ab3c4d5e6f70") + if err := applyRefUpdates(context.Background(), dir, nil, 1000); err != nil { + t.Fatalf("applyRefUpdates(nil): %v", err) + } +} + +func TestRunMigrateTreeRefs_EndToEnd(t *testing.T) { + t.Parallel() + + dir := seedV1Checkpoints(t, "ab3c4d5e6f70", "cd1122334455") + cache := filepath.Join(t.TempDir(), "checkpoints.tsv") + + var stdout, progress bytes.Buffer + opts := migrateRefsOptions{ + repoRoot: dir, + cacheFile: cache, + workers: 4, + out: &stdout, + progress: &progress, + } + + res, err := runMigrateTreeRefs(context.Background(), opts) + if err != nil { + t.Fatalf("run: %v", err) + } + if res.Total != 2 || res.Created != 2 || res.Skipped != 0 { + t.Fatalf("first run result = %+v", res) + } + + // Cache file exists after phase 1. + if _, err := os.Stat(cache); err != nil { + t.Fatalf("cache file not written: %v", err) + } + + repo, err := gitrepo.OpenPath(dir) + if err != nil { + t.Fatalf("OpenPath: %v", err) + } + expected, err := buildCheckpointList(context.Background(), repo) + if err != nil { + t.Fatalf("buildCheckpointList: %v", err) + } + for _, e := range expected { + ref, err := repo.Reference(treeRefName(e.ID), false) + if err != nil { + t.Fatalf("ref for %s missing: %v", e.ID, err) + } + if ref.Hash() != e.Tree { + t.Fatalf("ref %s -> %s, want %s", e.ID, ref.Hash(), e.Tree) + } + } + + // Second run is idempotent: everything skipped. + res2, err := runMigrateTreeRefs(context.Background(), opts) + if err != nil { + t.Fatalf("second run: %v", err) + } + if res2.Created != 0 || res2.Skipped != 2 { + t.Fatalf("second run result = %+v, want created=0 skipped=2", res2) + } +} + +func TestRunMigrateTreeRefs_DryRun(t *testing.T) { + t.Parallel() + + dir := seedV1Checkpoints(t, "ab3c4d5e6f70") + cache := filepath.Join(t.TempDir(), "checkpoints.tsv") + var stdout, progress bytes.Buffer + opts := migrateRefsOptions{ + repoRoot: dir, cacheFile: cache, workers: 2, dryRun: true, + out: &stdout, progress: &progress, + } + res, err := runMigrateTreeRefs(context.Background(), opts) + if err != nil { + t.Fatalf("run: %v", err) + } + if res.Created != 1 { + t.Fatalf("dry-run result = %+v, want created=1 (would-create)", res) + } + repo, err := gitrepo.OpenPath(dir) + if err != nil { + t.Fatalf("OpenPath: %v", err) + } + if _, err := repo.Reference("refs/entire/checkpoints/ab/3c4d5e6f70/tree", false); err == nil { + t.Fatalf("dry-run must not create refs") + } +} + +func TestRunMigrateTreeRefs_ResumeReusesCache(t *testing.T) { + t.Parallel() + + dir := seedV1Checkpoints(t, "ab3c4d5e6f70") + cache := filepath.Join(t.TempDir(), "checkpoints.tsv") + // Pre-seed a cache file with a DIFFERENT id; without --refresh it is reused. + // The tree hash must be a real object: git update-ref rejects refs that point + // at nonexistent objects, so reuse the seeded checkpoint's actual subtree. + repoForTree, err := gitrepo.OpenPath(dir) + if err != nil { + t.Fatalf("OpenPath: %v", err) + } + seeded, err := buildCheckpointList(context.Background(), repoForTree) + if err != nil || len(seeded) != 1 { + t.Fatalf("buildCheckpointList: %v len=%d", err, len(seeded)) + } + preTree := seeded[0].Tree + if err := writeCacheFile(cache, []checkpointEntry{ + {ID: id.MustCheckpointID("ff0000000099"), Tree: preTree}, + }); err != nil { + t.Fatalf("seed cache: %v", err) + } + var stdout, progress bytes.Buffer + opts := migrateRefsOptions{repoRoot: dir, cacheFile: cache, workers: 1, out: &stdout, progress: &progress} + res, err := runMigrateTreeRefs(context.Background(), opts) + if err != nil { + t.Fatalf("run: %v", err) + } + if res.Total != 1 { + t.Fatalf("resume should process the cached entry only, got total=%d", res.Total) + } + repo, err := gitrepo.OpenPath(dir) + if err != nil { + t.Fatalf("OpenPath: %v", err) + } + if _, err := repo.Reference("refs/entire/checkpoints/ff/0000000099/tree", false); err != nil { + t.Fatalf("cached entry ref not created: %v", err) + } +} + +func TestNewCheckpointMigrateRefsCmd_Hidden(t *testing.T) { + t.Parallel() + + cmd := newCheckpointMigrateRefsCmd() + if cmd.Use != "migrate-refs" { + t.Fatalf("Use = %q, want migrate-refs", cmd.Use) + } + if !cmd.Hidden { + t.Fatalf("command must be hidden") + } + for _, name := range []string{"workers", "cache-file", "refresh", "dry-run"} { + if cmd.Flags().Lookup(name) == nil { + t.Fatalf("missing --%s flag", name) + } + } +} + +func TestCheckpointGroup_RegistersMigrateRefs(t *testing.T) { + t.Parallel() + + group := newCheckpointGroupCmd() + var found bool + for _, c := range group.Commands() { + if c.Name() == "migrate-refs" { + found = true + break + } + } + if !found { + t.Fatalf("checkpoint group does not register migrate-refs") + } +}