From 1ee4e65c50f4d0ee8ca49861b48f52e4d9b549cb Mon Sep 17 00:00:00 2001 From: Antonio Salinas Date: Thu, 23 Apr 2026 20:47:54 +0000 Subject: [PATCH 1/2] feat: synth dist-git /w lock (test) --- internal/app/azldev/cmds/component/build.go | 4 +- .../azldev/cmds/component/preparesources.go | 4 +- internal/app/azldev/cmds/component/render.go | 2 +- .../app/azldev/core/sources/sourceprep.go | 40 +- .../app/azldev/core/sources/synthistory.go | 757 +++++++++++++++--- .../azldev/core/sources/synthistory_test.go | 507 ++++++------ 6 files changed, 952 insertions(+), 362 deletions(-) diff --git a/internal/app/azldev/cmds/component/build.go b/internal/app/azldev/cmds/component/build.go index bad7ab5b..6123c575 100644 --- a/internal/app/azldev/cmds/component/build.go +++ b/internal/app/azldev/cmds/component/build.go @@ -253,7 +253,9 @@ func BuildComponent( var preparerOpts []sources.PreparerOption if options.WithGitRepo { - preparerOpts = append(preparerOpts, sources.WithGitRepo(env.Config().Project.DefaultAuthorEmail)) + preparerOpts = append(preparerOpts, sources.WithGitRepo( + env.Config().Project.DefaultAuthorEmail, env, + )) } sourcePreparer, err := sources.NewPreparer(sourceManager, env.FS(), env, env, preparerOpts...) 
diff --git a/internal/app/azldev/cmds/component/preparesources.go b/internal/app/azldev/cmds/component/preparesources.go index e71b5c0e..6ce88328 100644 --- a/internal/app/azldev/cmds/component/preparesources.go +++ b/internal/app/azldev/cmds/component/preparesources.go @@ -128,7 +128,9 @@ func PrepareComponentSources(env *azldev.Env, options *PrepareSourcesOptions) er var preparerOpts []sources.PreparerOption if options.WithGitRepo { - preparerOpts = append(preparerOpts, sources.WithGitRepo(env.Config().Project.DefaultAuthorEmail)) + preparerOpts = append(preparerOpts, sources.WithGitRepo( + env.Config().Project.DefaultAuthorEmail, env, + )) } if options.AllowNoHashes { diff --git a/internal/app/azldev/cmds/component/render.go b/internal/app/azldev/cmds/component/render.go index b10b1a3c..5cec7c66 100644 --- a/internal/app/azldev/cmds/component/render.go +++ b/internal/app/azldev/cmds/component/render.go @@ -422,7 +422,7 @@ func prepareComponentSources( // WithSkipLookaside avoids expensive tarball downloads — only spec + // sidecar files are needed for rendering. preparerOpts := []sources.PreparerOption{ - sources.WithGitRepo(env.Config().Project.DefaultAuthorEmail), + sources.WithGitRepo(env.Config().Project.DefaultAuthorEmail, env), sources.WithSkipLookaside(), } diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index ab070730..481f5f04 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -64,11 +64,16 @@ type PreparerOption func(*sourcePreparerImpl) // Without this option, no dist-git is created and synthetic history is skipped. // // The defaultAuthorEmail is used for synthetic changelog entries and commits -// when no author email is available from git history. -func WithGitRepo(defaultAuthorEmail string) PreparerOption { +// when no author email is available from git history. 
The cmdFactory is used +// to shell out to git for fingerprint change detection. +func WithGitRepo( + defaultAuthorEmail string, + cmdFactory opctx.CmdFactory, +) PreparerOption { return func(p *sourcePreparerImpl) { p.withGitRepo = true p.defaultAuthorEmail = defaultAuthorEmail + p.cmdFactory = cmdFactory } } @@ -122,6 +127,10 @@ type sourcePreparerImpl struct { // entries and commits when no author email is available from git history. defaultAuthorEmail string + // cmdFactory is used to shell out to git for fingerprint change detection + // in the project repository. Set via [WithGitRepo]. + cmdFactory opctx.CmdFactory + // allowNoHashes, when true, allows source file references without hash // values. Missing hashes are computed from the downloaded files. allowNoHashes bool @@ -220,7 +229,7 @@ func (p *sourcePreparerImpl) PrepareSources( // Record the changes as synthetic git history when dist-git creation is enabled. if p.withGitRepo { - if err := p.trySyntheticHistory(component, outputDir); err != nil { + if err := p.trySyntheticHistory(ctx, component, outputDir); err != nil { return fmt.Errorf("failed to generate synthetic history for component %#q:\n%w", component.GetName(), err) } @@ -350,33 +359,38 @@ func initSourcesRepo(sourcesDirPath string) (*gogit.Repository, error) { } // trySyntheticHistory attempts to create synthetic git commits on top of the -// component's sources directory. If no .git directory exists, one is initialized -// with an initial commit so Affects commits can be layered on uniformly for all -// component types. +// component's sources directory. Synthetic commits are derived from lock file +// fingerprint changes in the project repository and interleaved into the +// upstream dist-git history. If no .git directory exists, one is initialized +// with an initial commit so synthetic commits can be layered on uniformly. // // Returns a non-nil error if history generation fails. 
func (p *sourcePreparerImpl) trySyntheticHistory( + ctx context.Context, component components.Component, sourcesDirPath string, ) error { config := component.GetConfig() + componentName := component.GetName() - // Build commit metadata from Affects commits. - commits, err := buildSyntheticCommits(config, component.GetName(), p.defaultAuthorEmail) + // Build commit metadata from lock file fingerprint changes. + changes, importCommit, err := buildSyntheticCommits( + ctx, p.cmdFactory, config, componentName, p.defaultAuthorEmail, + ) if err != nil { return fmt.Errorf("failed to build synthetic commits:\n%w", err) } - if len(commits) == 0 { + if len(changes) == 0 { slog.Debug("No synthetic commits to create; skipping history generation", - "component", component.GetName()) + "component", componentName) return nil } // Adjust the Release tag before staging changes. See [tryBumpStaticRelease] // for the handling of %autorelease, static integers, and non-standard values. - if err := p.tryBumpStaticRelease(component, sourcesDirPath, len(commits)); err != nil { + if err := p.tryBumpStaticRelease(component, sourcesDirPath, len(changes)); err != nil { return fmt.Errorf("failed to apply release bump:\n%w", err) } @@ -391,7 +405,7 @@ func (p *sourcePreparerImpl) trySyntheticHistory( if os.IsNotExist(statErr) { slog.Info("No .git directory in sources; initializing repository", - "component", component.GetName()) + "component", componentName) if _, err := initSourcesRepo(sourcesDirPath); err != nil { return fmt.Errorf("failed to initialize sources repository:\n%w", err) @@ -404,7 +418,7 @@ func (p *sourcePreparerImpl) trySyntheticHistory( return fmt.Errorf("failed to open sources repository at %#q:\n%w", sourcesDirPath, err) } - if err := CommitSyntheticHistory(sourcesRepo, commits); err != nil { + if err := CommitInterleavedHistory(sourcesRepo, changes, importCommit); err != nil { return fmt.Errorf("failed to commit synthetic history:\n%w", err) } diff --git 
a/internal/app/azldev/core/sources/synthistory.go b/internal/app/azldev/core/sources/synthistory.go index 763b9416..c0ef2195 100644 --- a/internal/app/azldev/core/sources/synthistory.go +++ b/internal/app/azldev/core/sources/synthistory.go @@ -4,23 +4,24 @@ package sources import ( + "bytes" + "context" "fmt" "log/slog" + "os/exec" "path/filepath" - "regexp" "slices" "strings" "time" gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/object" + "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + toml "github.com/pelletier/go-toml/v2" ) -// affectsRegexPattern is the regex pattern prefix used to match an "Affects:" trailer -// line in a commit message. Each line must contain exactly one component name. -const affectsRegexPattern = `(?m)^[ \t]*Affects:[ \t]*` - // CommitMetadata holds full metadata for a commit in the project repository. type CommitMetadata struct { Hash string @@ -30,176 +31,512 @@ type CommitMetadata struct { Message string } -// MessageAffectsComponent reports whether a commit message contains an "Affects:" -// trailer line naming the given component. -func MessageAffectsComponent(message, componentName string) bool { - re := regexp.MustCompile(affectsRegexPattern + regexp.QuoteMeta(componentName) + `[ \t]*$`) +// FingerprintChange records a project commit that changed a component's lock file +// fingerprint. [UpstreamCommit] is the value of the 'upstream-commit' field in the +// lock file at the point of the change. +type FingerprintChange struct { + CommitMetadata - return re.MatchString(message) + // UpstreamCommit is the upstream dist-git commit hash recorded in the lock + // file at the time the fingerprint changed. + UpstreamCommit string } -// FindAffectsCommits walks the git log from HEAD and returns metadata for all commits -// whose message contains an "Affects: " trailer line. 
Results are sorted -// chronologically (oldest first). -func FindAffectsCommits(repo *gogit.Repository, componentName string) ([]CommitMetadata, error) { - head, err := repo.Head() +// interleavedEntry represents a single commit in the rebuilt dist-git history. +// Exactly one of upstreamCommit or syntheticChange is non-nil. +type interleavedEntry struct { + upstreamCommit *object.Commit + syntheticChange *FingerprintChange +} + +// LockFilePath returns the relative path to a component's lock file within the +// project repository. The path follows the same letter-prefix convention used by +// [components.RenderedSpecDir]: specs/<prefix>/<componentName>/<componentName>.lock. +func LockFilePath(componentName string) string { + prefix := strings.ToLower(componentName[:1]) + + return filepath.Join("specs", prefix, componentName, componentName+".lock") +} + +// lockFileFields holds the subset of lock file fields needed for fingerprint +// change detection. This avoids importing the full [lockfile.ComponentLock] +// struct and decouples the synthetic history logic from lock file versioning. +type lockFileFields struct { + ImportCommit string `toml:"import-commit"` + UpstreamCommit string `toml:"upstream-commit"` + InputFingerprint string `toml:"input-fingerprint"` +} + +// FindFingerprintChanges walks the git log of the project repository for commits +// that changed the given lock file and returns metadata for each commit where the +// 'input-fingerprint' field changed. Results are sorted chronologically (oldest +// first). +func FindFingerprintChanges( + ctx context.Context, + cmdFactory opctx.CmdFactory, + projectRepoDir string, + lockFileRelPath string, +) ([]FingerprintChange, error) { + // Get the list of commit hashes that touched the lock file. 
+ hashes, err := gitLogFileHashes(ctx, cmdFactory, projectRepoDir, lockFileRelPath) if err != nil { - return nil, fmt.Errorf("failed to get HEAD reference:\n%w", err) + return nil, err } - commitIter, err := repo.Log(&gogit.LogOptions{From: head.Hash()}) - if err != nil { - return nil, fmt.Errorf("failed to iterate commit log:\n%w", err) + if len(hashes) == 0 { + return nil, nil } - var matches []CommitMetadata + // Build a chronological list of (hash, lockFileFields) for each commit. + type entry struct { + hash string + fields lockFileFields + meta CommitMetadata + } - err = commitIter.ForEach(func(commit *object.Commit) error { - if MessageAffectsComponent(commit.Message, componentName) { - matches = append(matches, CommitMetadata{ - Hash: commit.Hash.String(), - Author: commit.Author.Name, - AuthorEmail: commit.Author.Email, - Timestamp: commit.Author.When.Unix(), - Message: strings.TrimSpace(commit.Message), - }) + var entries []entry //nolint:prealloc // size not known ahead of time. + + for _, hash := range hashes { + fields, err := gitShowLockFile(ctx, cmdFactory, projectRepoDir, hash, lockFileRelPath) + if err != nil { + slog.Warn("Failed to read lock file at commit; skipping", + "commit", hash, "error", err) + + continue } - return nil - }) - if err != nil { - return nil, fmt.Errorf("failed to walk commit log:\n%w", err) + meta, err := gitCommitMetadata(ctx, cmdFactory, projectRepoDir, hash) + if err != nil { + return nil, fmt.Errorf("failed to get metadata for commit %#q:\n%w", hash, err) + } + + entries = append(entries, entry{hash: hash, fields: fields, meta: meta}) + } + + if len(entries) == 0 { + return nil, nil } - // Log iteration returns newest-first; reverse to get chronological order. - slices.Reverse(matches) + // Entries are newest-first (from git log order). Reverse to chronological. + slices.Reverse(entries) - return matches, nil + // Walk chronologically and detect fingerprint changes. 
+ var changes []FingerprintChange + + prevFingerprint := "" + + for _, change := range entries { + if change.fields.InputFingerprint != prevFingerprint { + changes = append(changes, FingerprintChange{ + CommitMetadata: change.meta, + UpstreamCommit: change.fields.UpstreamCommit, + }) + } + + prevFingerprint = change.fields.InputFingerprint + } + + return changes, nil } -// CommitSyntheticHistory stages all pending working tree changes and creates synthetic -// commits in the provided git repository. The first commit captures all file changes; -// subsequent commits are created as empty commits to preserve the commit count for -// rpmautospec release numbering. -func CommitSyntheticHistory( +// CommitInterleavedHistory rebuilds the dist-git history by interleaving +// synthetic commits with the existing upstream commits. Synthetic commits +// referencing an older upstream commit are placed directly after that commit; +// those referencing the latest upstream commit are appended on top. The very +// last synthetic commit carries the overlay file changes; all others are empty. +// +// The resulting git history looks like: +// +// U1 → F1 → F2 → U2' → U3' → F3 → F4 +// +// where U1 is the import-commit (kept as-is), F1/F2 are synthetic commits +// interleaved after U1, U2'/U3' are upstream commits replayed with new parents, +// and F3/F4 are synthetic commits on top (F4 carries overlay changes). +// +// When importCommit is non-empty, only upstream commits from importCommit +// onward are considered for interleaving. +func CommitInterleavedHistory( repo *gogit.Repository, - commits []CommitMetadata, + changes []FingerprintChange, + importCommit string, ) error { + // Collect upstream commits BEFORE staging, so the temporary commit + // created by stageAndCaptureOverlayTree is not included. + upstreamCommits, err := collectUpstreamCommits(repo, importCommit) + if err != nil { + return err + } + + // Stage overlay changes and capture the resulting tree hash. 
+ overlayTreeHash, err := stageAndCaptureOverlayTree(repo) + if err != nil { + return err + } + + // Build the full interleaved sequence of upstream and synthetic commits. + sequence := buildInterleavedSequence(upstreamCommits, changes) + + return replayInterleavedHistory(repo, sequence, overlayTreeHash) +} + +// stageAndCaptureOverlayTree stages all working tree changes and creates a +// temporary commit to capture the resulting tree hash. The tree hash is used +// later to set the content of the final synthetic commit. +func stageAndCaptureOverlayTree(repo *gogit.Repository) (plumbing.Hash, error) { worktree, err := repo.Worktree() if err != nil { - return fmt.Errorf("failed to get worktree:\n%w", err) + return plumbing.ZeroHash, fmt.Errorf("failed to get worktree:\n%w", err) } - // Stage all working tree changes once — overlays have already been applied. if err := worktree.AddWithOptions(&gogit.AddOptions{All: true}); err != nil { - return fmt.Errorf("failed to stage changes:\n%w", err) + return plumbing.ZeroHash, fmt.Errorf("failed to stage changes:\n%w", err) + } + + tempHash, err := worktree.Commit("temp: capture overlay tree", &gogit.CommitOptions{ + AllowEmptyCommits: true, + Author: &object.Signature{Name: "azldev", When: time.Unix(0, 0).UTC()}, + }) + if err != nil { + return plumbing.ZeroHash, fmt.Errorf("failed to create temporary commit:\n%w", err) + } + + tempCommit, err := repo.CommitObject(tempHash) + if err != nil { + return plumbing.ZeroHash, fmt.Errorf("failed to read temporary commit:\n%w", err) + } + + return tempCommit.TreeHash, nil +} + +// buildInterleavedSequence produces the full commit sequence for the rebuilt +// history. Upstream commits appear in chronological order; synthetic commits +// that reference an older upstream are inserted directly after it. Synthetic +// commits referencing the latest upstream (or orphaned ones whose upstream is +// not found) are appended at the end. 
+func buildInterleavedSequence( + upstreamCommits []*object.Commit, + changes []FingerprintChange, +) []interleavedEntry { + latestUpstream := changes[len(changes)-1].UpstreamCommit + + var interleaved, top []FingerprintChange + + for i := range changes { + if changes[i].UpstreamCommit == latestUpstream { + top = append(top, changes[i]) + } else { + interleaved = append(interleaved, changes[i]) + } } - for commitIdx, commitMeta := range commits { + // Build a lookup from upstream-commit hash → synthetic commits. + interleavedByUpstream := make(map[string][]FingerprintChange) + + for i := range interleaved { + hash := interleaved[i].UpstreamCommit + interleavedByUpstream[hash] = append(interleavedByUpstream[hash], interleaved[i]) + } + + // Walk upstream commits, inserting synthetics after their referenced commit. + sequence := make([]interleavedEntry, 0, len(upstreamCommits)+len(changes)) + + for i := range upstreamCommits { + sequence = append(sequence, interleavedEntry{upstreamCommit: upstreamCommits[i]}) + + hash := upstreamCommits[i].Hash.String() + if synthetics, ok := interleavedByUpstream[hash]; ok { + for j := range synthetics { + synth := synthetics[j] + sequence = append(sequence, interleavedEntry{syntheticChange: &synth}) + } + + delete(interleavedByUpstream, hash) + } + } + + // Orphaned changes whose upstream-commit wasn't found are dropped — + // they reference an upstream commit outside the known dist-git history. + for hash, orphaned := range interleavedByUpstream { + slog.Warn("Upstream commit referenced by fingerprint change not found in dist-git history; "+ + "dropping", + "upstreamCommit", hash, + "count", len(orphaned)) + } + + // Append "top" synthetic commits at the end. 
+ for i := range top { + topChange := top[i] + sequence = append(sequence, interleavedEntry{syntheticChange: &topChange}) + } + + return sequence +} + +// replayInterleavedHistory walks the interleaved sequence and creates new +// commit objects with correct tree hashes and parent chains. The first upstream +// commit (import-commit) is kept as-is; subsequent upstream commits are +// recreated with updated parents. Synthetic commits are empty except for the +// very last one, which carries the overlay tree. +func replayInterleavedHistory( + repo *gogit.Repository, + sequence []interleavedEntry, + overlayTreeHash plumbing.Hash, +) error { + syntheticCount := countSyntheticEntries(sequence) + + var ( + lastHash plumbing.Hash + lastTreeHash plumbing.Hash + syntheticIdx int + ) + + for idx, entry := range sequence { + if idx == 0 && entry.upstreamCommit != nil { + lastHash = entry.upstreamCommit.Hash + lastTreeHash = entry.upstreamCommit.TreeHash + + continue + } + + if entry.upstreamCommit != nil { + hash, err := createCommitObject(repo, + entry.upstreamCommit.TreeHash, lastHash, + entry.upstreamCommit.Author, entry.upstreamCommit.Committer, + entry.upstreamCommit.Message) + if err != nil { + return fmt.Errorf("failed to replay upstream commit:\n%w", err) + } + + lastHash = hash + lastTreeHash = entry.upstreamCommit.TreeHash + + continue + } + + syntheticIdx++ + + isLast := syntheticIdx == syntheticCount + + treeHash := lastTreeHash + if isLast { + treeHash = overlayTreeHash + } + + change := entry.syntheticChange + author := object.Signature{ + Name: change.Author, + Email: change.AuthorEmail, + When: unixToTime(change.Timestamp), + } + + message := fmt.Sprintf("%s\n\nProject commit: %s", change.Message, change.Hash) + slog.Info("Creating synthetic commit", - "commit", commitIdx+1, - "total", len(commits), - "projectHash", commitMeta.Hash, + "commit", syntheticIdx, + "total", syntheticCount, + "projectHash", change.Hash, + "upstreamCommit", change.UpstreamCommit, + 
"isLast", isLast, ) - message := fmt.Sprintf("%s\n\nProject commit: %s", - commitMeta.Message, commitMeta.Hash) - - _, err := worktree.Commit(message, &gogit.CommitOptions{ - AllowEmptyCommits: true, - Author: &object.Signature{ - Name: commitMeta.Author, - Email: commitMeta.AuthorEmail, - When: unixToTime(commitMeta.Timestamp), - }, - }) + hash, err := createCommitObject(repo, treeHash, lastHash, author, author, message) if err != nil { - return fmt.Errorf("failed to create synthetic commit %d:\n%w", commitIdx+1, err) + return fmt.Errorf("failed to create synthetic commit %d:\n%w", syntheticIdx, err) + } + + lastHash = hash + lastTreeHash = treeHash + } + + if err := updateHead(repo, lastHash); err != nil { + return err + } + + slog.Info("Interleaved synthetic history complete", + "syntheticCommits", syntheticCount, + "totalCommits", len(sequence)) + + return nil +} + +// countSyntheticEntries returns the number of synthetic entries in the sequence. +func countSyntheticEntries(sequence []interleavedEntry) int { + count := 0 + + for _, entry := range sequence { + if entry.syntheticChange != nil { + count++ } } - slog.Info("Synthetic history generation complete", - "commitsCreated", len(commits)) + return count +} + +// createCommitObject creates a new commit in the repository's object store with +// the given tree, parent, author, committer, and message. 
+func createCommitObject( + repo *gogit.Repository, + treeHash, parentHash plumbing.Hash, + author, committer object.Signature, + message string, +) (plumbing.Hash, error) { + commit := &object.Commit{ + Author: author, + Committer: committer, + Message: message, + TreeHash: treeHash, + ParentHashes: []plumbing.Hash{parentHash}, + } + + obj := repo.Storer.NewEncodedObject() + if err := commit.Encode(obj); err != nil { + return plumbing.ZeroHash, fmt.Errorf("failed to encode commit:\n%w", err) + } + + hash, err := repo.Storer.SetEncodedObject(obj) + if err != nil { + return plumbing.ZeroHash, fmt.Errorf("failed to store commit:\n%w", err) + } + + return hash, nil +} + +// updateHead updates the HEAD reference (or the branch it points to) to the +// given commit hash. +func updateHead(repo *gogit.Repository, commitHash plumbing.Hash) error { + head, err := repo.Storer.Reference(plumbing.HEAD) + if err != nil { + return fmt.Errorf("failed to read HEAD reference:\n%w", err) + } + + // Resolve symbolic ref (e.g., HEAD → refs/heads/main). + name := plumbing.HEAD + if head.Type() != plumbing.HashReference { + name = head.Target() + } + + ref := plumbing.NewHashReference(name, commitHash) + if err := repo.Storer.SetReference(ref); err != nil { + return fmt.Errorf("failed to update HEAD to %s:\n%w", commitHash, err) + } return nil } -// buildSyntheticCommits resolves the project repository from the component's config file, -// walks the git log for commits containing "Affects: ", and returns the -// matching commit metadata sorted chronologically. If no Affects commits are found, a -// single default overlay commit is returned instead. +// buildSyntheticCommits resolves the project repository from the component's +// config file, walks the lock file's git history for fingerprint changes, and +// returns the matching [FingerprintChange] entries sorted chronologically. +// If no fingerprint changes are found, a single default commit is returned. 
+// The second return value is the import-commit hash from the lock file, used +// to scope the upstream commit walk in [CommitInterleavedHistory]. func buildSyntheticCommits( - config *projectconfig.ComponentConfig, componentName, defaultAuthorEmail string, -) ([]CommitMetadata, error) { + ctx context.Context, + cmdFactory opctx.CmdFactory, + config *projectconfig.ComponentConfig, + componentName string, + defaultAuthorEmail string, +) (changes []FingerprintChange, importCommit string, err error) { configFilePath, err := resolveConfigFilePath(config, componentName) if err != nil { - // No config file reference means this component can't have Affects commits. slog.Debug("Cannot resolve config file for synthetic commits; skipping", "component", componentName, "error", err) - return nil, nil + return nil, "", nil } - projectRepo, err := openProjectRepo(configFilePath) + projectRepoDir, err := resolveProjectRepoDir(ctx, cmdFactory, configFilePath) if err != nil { - return nil, err + return nil, "", err } - affectsCommits, err := FindAffectsCommits(projectRepo, componentName) + lockRelPath := LockFilePath(componentName) + + // Read the current lock file at HEAD to get the import-commit boundary. 
+ headHash, err := gitHeadHash(ctx, cmdFactory, projectRepoDir) if err != nil { - return nil, fmt.Errorf("failed to find Affects commits for component %#q:\n%w", componentName, err) + return nil, "", fmt.Errorf("failed to get HEAD hash:\n%w", err) } - if len(affectsCommits) == 0 { - slog.Info("No commits with Affects marker found; "+ + headFields, headErr := gitShowLockFile(ctx, cmdFactory, projectRepoDir, headHash, lockRelPath) + if headErr == nil { + importCommit = headFields.ImportCommit + } + + fpChanges, err := FindFingerprintChanges(ctx, cmdFactory, projectRepoDir, lockRelPath) + if err != nil { + return nil, "", fmt.Errorf("failed to find fingerprint changes for component %#q:\n%w", + componentName, err) + } + + if len(fpChanges) == 0 { + slog.Info("No fingerprint changes found in lock file history; "+ "creating default commit", "component", componentName) - commit := defaultOverlayCommit(projectRepo, componentName, defaultAuthorEmail) + commit, commitErr := defaultOverlayCommit(ctx, cmdFactory, projectRepoDir, componentName, defaultAuthorEmail) + if commitErr != nil { + return nil, "", commitErr + } - return []CommitMetadata{commit}, nil + return []FingerprintChange{commit}, importCommit, nil } - slog.Info("Found commits affecting component", + slog.Info("Found fingerprint changes for component", "component", componentName, - "commitCount", len(affectsCommits)) + "changeCount", len(fpChanges)) - return affectsCommits, nil + return fpChanges, importCommit, nil } -// defaultOverlayCommit returns a single [CommitMetadata] entry that represents a generic -// commit when no Affects commits exist in the project history. The commit hash is -// set to the current HEAD of the project repository. -func defaultOverlayCommit(repo *gogit.Repository, componentName, +// defaultOverlayCommit returns a single [FingerprintChange] entry that represents +// a generic commit when no fingerprint changes exist in the lock file history. 
+// The [FingerprintChange.UpstreamCommit] is read from the current lock file HEAD. +func defaultOverlayCommit( + ctx context.Context, + cmdFactory opctx.CmdFactory, + projectRepoDir string, + componentName string, defaultAuthorEmail string, -) CommitMetadata { +) (FingerprintChange, error) { if defaultAuthorEmail == "" { slog.Warn("No default author email configured; synthetic commit will have an empty author email", - "hint", "set project.default-author-email in the project config") + "hint", "set 'project.default-author-email' in the project config") } - var ( - timestamp int64 - hash string - ) + hash, err := gitHeadHash(ctx, cmdFactory, projectRepoDir) + if err != nil { + return FingerprintChange{}, fmt.Errorf("failed to get HEAD hash for default overlay commit:\n%w", err) + } - if head, err := repo.Head(); err == nil { - hash = head.Hash().String() - if commit, commitErr := repo.CommitObject(head.Hash()); commitErr == nil { - timestamp = commit.Author.When.Unix() - } + meta, err := gitCommitMetadata(ctx, cmdFactory, projectRepoDir, hash) + if err != nil { + return FingerprintChange{}, fmt.Errorf("failed to get HEAD metadata for default overlay commit:\n%w", err) } - return CommitMetadata{ - Hash: hash, - Author: "azldev", - AuthorEmail: defaultAuthorEmail, - Timestamp: timestamp, - Message: "Latest state for " + componentName, + // Try to read the lock file at HEAD to get the upstream-commit. 
+ lockRelPath := LockFilePath(componentName) + + var upstreamCommit string + + fields, lockErr := gitShowLockFile(ctx, cmdFactory, projectRepoDir, hash, lockRelPath) + if lockErr == nil { + upstreamCommit = fields.UpstreamCommit } + + return FingerprintChange{ + CommitMetadata: CommitMetadata{ + Hash: hash, + Author: "azldev", + AuthorEmail: defaultAuthorEmail, + Timestamp: meta.Timestamp, + Message: "Latest state for " + componentName, + }, + UpstreamCommit: upstreamCommit, + }, nil } -// resolveConfigFilePath extracts and validates the source config file path from the component config. +// resolveConfigFilePath extracts and validates the source config file path from +// the component config. func resolveConfigFilePath(config *projectconfig.ComponentConfig, componentName string) (string, error) { configFile := config.SourceConfigFile if configFile == nil { @@ -214,21 +551,217 @@ func resolveConfigFilePath(config *projectconfig.ComponentConfig, componentName return configFilePath, nil } -// openProjectRepo finds and opens the git repository containing configFilePath by -// walking up the directory tree. -func openProjectRepo(configFilePath string) (*gogit.Repository, error) { - repo, err := gogit.PlainOpenWithOptions(filepath.Dir(configFilePath), &gogit.PlainOpenOptions{ - DetectDotGit: true, +// resolveProjectRepoDir returns the root directory of the git repository +// containing configFilePath by running 'git rev-parse --show-toplevel'. 
+func resolveProjectRepoDir( + ctx context.Context, cmdFactory opctx.CmdFactory, configFilePath string, +) (string, error) { + var stderr bytes.Buffer + + rawCmd := exec.CommandContext(ctx, "git", "-C", filepath.Dir(configFilePath), + "rev-parse", "--show-toplevel") + rawCmd.Stderr = &stderr + + cmd, err := cmdFactory.Command(rawCmd) + if err != nil { + return "", fmt.Errorf("failed to create git command:\n%w", err) + } + + output, err := cmd.RunAndGetOutput(ctx) + if err != nil { + return "", fmt.Errorf("failed to find project repository for config file %#q:\n%v\n%w", + configFilePath, stderr.String(), err) + } + + return strings.TrimSpace(output), nil +} + +// collectUpstreamCommits returns commits in the repository in chronological +// order (oldest first). When importCommit is non-empty, only commits from +// importCommit (inclusive) onward are returned. When empty, the full history +// is returned. +func collectUpstreamCommits(repo *gogit.Repository, importCommit string) ([]*object.Commit, error) { + head, err := repo.Head() + if err != nil { + return nil, fmt.Errorf("failed to get HEAD reference:\n%w", err) + } + + iter, err := repo.Log(&gogit.LogOptions{From: head.Hash()}) + if err != nil { + return nil, fmt.Errorf("failed to iterate commit log:\n%w", err) + } + + var commits []*object.Commit + + err = iter.ForEach(func(c *object.Commit) error { + commits = append(commits, c) + + return nil }) if err != nil { - return nil, fmt.Errorf("failed to find project repository for config file %#q:\n%w", - configFilePath, err) + return nil, fmt.Errorf("failed to walk commit log:\n%w", err) + } + + // git log returns newest-first; reverse to chronological. + slices.Reverse(commits) + + // If an import-commit boundary is set, trim commits before it. 
+ if importCommit != "" { + for idx, commit := range commits { + if commit.Hash.String() == importCommit { + commits = commits[idx:] + + return commits, nil + } + } + + slog.Warn("Import-commit not found in upstream history; using full history", + "importCommit", importCommit) } - return repo, nil + return commits, nil } // unixToTime converts a Unix timestamp to a [time.Time] in UTC. func unixToTime(unix int64) time.Time { return time.Unix(unix, 0).UTC() } + +// --- git CLI helpers --- + +// gitLogFileHashes returns the commit hashes (newest-first) that touched the +// given file path, scoped to the project repository at repoDir. +func gitLogFileHashes( + ctx context.Context, cmdFactory opctx.CmdFactory, repoDir, filePath string, +) ([]string, error) { + var stderr bytes.Buffer + + rawCmd := exec.CommandContext(ctx, "git", "-C", repoDir, + "log", "--format=%H", "--follow", "--", filePath) + rawCmd.Stderr = &stderr + + cmd, err := cmdFactory.Command(rawCmd) + if err != nil { + return nil, fmt.Errorf("failed to create git log command:\n%w", err) + } + + output, err := cmd.RunAndGetOutput(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list commits for %#q:\n%v\n%w", + filePath, stderr.String(), err) + } + + output = strings.TrimSpace(output) + if output == "" { + return nil, nil + } + + return strings.Split(output, "\n"), nil +} + +// gitShowLockFile reads the lock file content at a specific commit and parses +// the 'upstream-commit' and 'input-fingerprint' TOML fields. 
+func gitShowLockFile( + ctx context.Context, cmdFactory opctx.CmdFactory, + repoDir, commitHash, lockFileRelPath string, +) (lockFileFields, error) { + var stderr bytes.Buffer + + ref := commitHash + ":" + lockFileRelPath + + rawCmd := exec.CommandContext(ctx, "git", "-C", repoDir, "show", ref) + rawCmd.Stderr = &stderr + + cmd, err := cmdFactory.Command(rawCmd) + if err != nil { + return lockFileFields{}, fmt.Errorf("failed to create git show command:\n%w", err) + } + + output, err := cmd.RunAndGetOutput(ctx) + if err != nil { + return lockFileFields{}, fmt.Errorf("failed to read lock file at %#q:\n%v\n%w", + ref, stderr.String(), err) + } + + var fields lockFileFields + if err := toml.Unmarshal([]byte(output), &fields); err != nil { + return lockFileFields{}, fmt.Errorf("failed to parse lock file at %#q:\n%w", ref, err) + } + + return fields, nil +} + +// gitCommitMetadata returns the [CommitMetadata] for a single commit hash. +func gitCommitMetadata( + ctx context.Context, cmdFactory opctx.CmdFactory, repoDir, commitHash string, +) (CommitMetadata, error) { + var stderr bytes.Buffer + + // Format: hash, author name, author email, author date (unix), subject. + rawCmd := exec.CommandContext(ctx, "git", "-C", repoDir, + "log", "-1", "--format=%H%n%an%n%ae%n%at%n%s", commitHash) + rawCmd.Stderr = &stderr + + cmd, err := cmdFactory.Command(rawCmd) + if err != nil { + return CommitMetadata{}, fmt.Errorf("failed to create git log command:\n%w", err) + } + + output, err := cmd.RunAndGetOutput(ctx) + if err != nil { + return CommitMetadata{}, fmt.Errorf("failed to get commit metadata for %#q:\n%v\n%w", + commitHash, stderr.String(), err) + } + + return ParseCommitMetadata(output) +} + +// commitMetadataFieldCount is the number of fields expected in the output of +// 'git log -1 --format=%H%n%an%n%ae%n%at%n%s'. +const commitMetadataFieldCount = 5 + +// ParseCommitMetadata parses the output of 'git log -1 --format=%H%n%an%n%ae%n%at%n%s'. 
+func ParseCommitMetadata(output string) (CommitMetadata, error) { + lines := strings.SplitN(strings.TrimSpace(output), "\n", commitMetadataFieldCount) + + if len(lines) < commitMetadataFieldCount { + return CommitMetadata{}, fmt.Errorf( + "unexpected git log output (expected %d lines, got %d):\n%v", + commitMetadataFieldCount, len(lines), output) + } + + var timestamp int64 + if _, err := fmt.Sscanf(lines[3], "%d", &timestamp); err != nil { + return CommitMetadata{}, fmt.Errorf("failed to parse timestamp %#q:\n%w", lines[3], err) + } + + return CommitMetadata{ + Hash: lines[0], + Author: lines[1], + AuthorEmail: lines[2], + Timestamp: timestamp, + Message: lines[4], + }, nil +} + +// gitHeadHash returns the HEAD commit hash of the repository at repoDir. +func gitHeadHash( + ctx context.Context, cmdFactory opctx.CmdFactory, repoDir string, +) (string, error) { + var stderr bytes.Buffer + + rawCmd := exec.CommandContext(ctx, "git", "-C", repoDir, "rev-parse", "HEAD") + rawCmd.Stderr = &stderr + + cmd, err := cmdFactory.Command(rawCmd) + if err != nil { + return "", fmt.Errorf("failed to create git rev-parse command:\n%w", err) + } + + output, err := cmd.RunAndGetOutput(ctx) + if err != nil { + return "", fmt.Errorf("failed to get HEAD hash:\n%v\n%w", stderr.String(), err) + } + + return strings.TrimSpace(output), nil +} diff --git a/internal/app/azldev/core/sources/synthistory_test.go b/internal/app/azldev/core/sources/synthistory_test.go index 24ef8209..d44cdc0b 100644 --- a/internal/app/azldev/core/sources/synthistory_test.go +++ b/internal/app/azldev/core/sources/synthistory_test.go @@ -4,7 +4,6 @@ package sources_test import ( - "fmt" "testing" "time" @@ -17,258 +16,170 @@ import ( "github.com/stretchr/testify/require" ) -// createInMemoryRepo creates an empty in-memory git repository.
-func createInMemoryRepo(t *testing.T) *gogit.Repository { - t.Helper() - - repo, err := gogit.Init(memory.NewStorage(), memfs.New()) - require.NoError(t, err) +func TestLockFilePath(t *testing.T) { + tests := []struct { + name string + componentName string + want string + }{ + {"simple name", "curl", "specs/c/curl/curl.lock"}, + {"hyphenated name", "curl-minimal", "specs/c/curl-minimal/curl-minimal.lock"}, + {"uppercase first letter", "Kernel", "specs/k/Kernel/Kernel.lock"}, + {"single char name", "a", "specs/a/a/a.lock"}, + {"long name", "golang-github-example", "specs/g/golang-github-example/golang-github-example.lock"}, + } - return repo + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := sources.LockFilePath(tt.componentName) + assert.Equal(t, tt.want, got) + }) + } } -// addCommit creates a commit in the in-memory repository with the given message, author name, -// email, and timestamp. A dummy file change is added to ensure the commit is non-empty. -func addCommit( - t *testing.T, repo *gogit.Repository, message, authorName, authorEmail string, when time.Time, -) { - t.Helper() +func TestCommitInterleavedHistory_AllOnTop(t *testing.T) { + // When all fingerprint changes reference the latest upstream commit, + // all synthetic commits should be appended on top. + memFS := memfs.New() + storer := memory.NewStorage() - worktree, err := repo.Worktree() + repo, err := gogit.Init(storer, memFS) require.NoError(t, err) - fs := worktree.Filesystem - - // Write a unique file per commit to guarantee a non-empty diff. - fileName := fmt.Sprintf("file-%d.txt", when.UnixNano()) + worktree, err := repo.Worktree() + require.NoError(t, err) - f, err := fs.Create(fileName) + // Create an upstream commit. 
+ file, err := memFS.Create("package.spec") require.NoError(t, err) - _, err = f.Write([]byte(message)) + _, err = file.Write([]byte("Name: package\nVersion: 1.0\n")) require.NoError(t, err) - require.NoError(t, f.Close()) + require.NoError(t, file.Close()) - _, err = worktree.Add(fileName) + _, err = worktree.Add("package.spec") require.NoError(t, err) - _, err = worktree.Commit(message, &gogit.CommitOptions{ + upstreamCommit, err := worktree.Commit("upstream: initial", &gogit.CommitOptions{ Author: &object.Signature{ - Name: authorName, - Email: authorEmail, - When: when, + Name: "Upstream", + Email: "upstream@fedora.org", + When: time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC), }, }) require.NoError(t, err) -} - -func TestFindAffectsCommits(t *testing.T) { - repo := createInMemoryRepo(t) - - // Three commits: two mention curl, one does not. - addCommit(t, repo, - "Initial setup", - "Alice", "alice@example.com", - time.Date(2025, 1, 1, 10, 0, 0, 0, time.UTC)) - - addCommit(t, repo, - "Fix CVE-2025-1234\n\nAffects: curl", - "Bob", "bob@example.com", - time.Date(2025, 2, 1, 10, 0, 0, 0, time.UTC)) - - addCommit(t, repo, - "Bump release\n\nAffects: curl", - "Charlie", "charlie@example.com", - time.Date(2025, 3, 1, 10, 0, 0, 0, time.UTC)) - results, err := sources.FindAffectsCommits(repo, "curl") + // Simulate overlay modification. + specFile, err := memFS.Create("package.spec") require.NoError(t, err) - // Expect 2 matching commits, oldest first. - require.Len(t, results, 2) - - assert.Equal(t, "Bob", results[0].Author) - assert.Equal(t, "bob@example.com", results[0].AuthorEmail) - assert.Contains(t, results[0].Message, "Fix CVE-2025-1234") - - assert.Equal(t, "Charlie", results[1].Author) - assert.Equal(t, "charlie@example.com", results[1].AuthorEmail) - assert.Contains(t, results[1].Message, "Bump release") - - // Chronological order: Bob's timestamp < Charlie's timestamp. 
- assert.Less(t, results[0].Timestamp, results[1].Timestamp) -} - -func TestFindAffectsCommits_NoMatches(t *testing.T) { - repo := createInMemoryRepo(t) - - addCommit(t, repo, - "Unrelated change", - "Alice", "alice@example.com", - time.Date(2025, 1, 1, 10, 0, 0, 0, time.UTC)) - - results, err := sources.FindAffectsCommits(repo, "curl") + _, err = specFile.Write([]byte("Name: package\nVersion: 1.0\n# overlays applied\n")) require.NoError(t, err) - assert.Empty(t, results) -} - -func TestFindAffectsCommits_MultipleComponents(t *testing.T) { - repo := createInMemoryRepo(t) - - addCommit(t, repo, - "Fix curl issue\n\nAffects: curl", - "Alice", "alice@example.com", - time.Date(2025, 1, 1, 10, 0, 0, 0, time.UTC)) + require.NoError(t, specFile.Close()) - addCommit(t, repo, - "Fix wget issue\n\nAffects: wget", - "Bob", "bob@example.com", - time.Date(2025, 2, 1, 10, 0, 0, 0, time.UTC)) + upstreamHash := upstreamCommit.String() - addCommit(t, repo, - "Fix both\n\nAffects: curl\nAffects: wget", - "Charlie", "charlie@example.com", - time.Date(2025, 3, 1, 10, 0, 0, 0, time.UTC)) + changes := []sources.FingerprintChange{ + { + CommitMetadata: sources.CommitMetadata{ + Hash: "abc123", + Author: "Alice", + AuthorEmail: "alice@example.com", + Timestamp: time.Date(2025, 1, 10, 10, 0, 0, 0, time.UTC).Unix(), + Message: "Apply patch fix", + }, + UpstreamCommit: upstreamHash, + }, + { + CommitMetadata: sources.CommitMetadata{ + Hash: "def456", + Author: "Bob", + AuthorEmail: "bob@example.com", + Timestamp: time.Date(2025, 2, 20, 14, 0, 0, 0, time.UTC).Unix(), + Message: "Bump release", + }, + UpstreamCommit: upstreamHash, + }, + } - curlResults, err := sources.FindAffectsCommits(repo, "curl") + err = sources.CommitInterleavedHistory(repo, changes, "") require.NoError(t, err) - require.Len(t, curlResults, 2, "curl should match 2 commits") - assert.Equal(t, "Alice", curlResults[0].Author) - assert.Equal(t, "Charlie", curlResults[1].Author) - wgetResults, err := 
sources.FindAffectsCommits(repo, "wget") + // Verify the commit log: upstream + 2 synthetic = 3 commits. + head, err := repo.Head() require.NoError(t, err) - require.Len(t, wgetResults, 2, "wget should match 2 commits") - assert.Equal(t, "Bob", wgetResults[0].Author) - assert.Equal(t, "Charlie", wgetResults[1].Author) -} -func TestFindAffectsCommits_NoSubstringMatch(t *testing.T) { - repo := createInMemoryRepo(t) + commitIter, err := repo.Log(&gogit.LogOptions{From: head.Hash()}) + require.NoError(t, err) - // "Affects: curl-minimal" should NOT match when searching for "curl". - addCommit(t, repo, - "Update curl-minimal\n\nAffects: curl-minimal", - "Alice", "alice@example.com", - time.Date(2025, 1, 1, 10, 0, 0, 0, time.UTC)) + var logCommits []*object.Commit - addCommit(t, repo, - "Update curl itself\n\nAffects: curl", - "Bob", "bob@example.com", - time.Date(2025, 2, 1, 10, 0, 0, 0, time.UTC)) + err = commitIter.ForEach(func(c *object.Commit) error { + logCommits = append(logCommits, c) - // Searching for "curl" matches only Bob's commit (exact component name). - curlResults, err := sources.FindAffectsCommits(repo, "curl") + return nil + }) require.NoError(t, err) - require.Len(t, curlResults, 1, "exact match should not include curl-minimal commit") - assert.Equal(t, "Bob", curlResults[0].Author) - // Searching for "curl-minimal" matches only Alice's commit. - minimalResults, err := sources.FindAffectsCommits(repo, "curl-minimal") - require.NoError(t, err) - require.Len(t, minimalResults, 1) - assert.Equal(t, "Alice", minimalResults[0].Author) -} + require.Len(t, logCommits, 3, "should have upstream + 2 synthetic commits") -func TestFindAffectsCommits_AffectsInSubject(t *testing.T) { - repo := createInMemoryRepo(t) + // Most recent commit (Bob's) — this is the last synthetic commit. + assert.Contains(t, logCommits[0].Message, "Bump release") + assert.Equal(t, "Bob", logCommits[0].Author.Name) - // Affects marker in the subject line (not just the body). 
- addCommit(t, repo, - "Affects: curl", - "Alice", "alice@example.com", - time.Date(2025, 1, 1, 10, 0, 0, 0, time.UTC)) + // Second commit (Alice's). + assert.Contains(t, logCommits[1].Message, "Apply patch fix") + assert.Equal(t, "Alice", logCommits[1].Author.Name) - results, err := sources.FindAffectsCommits(repo, "curl") - require.NoError(t, err) - require.Len(t, results, 1) - assert.Equal(t, "Alice", results[0].Author) + // Original upstream commit. + assert.Equal(t, "upstream: initial", logCommits[2].Message) } -func TestFindAffectsCommits_CaseSensitive(t *testing.T) { - repo := createInMemoryRepo(t) - - addCommit(t, repo, - "Bump release\n\nAffects: Kernel", - "Alice", "alice@example.com", - time.Date(2025, 1, 1, 10, 0, 0, 0, time.UTC)) - - addCommit(t, repo, - "Fix CVE\n\nAFFECTS: KERNEL", - "Bob", "bob@example.com", - time.Date(2025, 2, 1, 10, 0, 0, 0, time.UTC)) - - addCommit(t, repo, - "Upstream fix\n\nAffects: kernel", - "Charlie", "charlie@example.com", - time.Date(2025, 3, 1, 10, 0, 0, 0, time.UTC)) +func TestCommitInterleavedHistory_Interleaved(t *testing.T) { + // Two upstream commits, one synthetic change for the first (older) upstream + // commit and one for the second (latest). The interleaved commit should + // appear between the two upstream commits. + memFS := memfs.New() + storer := memory.NewStorage() - // Matching is case-sensitive: searching for "kernel" only matches the exact-case commit. - results, err := sources.FindAffectsCommits(repo, "kernel") + repo, err := gogit.Init(storer, memFS) require.NoError(t, err) - require.Len(t, results, 1) - assert.Equal(t, "Charlie", results[0].Author) - // Searching for "Kernel" matches only Alice's commit (exact case on component name). 
- results, err = sources.FindAffectsCommits(repo, "Kernel") + worktree, err := repo.Worktree() require.NoError(t, err) - require.Len(t, results, 1) - assert.Equal(t, "Alice", results[0].Author) -} - -func TestMessageAffectsComponent(t *testing.T) { - tests := []struct { - name string - message string - component string - want bool - }{ - // Positive matches. - {"exact match in body", "Fix bug\n\nAffects: curl", "curl", true}, - {"trailing whitespace", "Fix bug\n\nAffects: curl ", "curl", true}, - {"leading whitespace on line", "Fix bug\n\n Affects: curl", "curl", true}, - {"subject line only", "Affects: curl", "curl", true}, - - // Negative matches. - {"different component", "Fix bug\n\nAffects: wget", "curl", false}, - {"no substring match", "Fix bug\n\nAffects: curl-minimal", "curl", false}, - {"comma separated", "Fix bug\n\nAffects: curl, wget", "curl", false}, - {"extra text after name", "Affects: curl - fix build failure", "curl", false}, - {"case sensitive", "Fix bug\n\nAffects: Curl", "curl", false}, - {"no match across newlines", "Fix bug\n\nAffects:\ncurl", "curl", false}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := sources.MessageAffectsComponent(tt.message, tt.component) - assert.Equal(t, tt.want, got) - }) - } -} + // Upstream commit 1. + file1, err := memFS.Create("package.spec") + require.NoError(t, err) -func TestCommitSyntheticHistory(t *testing.T) { - // Create an in-memory repo with an initial commit (simulating upstream). 
- memFS := memfs.New() - storer := memory.NewStorage() + _, err = file1.Write([]byte("Name: package\nVersion: 1.0\n")) + require.NoError(t, err) + require.NoError(t, file1.Close()) - repo, err := gogit.Init(storer, memFS) + _, err = worktree.Add("package.spec") require.NoError(t, err) - worktree, err := repo.Worktree() + upstream1, err := worktree.Commit("upstream: v1.0", &gogit.CommitOptions{ + Author: &object.Signature{ + Name: "Upstream", + Email: "upstream@fedora.org", + When: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC), + }, + }) require.NoError(t, err) - // Create an initial file (upstream). - file, err := memFS.Create("package.spec") + // Upstream commit 2. + file2, err := memFS.Create("package.spec") require.NoError(t, err) - _, err = file.Write([]byte("Name: package\nVersion: 1.0\n")) + _, err = file2.Write([]byte("Name: package\nVersion: 2.0\n")) require.NoError(t, err) - require.NoError(t, file.Close()) + require.NoError(t, file2.Close()) _, err = worktree.Add("package.spec") require.NoError(t, err) - _, err = worktree.Commit("upstream: initial", &gogit.CommitOptions{ + upstream2, err := worktree.Commit("upstream: v2.0", &gogit.CommitOptions{ Author: &object.Signature{ Name: "Upstream", Email: "upstream@fedora.org", @@ -277,36 +188,45 @@ func TestCommitSyntheticHistory(t *testing.T) { }) require.NoError(t, err) - // Simulate overlay application by modifying the working tree before committing. + // Simulate overlay modification in working tree. specFile, err := memFS.Create("package.spec") require.NoError(t, err) - _, err = specFile.Write([]byte("Name: package\nVersion: 1.0\n# overlays applied\n")) + _, err = specFile.Write([]byte("Name: package\nVersion: 2.0\n# overlays\n")) require.NoError(t, err) require.NoError(t, specFile.Close()) - // Define synthetic commits. 
- commits := []sources.CommitMetadata{ + changes := []sources.FingerprintChange{ { - Hash: "abc123def456", - Author: "Alice", - AuthorEmail: "alice@example.com", - Timestamp: time.Date(2025, 1, 10, 10, 0, 0, 0, time.UTC).Unix(), - Message: "Apply patch fix", + CommitMetadata: sources.CommitMetadata{ + Hash: "proj-aaa", + Author: "Alice", + AuthorEmail: "alice@example.com", + Timestamp: time.Date(2024, 3, 1, 0, 0, 0, 0, time.UTC).Unix(), + Message: "Fix for v1.0", + }, + UpstreamCommit: upstream1.String(), // references older upstream. }, { - Hash: "789abc012def", - Author: "Bob", - AuthorEmail: "bob@example.com", - Timestamp: time.Date(2025, 2, 20, 14, 0, 0, 0, time.UTC).Unix(), - Message: "Bump release", + CommitMetadata: sources.CommitMetadata{ + Hash: "proj-bbb", + Author: "Bob", + AuthorEmail: "bob@example.com", + Timestamp: time.Date(2024, 7, 1, 0, 0, 0, 0, time.UTC).Unix(), + Message: "Fix for v2.0", + }, + UpstreamCommit: upstream2.String(), // references latest upstream. }, } - err = sources.CommitSyntheticHistory(repo, commits) + err = sources.CommitInterleavedHistory(repo, changes, upstream1.String()) require.NoError(t, err) - // Verify the commit log has 3 commits: upstream + 2 synthetic. + // Expected order (newest first): + // 1. "Fix for v2.0" (synthetic, on top — latest upstream, with overlay) + // 2. "upstream: v2.0" (replayed with new parent) + // 3. "Fix for v1.0" (synthetic, interleaved after upstream v1.0) + // 4. "upstream: v1.0" (import-commit, kept as-is) head, err := repo.Head() require.NoError(t, err) @@ -322,22 +242,15 @@ func TestCommitSyntheticHistory(t *testing.T) { }) require.NoError(t, err) - require.Len(t, logCommits, 3, "should have upstream + 2 synthetic commits") - - // Most recent commit (Bob's) — empty commit. 
- assert.Contains(t, logCommits[0].Message, "Bump release") - assert.Equal(t, "Bob", logCommits[0].Author.Name) - assert.Equal(t, "bob@example.com", logCommits[0].Author.Email) + require.Len(t, logCommits, 4, "should have 2 upstream + 2 synthetic commits") - // Second commit (Alice's) — has the actual file changes. - assert.Contains(t, logCommits[1].Message, "Apply patch fix") - assert.Equal(t, "Alice", logCommits[1].Author.Name) - - // Original upstream commit. - assert.Equal(t, "upstream: initial", logCommits[2].Message) + assert.Contains(t, logCommits[0].Message, "Fix for v2.0") // top synthetic (latest) + assert.Contains(t, logCommits[1].Message, "upstream: v2.0") // replayed upstream 2 + assert.Contains(t, logCommits[2].Message, "Fix for v1.0") // interleaved synthetic + assert.Contains(t, logCommits[3].Message, "upstream: v1.0") // import-commit (kept) } -func TestCommitSyntheticHistory_SingleCommit(t *testing.T) { +func TestCommitInterleavedHistory_SingleCommit(t *testing.T) { memFS := memfs.New() storer := memory.NewStorage() @@ -357,7 +270,7 @@ func TestCommitSyntheticHistory_SingleCommit(t *testing.T) { _, err = worktree.Add("package.spec") require.NoError(t, err) - _, err = worktree.Commit("upstream: initial", &gogit.CommitOptions{ + upstream, err := worktree.Commit("upstream: initial", &gogit.CommitOptions{ Author: &object.Signature{ Name: "Upstream", Email: "upstream@fedora.org", @@ -374,17 +287,20 @@ func TestCommitSyntheticHistory_SingleCommit(t *testing.T) { require.NoError(t, err) require.NoError(t, specFile.Close()) - commits := []sources.CommitMetadata{ + changes := []sources.FingerprintChange{ { - Hash: "abc123", - Author: "Alice", - AuthorEmail: "alice@example.com", - Timestamp: time.Date(2025, 1, 10, 10, 0, 0, 0, time.UTC).Unix(), - Message: "Fix build", + CommitMetadata: sources.CommitMetadata{ + Hash: "abc123", + Author: "Alice", + AuthorEmail: "alice@example.com", + Timestamp: time.Date(2025, 1, 10, 10, 0, 0, 0, time.UTC).Unix(), + Message: 
"Fix build", + }, + UpstreamCommit: upstream.String(), }, } - err = sources.CommitSyntheticHistory(repo, commits) + err = sources.CommitInterleavedHistory(repo, changes, "") require.NoError(t, err) // Verify working tree changes are in the single synthetic commit. @@ -408,3 +324,126 @@ func TestCommitSyntheticHistory_SingleCommit(t *testing.T) { require.NoError(t, err) assert.Contains(t, content, "# modified") } + +func TestCommitInterleavedHistory_OrphanUpstreamCommit(t *testing.T) { + // When a fingerprint change references an upstream commit that doesn't + // exist in the dist-git history, it should be dropped (not appended). + memFS := memfs.New() + storer := memory.NewStorage() + + repo, err := gogit.Init(storer, memFS) + require.NoError(t, err) + + worktree, err := repo.Worktree() + require.NoError(t, err) + + file, err := memFS.Create("package.spec") + require.NoError(t, err) + + _, err = file.Write([]byte("Name: package\n")) + require.NoError(t, err) + require.NoError(t, file.Close()) + + _, err = worktree.Add("package.spec") + require.NoError(t, err) + + upstream, err := worktree.Commit("upstream: initial", &gogit.CommitOptions{ + Author: &object.Signature{ + Name: "Upstream", + Email: "upstream@fedora.org", + When: time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC), + }, + }) + require.NoError(t, err) + + changes := []sources.FingerprintChange{ + { + CommitMetadata: sources.CommitMetadata{ + Hash: "proj-orphan", + Author: "Alice", + AuthorEmail: "alice@example.com", + Timestamp: time.Date(2024, 3, 1, 0, 0, 0, 0, time.UTC).Unix(), + Message: "Fix for unknown upstream", + }, + UpstreamCommit: "deadbeefdeadbeef", // not in dist-git history. + }, + { + CommitMetadata: sources.CommitMetadata{ + Hash: "proj-latest", + Author: "Bob", + AuthorEmail: "bob@example.com", + Timestamp: time.Date(2024, 7, 1, 0, 0, 0, 0, time.UTC).Unix(), + Message: "Latest fix", + }, + UpstreamCommit: upstream.String(), // latest. 
+ }, + } + + err = sources.CommitInterleavedHistory(repo, changes, "") + require.NoError(t, err) + + head, err := repo.Head() + require.NoError(t, err) + + commitIter, err := repo.Log(&gogit.LogOptions{From: head.Hash()}) + require.NoError(t, err) + + var logCommits []*object.Commit + + err = commitIter.ForEach(func(c *object.Commit) error { + logCommits = append(logCommits, c) + + return nil + }) + require.NoError(t, err) + + // Only the latest-upstream synthetic commit is included; orphan is dropped. + require.Len(t, logCommits, 2) + assert.Contains(t, logCommits[0].Message, "Latest fix") + assert.Equal(t, "upstream: initial", logCommits[1].Message) +} + +func TestParseCommitMetadata(t *testing.T) { + tests := []struct { + name string + input string + want sources.CommitMetadata + wantErr bool + }{ + { + name: "valid output", + input: "abc123def456\nAlice\nalice@example.com\n1706100000\nFix CVE-2025-1234", + want: sources.CommitMetadata{ + Hash: "abc123def456", + Author: "Alice", + AuthorEmail: "alice@example.com", + Timestamp: 1706100000, + Message: "Fix CVE-2025-1234", + }, + }, + { + name: "too few lines", + input: "abc123\nAlice\nalice@example.com", + wantErr: true, + }, + { + name: "invalid timestamp", + input: "abc123\nAlice\nalice@example.com\nnot-a-number\nFix bug", + wantErr: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + got, err := sources.ParseCommitMetadata(test.input) + if test.wantErr { + assert.Error(t, err) + + return + } + + require.NoError(t, err) + assert.Equal(t, test.want, got) + }) + } +} From d0b1ecea9f7475ff26b1a48cf67698ca347f4317 Mon Sep 17 00:00:00 2001 From: Antonio Salinas Date: Fri, 24 Apr 2026 22:44:27 +0000 Subject: [PATCH 2/2] fixes --- internal/app/azldev/cmds/component/build.go | 4 +- .../azldev/cmds/component/preparesources.go | 4 +- internal/app/azldev/cmds/component/render.go | 2 +- .../app/azldev/core/sources/sourceprep.go | 13 +- .../app/azldev/core/sources/synthistory.go | 302 
+++++++----------- internal/utils/git/git.go | 28 ++ 6 files changed, 146 insertions(+), 207 deletions(-) diff --git a/internal/app/azldev/cmds/component/build.go b/internal/app/azldev/cmds/component/build.go index 6123c575..738a1074 100644 --- a/internal/app/azldev/cmds/component/build.go +++ b/internal/app/azldev/cmds/component/build.go @@ -253,9 +253,7 @@ func BuildComponent( var preparerOpts []sources.PreparerOption if options.WithGitRepo { - preparerOpts = append(preparerOpts, sources.WithGitRepo( - env.Config().Project.DefaultAuthorEmail, env, - )) + preparerOpts = append(preparerOpts, sources.WithGitRepo(env)) } sourcePreparer, err := sources.NewPreparer(sourceManager, env.FS(), env, env, preparerOpts...) diff --git a/internal/app/azldev/cmds/component/preparesources.go b/internal/app/azldev/cmds/component/preparesources.go index 6ce88328..92d0f02b 100644 --- a/internal/app/azldev/cmds/component/preparesources.go +++ b/internal/app/azldev/cmds/component/preparesources.go @@ -128,9 +128,7 @@ func PrepareComponentSources(env *azldev.Env, options *PrepareSourcesOptions) er var preparerOpts []sources.PreparerOption if options.WithGitRepo { - preparerOpts = append(preparerOpts, sources.WithGitRepo( - env.Config().Project.DefaultAuthorEmail, env, - )) + preparerOpts = append(preparerOpts, sources.WithGitRepo(env)) } if options.AllowNoHashes { diff --git a/internal/app/azldev/cmds/component/render.go b/internal/app/azldev/cmds/component/render.go index 5cec7c66..5b5fb9be 100644 --- a/internal/app/azldev/cmds/component/render.go +++ b/internal/app/azldev/cmds/component/render.go @@ -422,7 +422,7 @@ func prepareComponentSources( // WithSkipLookaside avoids expensive tarball downloads — only spec + // sidecar files are needed for rendering. 
preparerOpts := []sources.PreparerOption{ - sources.WithGitRepo(env.Config().Project.DefaultAuthorEmail, env), + sources.WithGitRepo(env), sources.WithSkipLookaside(), } diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index 481f5f04..29736690 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -63,16 +63,12 @@ type PreparerOption func(*sourcePreparerImpl) // requires the project configuration to reside inside a git repository. // Without this option, no dist-git is created and synthetic history is skipped. // -// The defaultAuthorEmail is used for synthetic changelog entries and commits -// when no author email is available from git history. The cmdFactory is used -// to shell out to git for fingerprint change detection. +// The cmdFactory is used to shell out to git for fingerprint change detection. func WithGitRepo( - defaultAuthorEmail string, cmdFactory opctx.CmdFactory, ) PreparerOption { return func(p *sourcePreparerImpl) { p.withGitRepo = true - p.defaultAuthorEmail = defaultAuthorEmail p.cmdFactory = cmdFactory } } @@ -123,10 +119,6 @@ type sourcePreparerImpl struct { // source preparation. Git-tracked files are still fetched. skipLookaside bool - // defaultAuthorEmail is the email address used for synthetic changelog - // entries and commits when no author email is available from git history. - defaultAuthorEmail string - // cmdFactory is used to shell out to git for fingerprint change detection // in the project repository. Set via [WithGitRepo]. cmdFactory opctx.CmdFactory @@ -375,7 +367,7 @@ func (p *sourcePreparerImpl) trySyntheticHistory( // Build commit metadata from lock file fingerprint changes. 
changes, importCommit, err := buildSyntheticCommits( - ctx, p.cmdFactory, config, componentName, p.defaultAuthorEmail, + ctx, p.cmdFactory, config, componentName, ) if err != nil { return fmt.Errorf("failed to build synthetic commits:\n%w", err) @@ -394,7 +386,6 @@ func (p *sourcePreparerImpl) trySyntheticHistory( return fmt.Errorf("failed to apply release bump:\n%w", err) } - // Use os.Stat (not p.fs) because go-git always operates on the real filesystem. gitDirPath := filepath.Join(sourcesDirPath, ".git") _, statErr := os.Stat(gitDirPath) diff --git a/internal/app/azldev/core/sources/synthistory.go b/internal/app/azldev/core/sources/synthistory.go index c0ef2195..65b373a5 100644 --- a/internal/app/azldev/core/sources/synthistory.go +++ b/internal/app/azldev/core/sources/synthistory.go @@ -4,11 +4,9 @@ package sources import ( - "bytes" "context" "fmt" "log/slog" - "os/exec" "path/filepath" "slices" "strings" @@ -17,8 +15,10 @@ import ( gogit "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/plumbing/storer" "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/git" toml "github.com/pelletier/go-toml/v2" ) @@ -77,40 +77,34 @@ func FindFingerprintChanges( projectRepoDir string, lockFileRelPath string, ) ([]FingerprintChange, error) { - // Get the list of commit hashes that touched the lock file. - hashes, err := gitLogFileHashes(ctx, cmdFactory, projectRepoDir, lockFileRelPath) + // Get commit metadata (newest-first) for all commits that touched the lock file. + metas, err := gitLogFileMetadata(ctx, cmdFactory, projectRepoDir, lockFileRelPath) if err != nil { return nil, err } - if len(hashes) == 0 { + if len(metas) == 0 { return nil, nil } - // Build a chronological list of (hash, lockFileFields) for each commit. 
+ // Pair each commit's metadata with its lock file fields. type entry struct { - hash string fields lockFileFields meta CommitMetadata } var entries []entry //nolint:prealloc // size not known ahead of time. - for _, hash := range hashes { - fields, err := gitShowLockFile(ctx, cmdFactory, projectRepoDir, hash, lockFileRelPath) + for _, meta := range metas { + fields, err := gitShowLockFile(ctx, cmdFactory, projectRepoDir, meta.Hash, lockFileRelPath) if err != nil { slog.Warn("Failed to read lock file at commit; skipping", - "commit", hash, "error", err) + "commit", meta.Hash, "error", err) continue } - meta, err := gitCommitMetadata(ctx, cmdFactory, projectRepoDir, hash) - if err != nil { - return nil, fmt.Errorf("failed to get metadata for commit %#q:\n%w", hash, err) - } - - entries = append(entries, entry{hash: hash, fields: fields, meta: meta}) + entries = append(entries, entry{fields: fields, meta: meta}) } if len(entries) == 0 { @@ -160,9 +154,14 @@ func CommitInterleavedHistory( changes []FingerprintChange, importCommit string, ) error { + // The latest fingerprint change's UpstreamCommit is the commit we're + // pinned to — use it as the upper bound for the upstream walk instead + // of HEAD, which may be ahead (e.g., at the branch tip). + upstreamCommit := changes[len(changes)-1].UpstreamCommit + // Collect upstream commits BEFORE staging, so the temporary commit // created by stageAndCaptureOverlayTree is not included. - upstreamCommits, err := collectUpstreamCommits(repo, importCommit) + upstreamCommits, err := collectUpstreamCommits(repo, importCommit, upstreamCommit) if err != nil { return err } @@ -211,8 +210,8 @@ func stageAndCaptureOverlayTree(repo *gogit.Repository) (plumbing.Hash, error) { // buildInterleavedSequence produces the full commit sequence for the rebuilt // history. Upstream commits appear in chronological order; synthetic commits // that reference an older upstream are inserted directly after it. 
Synthetic -// commits referencing the latest upstream (or orphaned ones whose upstream is -// not found) are appended at the end. +// commits referencing the latest upstream are appended at the end. Orphaned +// commits whose upstream is not found in the dist-git history are dropped. func buildInterleavedSequence( upstreamCommits []*object.Commit, changes []FingerprintChange, @@ -426,7 +425,7 @@ func updateHead(repo *gogit.Repository, commitHash plumbing.Hash) error { // buildSyntheticCommits resolves the project repository from the component's // config file, walks the lock file's git history for fingerprint changes, and // returns the matching [FingerprintChange] entries sorted chronologically. -// If no fingerprint changes are found, a single default commit is returned. +// Returns an error if the lock file exists but has no fingerprint changes. // The second return value is the import-commit hash from the lock file, used // to scope the upstream commit walk in [CommitInterleavedHistory]. func buildSyntheticCommits( @@ -434,7 +433,6 @@ func buildSyntheticCommits( cmdFactory opctx.CmdFactory, config *projectconfig.ComponentConfig, componentName string, - defaultAuthorEmail string, ) (changes []FingerprintChange, importCommit string, err error) { configFilePath, err := resolveConfigFilePath(config, componentName) if err != nil { @@ -457,11 +455,18 @@ func buildSyntheticCommits( return nil, "", fmt.Errorf("failed to get HEAD hash:\n%w", err) } + // Read the current lock file at HEAD — this must exist for synthetic + // dist-git generation. Without a lock file, we cannot determine the + // import-commit or upstream-commit boundaries. 
headFields, headErr := gitShowLockFile(ctx, cmdFactory, projectRepoDir, headHash, lockRelPath) - if headErr == nil { - importCommit = headFields.ImportCommit + if headErr != nil { + return nil, "", fmt.Errorf("lock file %#q not found at HEAD of project repository; "+ + "cannot generate synthetic dist-git without a lock file:\n%w", + lockRelPath, headErr) } + importCommit = headFields.ImportCommit + fpChanges, err := FindFingerprintChanges(ctx, cmdFactory, projectRepoDir, lockRelPath) if err != nil { return nil, "", fmt.Errorf("failed to find fingerprint changes for component %#q:\n%w", @@ -469,16 +474,10 @@ func buildSyntheticCommits( } if len(fpChanges) == 0 { - slog.Info("No fingerprint changes found in lock file history; "+ - "creating default commit", - "component", componentName) - - commit, commitErr := defaultOverlayCommit(ctx, cmdFactory, projectRepoDir, componentName, defaultAuthorEmail) - if commitErr != nil { - return nil, "", commitErr - } - - return []FingerprintChange{commit}, importCommit, nil + return nil, "", fmt.Errorf( + "lock file %#q exists but has no fingerprint changes; "+ + "this indicates a corrupt or empty lock file history", + lockRelPath) } slog.Info("Found fingerprint changes for component", @@ -488,53 +487,6 @@ func buildSyntheticCommits( return fpChanges, importCommit, nil } -// defaultOverlayCommit returns a single [FingerprintChange] entry that represents -// a generic commit when no fingerprint changes exist in the lock file history. -// The [FingerprintChange.UpstreamCommit] is read from the current lock file HEAD. 
-func defaultOverlayCommit( - ctx context.Context, - cmdFactory opctx.CmdFactory, - projectRepoDir string, - componentName string, - defaultAuthorEmail string, -) (FingerprintChange, error) { - if defaultAuthorEmail == "" { - slog.Warn("No default author email configured; synthetic commit will have an empty author email", - "hint", "set 'project.default-author-email' in the project config") - } - - hash, err := gitHeadHash(ctx, cmdFactory, projectRepoDir) - if err != nil { - return FingerprintChange{}, fmt.Errorf("failed to get HEAD hash for default overlay commit:\n%w", err) - } - - meta, err := gitCommitMetadata(ctx, cmdFactory, projectRepoDir, hash) - if err != nil { - return FingerprintChange{}, fmt.Errorf("failed to get HEAD metadata for default overlay commit:\n%w", err) - } - - // Try to read the lock file at HEAD to get the upstream-commit. - lockRelPath := LockFilePath(componentName) - - var upstreamCommit string - - fields, lockErr := gitShowLockFile(ctx, cmdFactory, projectRepoDir, hash, lockRelPath) - if lockErr == nil { - upstreamCommit = fields.UpstreamCommit - } - - return FingerprintChange{ - CommitMetadata: CommitMetadata{ - Hash: hash, - Author: "azldev", - AuthorEmail: defaultAuthorEmail, - Timestamp: meta.Timestamp, - Message: "Latest state for " + componentName, - }, - UpstreamCommit: upstreamCommit, - }, nil -} - // resolveConfigFilePath extracts and validates the source config file path from // the component config. 
func resolveConfigFilePath(config *projectconfig.ComponentConfig, componentName string) (string, error) { @@ -556,31 +508,22 @@ func resolveConfigFilePath(config *projectconfig.ComponentConfig, componentName func resolveProjectRepoDir( ctx context.Context, cmdFactory opctx.CmdFactory, configFilePath string, ) (string, error) { - var stderr bytes.Buffer - - rawCmd := exec.CommandContext(ctx, "git", "-C", filepath.Dir(configFilePath), - "rev-parse", "--show-toplevel") - rawCmd.Stderr = &stderr - - cmd, err := cmdFactory.Command(rawCmd) + output, err := git.RunInDir(ctx, cmdFactory, filepath.Dir(configFilePath), "rev-parse", "--show-toplevel") if err != nil { - return "", fmt.Errorf("failed to create git command:\n%w", err) + return "", fmt.Errorf("failed to find project repository for config file %#q:\n%w", + configFilePath, err) } - output, err := cmd.RunAndGetOutput(ctx) - if err != nil { - return "", fmt.Errorf("failed to find project repository for config file %#q:\n%v\n%w", - configFilePath, stderr.String(), err) - } - - return strings.TrimSpace(output), nil + return output, nil } // collectUpstreamCommits returns commits in the repository in chronological -// order (oldest first). When importCommit is non-empty, only commits from -// importCommit (inclusive) onward are returned. When empty, the full history -// is returned. -func collectUpstreamCommits(repo *gogit.Repository, importCommit string) ([]*object.Commit, error) { +// order (oldest first), bounded by importCommit (inclusive start) and +// upstreamCommit (inclusive end). The walk stops as soon as the import-commit +// is reached to avoid traversing the entire history. 
+func collectUpstreamCommits( + repo *gogit.Repository, importCommit, upstreamCommit string, +) ([]*object.Commit, error) { head, err := repo.Head() if err != nil { return nil, fmt.Errorf("failed to get HEAD reference:\n%w", err) @@ -591,10 +534,37 @@ func collectUpstreamCommits(repo *gogit.Repository, importCommit string) ([]*obj return nil, fmt.Errorf("failed to iterate commit log:\n%w", err) } - var commits []*object.Commit + // Walk newest-first. Collect commits until we pass the upstream-commit + // boundary, then keep collecting until we reach the import-commit. + var ( + commits []*object.Commit + foundUpstream bool + foundImport bool + collecting = upstreamCommit == "" // if no upper bound, collect from start. + ) + + err = iter.ForEach(func(commit *object.Commit) error { + hash := commit.Hash.String() + + // Start collecting once we see the upstream-commit (newest boundary). + if !collecting && hash == upstreamCommit { + collecting = true + } - err = iter.ForEach(func(c *object.Commit) error { - commits = append(commits, c) + if collecting { + commits = append(commits, commit) + } + + if hash == upstreamCommit { + foundUpstream = true + } + + // Stop once we reach the import-commit (oldest boundary). + if importCommit != "" && hash == importCommit { + foundImport = true + + return storer.ErrStop + } return nil }) @@ -602,23 +572,21 @@ func collectUpstreamCommits(repo *gogit.Repository, importCommit string) ([]*obj return nil, fmt.Errorf("failed to walk commit log:\n%w", err) } - // git log returns newest-first; reverse to chronological. - slices.Reverse(commits) - - // If an import-commit boundary is set, trim commits before it. 
- if importCommit != "" { - for idx, commit := range commits { - if commit.Hash.String() == importCommit { - commits = commits[idx:] - - return commits, nil - } - } + if upstreamCommit != "" && !foundUpstream { + return nil, fmt.Errorf( + "upstream-commit %#q not found in dist-git history; "+ + "the lock file may reference a commit from a different branch", + upstreamCommit) + } - slog.Warn("Import-commit not found in upstream history; using full history", + if importCommit != "" && !foundImport { + slog.Warn("Import-commit not found in dist-git history; using all collected commits", "importCommit", importCommit) } + // Walk was newest-first; reverse to chronological. + slices.Reverse(commits) + return commits, nil } @@ -629,34 +597,41 @@ func unixToTime(unix int64) time.Time { // --- git CLI helpers --- -// gitLogFileHashes returns the commit hashes (newest-first) that touched the -// given file path, scoped to the project repository at repoDir. -func gitLogFileHashes( +// gitLogFileMetadata returns commit metadata (newest-first) for all commits +// that touched the given file path in the repository at repoDir. Each commit's +// metadata is separated by a NUL byte in the git log output. 
+func gitLogFileMetadata( ctx context.Context, cmdFactory opctx.CmdFactory, repoDir, filePath string, -) ([]string, error) { - var stderr bytes.Buffer - - rawCmd := exec.CommandContext(ctx, "git", "-C", repoDir, - "log", "--format=%H", "--follow", "--", filePath) - rawCmd.Stderr = &stderr - - cmd, err := cmdFactory.Command(rawCmd) - if err != nil { - return nil, fmt.Errorf("failed to create git log command:\n%w", err) - } - - output, err := cmd.RunAndGetOutput(ctx) +) ([]CommitMetadata, error) { + output, err := git.RunInDir(ctx, cmdFactory, repoDir, + "log", "--format=%H%n%an%n%ae%n%at%n%s%x00", "--follow", "--", filePath) if err != nil { - return nil, fmt.Errorf("failed to list commits for %#q:\n%v\n%w", - filePath, stderr.String(), err) + return nil, fmt.Errorf("failed to list commits for %#q:\n%w", filePath, err) } - output = strings.TrimSpace(output) if output == "" { return nil, nil } - return strings.Split(output, "\n"), nil + blocks := strings.Split(output, "\x00") + + var metas []CommitMetadata //nolint:prealloc // trailing empty block after split. 
+ + for _, block := range blocks { + block = strings.TrimSpace(block) + if block == "" { + continue + } + + meta, err := ParseCommitMetadata(block) + if err != nil { + return nil, fmt.Errorf("failed to parse commit metadata:\n%w", err) + } + + metas = append(metas, meta) + } + + return metas, nil } // gitShowLockFile reads the lock file content at a specific commit and parses @@ -665,22 +640,11 @@ func gitShowLockFile( ctx context.Context, cmdFactory opctx.CmdFactory, repoDir, commitHash, lockFileRelPath string, ) (lockFileFields, error) { - var stderr bytes.Buffer - ref := commitHash + ":" + lockFileRelPath - rawCmd := exec.CommandContext(ctx, "git", "-C", repoDir, "show", ref) - rawCmd.Stderr = &stderr - - cmd, err := cmdFactory.Command(rawCmd) - if err != nil { - return lockFileFields{}, fmt.Errorf("failed to create git show command:\n%w", err) - } - - output, err := cmd.RunAndGetOutput(ctx) + output, err := git.RunInDir(ctx, cmdFactory, repoDir, "show", ref) if err != nil { - return lockFileFields{}, fmt.Errorf("failed to read lock file at %#q:\n%v\n%w", - ref, stderr.String(), err) + return lockFileFields{}, fmt.Errorf("failed to read lock file at %#q:\n%w", ref, err) } var fields lockFileFields @@ -691,31 +655,6 @@ func gitShowLockFile( return fields, nil } -// gitCommitMetadata returns the [CommitMetadata] for a single commit hash. -func gitCommitMetadata( - ctx context.Context, cmdFactory opctx.CmdFactory, repoDir, commitHash string, -) (CommitMetadata, error) { - var stderr bytes.Buffer - - // Format: hash, author name, author email, author date (unix), subject. 
- rawCmd := exec.CommandContext(ctx, "git", "-C", repoDir, - "log", "-1", "--format=%H%n%an%n%ae%n%at%n%s", commitHash) - rawCmd.Stderr = &stderr - - cmd, err := cmdFactory.Command(rawCmd) - if err != nil { - return CommitMetadata{}, fmt.Errorf("failed to create git log command:\n%w", err) - } - - output, err := cmd.RunAndGetOutput(ctx) - if err != nil { - return CommitMetadata{}, fmt.Errorf("failed to get commit metadata for %#q:\n%v\n%w", - commitHash, stderr.String(), err) - } - - return ParseCommitMetadata(output) -} - // commitMetadataFieldCount is the number of fields expected in the output of // 'git log -1 --format=%H%n%an%n%ae%n%at%n%s'. const commitMetadataFieldCount = 5 @@ -748,20 +687,5 @@ func ParseCommitMetadata(output string) (CommitMetadata, error) { func gitHeadHash( ctx context.Context, cmdFactory opctx.CmdFactory, repoDir string, ) (string, error) { - var stderr bytes.Buffer - - rawCmd := exec.CommandContext(ctx, "git", "-C", repoDir, "rev-parse", "HEAD") - rawCmd.Stderr = &stderr - - cmd, err := cmdFactory.Command(rawCmd) - if err != nil { - return "", fmt.Errorf("failed to create git rev-parse command:\n%w", err) - } - - output, err := cmd.RunAndGetOutput(ctx) - if err != nil { - return "", fmt.Errorf("failed to get HEAD hash:\n%v\n%w", stderr.String(), err) - } - - return strings.TrimSpace(output), nil + return git.RunInDir(ctx, cmdFactory, repoDir, "rev-parse", "HEAD") } diff --git a/internal/utils/git/git.go b/internal/utils/git/git.go index c8269292..4c859c95 100644 --- a/internal/utils/git/git.go +++ b/internal/utils/git/git.go @@ -6,6 +6,7 @@ package git import ( + "bytes" "context" "errors" "fmt" @@ -222,3 +223,30 @@ func WithMetadataOnly() GitOptions { opts.args = append(opts.args, "--no-checkout") } } + +// RunInDir executes a git command in the given directory and returns its +// trimmed stdout output. The dir argument is passed via 'git -C dir'. 
+func RunInDir( + ctx context.Context, cmdFactory opctx.CmdFactory, dir string, args ...string, +) (string, error) { + var stderr bytes.Buffer + + fullArgs := make([]string, 0, len(args)+2) + fullArgs = append(fullArgs, "-C", dir) + fullArgs = append(fullArgs, args...) + + rawCmd := exec.CommandContext(ctx, "git", fullArgs...) + rawCmd.Stderr = &stderr + + cmd, err := cmdFactory.Command(rawCmd) + if err != nil { + return "", fmt.Errorf("failed to create git command:\n%w", err) + } + + output, err := cmd.RunAndGetOutput(ctx) + if err != nil { + return "", fmt.Errorf("git %s failed:\n%v\n%w", args[0], stderr.String(), err) + } + + return strings.TrimSpace(output), nil +}