diff --git a/internal/app/azldev/cmds/component/build.go b/internal/app/azldev/cmds/component/build.go index bad7ab5b..738a1074 100644 --- a/internal/app/azldev/cmds/component/build.go +++ b/internal/app/azldev/cmds/component/build.go @@ -253,7 +253,7 @@ func BuildComponent( var preparerOpts []sources.PreparerOption if options.WithGitRepo { - preparerOpts = append(preparerOpts, sources.WithGitRepo(env.Config().Project.DefaultAuthorEmail)) + preparerOpts = append(preparerOpts, sources.WithGitRepo(env)) } sourcePreparer, err := sources.NewPreparer(sourceManager, env.FS(), env, env, preparerOpts...) diff --git a/internal/app/azldev/cmds/component/preparesources.go b/internal/app/azldev/cmds/component/preparesources.go index e71b5c0e..92d0f02b 100644 --- a/internal/app/azldev/cmds/component/preparesources.go +++ b/internal/app/azldev/cmds/component/preparesources.go @@ -128,7 +128,7 @@ func PrepareComponentSources(env *azldev.Env, options *PrepareSourcesOptions) er var preparerOpts []sources.PreparerOption if options.WithGitRepo { - preparerOpts = append(preparerOpts, sources.WithGitRepo(env.Config().Project.DefaultAuthorEmail)) + preparerOpts = append(preparerOpts, sources.WithGitRepo(env)) } if options.AllowNoHashes { diff --git a/internal/app/azldev/cmds/component/render.go b/internal/app/azldev/cmds/component/render.go index b10b1a3c..5b5fb9be 100644 --- a/internal/app/azldev/cmds/component/render.go +++ b/internal/app/azldev/cmds/component/render.go @@ -422,7 +422,7 @@ func prepareComponentSources( // WithSkipLookaside avoids expensive tarball downloads — only spec + // sidecar files are needed for rendering. 
preparerOpts := []sources.PreparerOption{ - sources.WithGitRepo(env.Config().Project.DefaultAuthorEmail), + sources.WithGitRepo(env), sources.WithSkipLookaside(), } diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index ab070730..29736690 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -63,12 +63,13 @@ type PreparerOption func(*sourcePreparerImpl) // requires the project configuration to reside inside a git repository. // Without this option, no dist-git is created and synthetic history is skipped. // -// The defaultAuthorEmail is used for synthetic changelog entries and commits -// when no author email is available from git history. -func WithGitRepo(defaultAuthorEmail string) PreparerOption { +// The cmdFactory is used to shell out to git for fingerprint change detection. +func WithGitRepo( + cmdFactory opctx.CmdFactory, +) PreparerOption { return func(p *sourcePreparerImpl) { p.withGitRepo = true - p.defaultAuthorEmail = defaultAuthorEmail + p.cmdFactory = cmdFactory } } @@ -118,9 +119,9 @@ type sourcePreparerImpl struct { // source preparation. Git-tracked files are still fetched. skipLookaside bool - // defaultAuthorEmail is the email address used for synthetic changelog - // entries and commits when no author email is available from git history. - defaultAuthorEmail string + // cmdFactory is used to shell out to git for fingerprint change detection + // in the project repository. Set via [WithGitRepo]. + cmdFactory opctx.CmdFactory // allowNoHashes, when true, allows source file references without hash // values. Missing hashes are computed from the downloaded files. @@ -220,7 +221,7 @@ func (p *sourcePreparerImpl) PrepareSources( // Record the changes as synthetic git history when dist-git creation is enabled. 
if p.withGitRepo { - if err := p.trySyntheticHistory(component, outputDir); err != nil { + if err := p.trySyntheticHistory(ctx, component, outputDir); err != nil { return fmt.Errorf("failed to generate synthetic history for component %#q:\n%w", component.GetName(), err) } @@ -350,37 +351,41 @@ func initSourcesRepo(sourcesDirPath string) (*gogit.Repository, error) { } // trySyntheticHistory attempts to create synthetic git commits on top of the -// component's sources directory. If no .git directory exists, one is initialized -// with an initial commit so Affects commits can be layered on uniformly for all -// component types. +// component's sources directory. Synthetic commits are derived from lock file +// fingerprint changes in the project repository and interleaved into the +// upstream dist-git history. If no .git directory exists, one is initialized +// with an initial commit so synthetic commits can be layered on uniformly. // // Returns a non-nil error if history generation fails. func (p *sourcePreparerImpl) trySyntheticHistory( + ctx context.Context, component components.Component, sourcesDirPath string, ) error { config := component.GetConfig() + componentName := component.GetName() - // Build commit metadata from Affects commits. - commits, err := buildSyntheticCommits(config, component.GetName(), p.defaultAuthorEmail) + // Build commit metadata from lock file fingerprint changes. + changes, importCommit, err := buildSyntheticCommits( + ctx, p.cmdFactory, config, componentName, + ) if err != nil { return fmt.Errorf("failed to build synthetic commits:\n%w", err) } - if len(commits) == 0 { + if len(changes) == 0 { slog.Debug("No synthetic commits to create; skipping history generation", - "component", component.GetName()) + "component", componentName) return nil } // Adjust the Release tag before staging changes. See [tryBumpStaticRelease] // for the handling of %autorelease, static integers, and non-standard values. 
- if err := p.tryBumpStaticRelease(component, sourcesDirPath, len(commits)); err != nil { + if err := p.tryBumpStaticRelease(component, sourcesDirPath, len(changes)); err != nil { return fmt.Errorf("failed to apply release bump:\n%w", err) } - // Use os.Stat (not p.fs) because go-git always operates on the real filesystem. gitDirPath := filepath.Join(sourcesDirPath, ".git") _, statErr := os.Stat(gitDirPath) @@ -391,7 +396,7 @@ func (p *sourcePreparerImpl) trySyntheticHistory( if os.IsNotExist(statErr) { slog.Info("No .git directory in sources; initializing repository", - "component", component.GetName()) + "component", componentName) if _, err := initSourcesRepo(sourcesDirPath); err != nil { return fmt.Errorf("failed to initialize sources repository:\n%w", err) @@ -404,7 +409,7 @@ func (p *sourcePreparerImpl) trySyntheticHistory( return fmt.Errorf("failed to open sources repository at %#q:\n%w", sourcesDirPath, err) } - if err := CommitSyntheticHistory(sourcesRepo, commits); err != nil { + if err := CommitInterleavedHistory(sourcesRepo, changes, importCommit); err != nil { return fmt.Errorf("failed to commit synthetic history:\n%w", err) } diff --git a/internal/app/azldev/core/sources/synthistory.go b/internal/app/azldev/core/sources/synthistory.go index 763b9416..65b373a5 100644 --- a/internal/app/azldev/core/sources/synthistory.go +++ b/internal/app/azldev/core/sources/synthistory.go @@ -4,23 +4,24 @@ package sources import ( + "context" "fmt" "log/slog" "path/filepath" - "regexp" "slices" "strings" "time" gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/plumbing/storer" + "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/git" + toml "github.com/pelletier/go-toml/v2" ) -// affectsRegexPattern is the regex pattern 
prefix used to match an "Affects:" trailer -// line in a commit message. Each line must contain exactly one component name. -const affectsRegexPattern = `(?m)^[ \t]*Affects:[ \t]*` - // CommitMetadata holds full metadata for a commit in the project repository. type CommitMetadata struct { Hash string @@ -30,176 +31,464 @@ type CommitMetadata struct { Message string } -// MessageAffectsComponent reports whether a commit message contains an "Affects:" -// trailer line naming the given component. -func MessageAffectsComponent(message, componentName string) bool { - re := regexp.MustCompile(affectsRegexPattern + regexp.QuoteMeta(componentName) + `[ \t]*$`) +// FingerprintChange records a project commit that changed a component's lock file +// fingerprint. [UpstreamCommit] is the value of the 'upstream-commit' field in the +// lock file at the point of the change. +type FingerprintChange struct { + CommitMetadata - return re.MatchString(message) + // UpstreamCommit is the upstream dist-git commit hash recorded in the lock + // file at the time the fingerprint changed. + UpstreamCommit string } -// FindAffectsCommits walks the git log from HEAD and returns metadata for all commits -// whose message contains an "Affects: " trailer line. Results are sorted -// chronologically (oldest first). -func FindAffectsCommits(repo *gogit.Repository, componentName string) ([]CommitMetadata, error) { - head, err := repo.Head() +// interleavedEntry represents a single commit in the rebuilt dist-git history. +// Exactly one of upstreamCommit or syntheticChange is non-nil. +type interleavedEntry struct { + upstreamCommit *object.Commit + syntheticChange *FingerprintChange +} + +// LockFilePath returns the relative path to a component's lock file within the +// project repository. The path follows the same letter-prefix convention used by +// [components.RenderedSpecDir]: specs/<prefix>/<name>/<name>.lock, where <prefix> +// is the lower-cased first letter of the component name. 
+func LockFilePath(componentName string) string { + prefix := strings.ToLower(componentName[:1]) + + return filepath.Join("specs", prefix, componentName, componentName+".lock") +} + +// lockFileFields holds the subset of lock file fields needed for fingerprint +// change detection. This avoids importing the full [lockfile.ComponentLock] +// struct and decouples the synthetic history logic from lock file versioning. +type lockFileFields struct { + ImportCommit string `toml:"import-commit"` + UpstreamCommit string `toml:"upstream-commit"` + InputFingerprint string `toml:"input-fingerprint"` +} + +// FindFingerprintChanges walks the git log of the project repository for commits +// that changed the given lock file and returns metadata for each commit where the +// 'input-fingerprint' field changed. Results are sorted chronologically (oldest +// first). +func FindFingerprintChanges( + ctx context.Context, + cmdFactory opctx.CmdFactory, + projectRepoDir string, + lockFileRelPath string, +) ([]FingerprintChange, error) { + // Get commit metadata (newest-first) for all commits that touched the lock file. + metas, err := gitLogFileMetadata(ctx, cmdFactory, projectRepoDir, lockFileRelPath) if err != nil { - return nil, fmt.Errorf("failed to get HEAD reference:\n%w", err) + return nil, err } - commitIter, err := repo.Log(&gogit.LogOptions{From: head.Hash()}) - if err != nil { - return nil, fmt.Errorf("failed to iterate commit log:\n%w", err) + if len(metas) == 0 { + return nil, nil } - var matches []CommitMetadata + // Pair each commit's metadata with its lock file fields. 
+ type entry struct { + fields lockFileFields + meta CommitMetadata + } - err = commitIter.ForEach(func(commit *object.Commit) error { - if MessageAffectsComponent(commit.Message, componentName) { - matches = append(matches, CommitMetadata{ - Hash: commit.Hash.String(), - Author: commit.Author.Name, - AuthorEmail: commit.Author.Email, - Timestamp: commit.Author.When.Unix(), - Message: strings.TrimSpace(commit.Message), - }) + var entries []entry //nolint:prealloc // size not known ahead of time. + + for _, meta := range metas { + fields, err := gitShowLockFile(ctx, cmdFactory, projectRepoDir, meta.Hash, lockFileRelPath) + if err != nil { + slog.Warn("Failed to read lock file at commit; skipping", + "commit", meta.Hash, "error", err) + + continue } - return nil - }) - if err != nil { - return nil, fmt.Errorf("failed to walk commit log:\n%w", err) + entries = append(entries, entry{fields: fields, meta: meta}) + } + + if len(entries) == 0 { + return nil, nil } - // Log iteration returns newest-first; reverse to get chronological order. - slices.Reverse(matches) + // Entries are newest-first (from git log order). Reverse to chronological. + slices.Reverse(entries) + + // Walk chronologically and detect fingerprint changes. + var changes []FingerprintChange - return matches, nil + prevFingerprint := "" + + for _, change := range entries { + if change.fields.InputFingerprint != prevFingerprint { + changes = append(changes, FingerprintChange{ + CommitMetadata: change.meta, + UpstreamCommit: change.fields.UpstreamCommit, + }) + } + + prevFingerprint = change.fields.InputFingerprint + } + + return changes, nil } -// CommitSyntheticHistory stages all pending working tree changes and creates synthetic -// commits in the provided git repository. The first commit captures all file changes; -// subsequent commits are created as empty commits to preserve the commit count for -// rpmautospec release numbering. 
-func CommitSyntheticHistory( +// CommitInterleavedHistory rebuilds the dist-git history by interleaving +// synthetic commits with the existing upstream commits. Synthetic commits +// referencing an older upstream commit are placed directly after that commit; +// those referencing the latest upstream commit are appended on top. The very +// last synthetic commit carries the overlay file changes; all others are empty. +// +// The resulting git history looks like: +// +// U1 → F1 → F2 → U2' → U3' → F3 → F4 +// +// where U1 is the import-commit (kept as-is), F1/F2 are synthetic commits +// interleaved after U1, U2'/U3' are upstream commits replayed with new parents, +// and F3/F4 are synthetic commits on top (F4 carries overlay changes). +// +// When importCommit is non-empty, only upstream commits from importCommit +// onward are considered for interleaving. +func CommitInterleavedHistory( repo *gogit.Repository, - commits []CommitMetadata, + changes []FingerprintChange, + importCommit string, ) error { + // The latest fingerprint change's UpstreamCommit is the commit we're + // pinned to — use it as the upper bound for the upstream walk instead + // of HEAD, which may be ahead (e.g., at the branch tip). + upstreamCommit := changes[len(changes)-1].UpstreamCommit + + // Collect upstream commits BEFORE staging, so the temporary commit + // created by stageAndCaptureOverlayTree is not included. + upstreamCommits, err := collectUpstreamCommits(repo, importCommit, upstreamCommit) + if err != nil { + return err + } + + // Stage overlay changes and capture the resulting tree hash. + overlayTreeHash, err := stageAndCaptureOverlayTree(repo) + if err != nil { + return err + } + + // Build the full interleaved sequence of upstream and synthetic commits. 
+ sequence := buildInterleavedSequence(upstreamCommits, changes) + + return replayInterleavedHistory(repo, sequence, overlayTreeHash) +} + +// stageAndCaptureOverlayTree stages all working tree changes and creates a +// temporary commit to capture the resulting tree hash. The tree hash is used +// later to set the content of the final synthetic commit. +func stageAndCaptureOverlayTree(repo *gogit.Repository) (plumbing.Hash, error) { worktree, err := repo.Worktree() if err != nil { - return fmt.Errorf("failed to get worktree:\n%w", err) + return plumbing.ZeroHash, fmt.Errorf("failed to get worktree:\n%w", err) } - // Stage all working tree changes once — overlays have already been applied. if err := worktree.AddWithOptions(&gogit.AddOptions{All: true}); err != nil { - return fmt.Errorf("failed to stage changes:\n%w", err) + return plumbing.ZeroHash, fmt.Errorf("failed to stage changes:\n%w", err) + } + + tempHash, err := worktree.Commit("temp: capture overlay tree", &gogit.CommitOptions{ + AllowEmptyCommits: true, + Author: &object.Signature{Name: "azldev", When: time.Unix(0, 0).UTC()}, + }) + if err != nil { + return plumbing.ZeroHash, fmt.Errorf("failed to create temporary commit:\n%w", err) + } + + tempCommit, err := repo.CommitObject(tempHash) + if err != nil { + return plumbing.ZeroHash, fmt.Errorf("failed to read temporary commit:\n%w", err) + } + + return tempCommit.TreeHash, nil +} + +// buildInterleavedSequence produces the full commit sequence for the rebuilt +// history. Upstream commits appear in chronological order; synthetic commits +// that reference an older upstream are inserted directly after it. Synthetic +// commits referencing the latest upstream are appended at the end. Orphaned +// commits whose upstream is not found in the dist-git history are dropped. 
+func buildInterleavedSequence( + upstreamCommits []*object.Commit, + changes []FingerprintChange, +) []interleavedEntry { + latestUpstream := changes[len(changes)-1].UpstreamCommit + + var interleaved, top []FingerprintChange + + for i := range changes { + if changes[i].UpstreamCommit == latestUpstream { + top = append(top, changes[i]) + } else { + interleaved = append(interleaved, changes[i]) + } + } + + // Build a lookup from upstream-commit hash → synthetic commits. + interleavedByUpstream := make(map[string][]FingerprintChange) + + for i := range interleaved { + hash := interleaved[i].UpstreamCommit + interleavedByUpstream[hash] = append(interleavedByUpstream[hash], interleaved[i]) + } + + // Walk upstream commits, inserting synthetics after their referenced commit. + sequence := make([]interleavedEntry, 0, len(upstreamCommits)+len(changes)) + + for i := range upstreamCommits { + sequence = append(sequence, interleavedEntry{upstreamCommit: upstreamCommits[i]}) + + hash := upstreamCommits[i].Hash.String() + if synthetics, ok := interleavedByUpstream[hash]; ok { + for j := range synthetics { + synth := synthetics[j] + sequence = append(sequence, interleavedEntry{syntheticChange: &synth}) + } + + delete(interleavedByUpstream, hash) + } + } + + // Orphaned changes whose upstream-commit wasn't found are dropped — + // they reference an upstream commit outside the known dist-git history. + for hash, orphaned := range interleavedByUpstream { + slog.Warn("Upstream commit referenced by fingerprint change not found in dist-git history; "+ + "dropping", + "upstreamCommit", hash, + "count", len(orphaned)) + } + + // Append "top" synthetic commits at the end. 
+ for i := range top { + topChange := top[i] + sequence = append(sequence, interleavedEntry{syntheticChange: &topChange}) } - for commitIdx, commitMeta := range commits { + return sequence +} + +// replayInterleavedHistory walks the interleaved sequence and creates new +// commit objects with correct tree hashes and parent chains. The first upstream +// commit (import-commit) is kept as-is; subsequent upstream commits are +// recreated with updated parents. Synthetic commits are empty except for the +// very last one, which carries the overlay tree. +func replayInterleavedHistory( + repo *gogit.Repository, + sequence []interleavedEntry, + overlayTreeHash plumbing.Hash, +) error { + syntheticCount := countSyntheticEntries(sequence) + + var ( + lastHash plumbing.Hash + lastTreeHash plumbing.Hash + syntheticIdx int + ) + + for idx, entry := range sequence { + if idx == 0 && entry.upstreamCommit != nil { + lastHash = entry.upstreamCommit.Hash + lastTreeHash = entry.upstreamCommit.TreeHash + + continue + } + + if entry.upstreamCommit != nil { + hash, err := createCommitObject(repo, + entry.upstreamCommit.TreeHash, lastHash, + entry.upstreamCommit.Author, entry.upstreamCommit.Committer, + entry.upstreamCommit.Message) + if err != nil { + return fmt.Errorf("failed to replay upstream commit:\n%w", err) + } + + lastHash = hash + lastTreeHash = entry.upstreamCommit.TreeHash + + continue + } + + syntheticIdx++ + + isLast := syntheticIdx == syntheticCount + + treeHash := lastTreeHash + if isLast { + treeHash = overlayTreeHash + } + + change := entry.syntheticChange + author := object.Signature{ + Name: change.Author, + Email: change.AuthorEmail, + When: unixToTime(change.Timestamp), + } + + message := fmt.Sprintf("%s\n\nProject commit: %s", change.Message, change.Hash) + slog.Info("Creating synthetic commit", - "commit", commitIdx+1, - "total", len(commits), - "projectHash", commitMeta.Hash, + "commit", syntheticIdx, + "total", syntheticCount, + "projectHash", change.Hash, + 
"upstreamCommit", change.UpstreamCommit, + "isLast", isLast, ) - message := fmt.Sprintf("%s\n\nProject commit: %s", - commitMeta.Message, commitMeta.Hash) - - _, err := worktree.Commit(message, &gogit.CommitOptions{ - AllowEmptyCommits: true, - Author: &object.Signature{ - Name: commitMeta.Author, - Email: commitMeta.AuthorEmail, - When: unixToTime(commitMeta.Timestamp), - }, - }) + hash, err := createCommitObject(repo, treeHash, lastHash, author, author, message) if err != nil { - return fmt.Errorf("failed to create synthetic commit %d:\n%w", commitIdx+1, err) + return fmt.Errorf("failed to create synthetic commit %d:\n%w", syntheticIdx, err) } + + lastHash = hash + lastTreeHash = treeHash } - slog.Info("Synthetic history generation complete", - "commitsCreated", len(commits)) + if err := updateHead(repo, lastHash); err != nil { + return err + } + + slog.Info("Interleaved synthetic history complete", + "syntheticCommits", syntheticCount, + "totalCommits", len(sequence)) return nil } -// buildSyntheticCommits resolves the project repository from the component's config file, -// walks the git log for commits containing "Affects: ", and returns the -// matching commit metadata sorted chronologically. If no Affects commits are found, a -// single default overlay commit is returned instead. -func buildSyntheticCommits( - config *projectconfig.ComponentConfig, componentName, defaultAuthorEmail string, -) ([]CommitMetadata, error) { - configFilePath, err := resolveConfigFilePath(config, componentName) - if err != nil { - // No config file reference means this component can't have Affects commits. - slog.Debug("Cannot resolve config file for synthetic commits; skipping", - "component", componentName, "error", err) +// countSyntheticEntries returns the number of synthetic entries in the sequence. 
+func countSyntheticEntries(sequence []interleavedEntry) int { + count := 0 - return nil, nil + for _, entry := range sequence { + if entry.syntheticChange != nil { + count++ + } } - projectRepo, err := openProjectRepo(configFilePath) - if err != nil { - return nil, err + return count +} + +// createCommitObject creates a new commit in the repository's object store with +// the given tree, parent, author, committer, and message. +func createCommitObject( + repo *gogit.Repository, + treeHash, parentHash plumbing.Hash, + author, committer object.Signature, + message string, +) (plumbing.Hash, error) { + commit := &object.Commit{ + Author: author, + Committer: committer, + Message: message, + TreeHash: treeHash, + ParentHashes: []plumbing.Hash{parentHash}, + } + + obj := repo.Storer.NewEncodedObject() + if err := commit.Encode(obj); err != nil { + return plumbing.ZeroHash, fmt.Errorf("failed to encode commit:\n%w", err) } - affectsCommits, err := FindAffectsCommits(projectRepo, componentName) + hash, err := repo.Storer.SetEncodedObject(obj) if err != nil { - return nil, fmt.Errorf("failed to find Affects commits for component %#q:\n%w", componentName, err) + return plumbing.ZeroHash, fmt.Errorf("failed to store commit:\n%w", err) } - if len(affectsCommits) == 0 { - slog.Info("No commits with Affects marker found; "+ - "creating default commit", - "component", componentName) + return hash, nil +} - commit := defaultOverlayCommit(projectRepo, componentName, defaultAuthorEmail) +// updateHead updates the HEAD reference (or the branch it points to) to the +// given commit hash. +func updateHead(repo *gogit.Repository, commitHash plumbing.Hash) error { + head, err := repo.Storer.Reference(plumbing.HEAD) + if err != nil { + return fmt.Errorf("failed to read HEAD reference:\n%w", err) + } - return []CommitMetadata{commit}, nil + // Resolve symbolic ref (e.g., HEAD → refs/heads/main). 
+ name := plumbing.HEAD + if head.Type() != plumbing.HashReference { + name = head.Target() } - slog.Info("Found commits affecting component", - "component", componentName, - "commitCount", len(affectsCommits)) + ref := plumbing.NewHashReference(name, commitHash) + if err := repo.Storer.SetReference(ref); err != nil { + return fmt.Errorf("failed to update HEAD to %s:\n%w", commitHash, err) + } - return affectsCommits, nil + return nil } -// defaultOverlayCommit returns a single [CommitMetadata] entry that represents a generic -// commit when no Affects commits exist in the project history. The commit hash is -// set to the current HEAD of the project repository. -func defaultOverlayCommit(repo *gogit.Repository, componentName, - defaultAuthorEmail string, -) CommitMetadata { - if defaultAuthorEmail == "" { - slog.Warn("No default author email configured; synthetic commit will have an empty author email", - "hint", "set project.default-author-email in the project config") +// buildSyntheticCommits resolves the project repository from the component's +// config file, walks the lock file's git history for fingerprint changes, and +// returns the matching [FingerprintChange] entries sorted chronologically. +// Returns an error if the lock file exists but has no fingerprint changes. +// The second return value is the import-commit hash from the lock file, used +// to scope the upstream commit walk in [CommitInterleavedHistory]. 
+func buildSyntheticCommits( + ctx context.Context, + cmdFactory opctx.CmdFactory, + config *projectconfig.ComponentConfig, + componentName string, +) (changes []FingerprintChange, importCommit string, err error) { + configFilePath, err := resolveConfigFilePath(config, componentName) + if err != nil { + slog.Debug("Cannot resolve config file for synthetic commits; skipping", + "component", componentName, "error", err) + + return nil, "", nil } - var ( - timestamp int64 - hash string - ) + projectRepoDir, err := resolveProjectRepoDir(ctx, cmdFactory, configFilePath) + if err != nil { + return nil, "", err + } - if head, err := repo.Head(); err == nil { - hash = head.Hash().String() - if commit, commitErr := repo.CommitObject(head.Hash()); commitErr == nil { - timestamp = commit.Author.When.Unix() - } + lockRelPath := LockFilePath(componentName) + + // Read the current lock file at HEAD to get the import-commit boundary. + headHash, err := gitHeadHash(ctx, cmdFactory, projectRepoDir) + if err != nil { + return nil, "", fmt.Errorf("failed to get HEAD hash:\n%w", err) } - return CommitMetadata{ - Hash: hash, - Author: "azldev", - AuthorEmail: defaultAuthorEmail, - Timestamp: timestamp, - Message: "Latest state for " + componentName, + // Read the current lock file at HEAD — this must exist for synthetic + // dist-git generation. Without a lock file, we cannot determine the + // import-commit or upstream-commit boundaries. 
+ headFields, headErr := gitShowLockFile(ctx, cmdFactory, projectRepoDir, headHash, lockRelPath) + if headErr != nil { + return nil, "", fmt.Errorf("lock file %#q not found at HEAD of project repository; "+ + "cannot generate synthetic dist-git without a lock file:\n%w", + lockRelPath, headErr) + } + + importCommit = headFields.ImportCommit + + fpChanges, err := FindFingerprintChanges(ctx, cmdFactory, projectRepoDir, lockRelPath) + if err != nil { + return nil, "", fmt.Errorf("failed to find fingerprint changes for component %#q:\n%w", + componentName, err) } + + if len(fpChanges) == 0 { + return nil, "", fmt.Errorf( + "lock file %#q exists but has no fingerprint changes; "+ + "this indicates a corrupt or empty lock file history", + lockRelPath) + } + + slog.Info("Found fingerprint changes for component", + "component", componentName, + "changeCount", len(fpChanges)) + + return fpChanges, importCommit, nil } -// resolveConfigFilePath extracts and validates the source config file path from the component config. +// resolveConfigFilePath extracts and validates the source config file path from +// the component config. func resolveConfigFilePath(config *projectconfig.ComponentConfig, componentName string) (string, error) { configFile := config.SourceConfigFile if configFile == nil { @@ -214,21 +503,189 @@ func resolveConfigFilePath(config *projectconfig.ComponentConfig, componentName return configFilePath, nil } -// openProjectRepo finds and opens the git repository containing configFilePath by -// walking up the directory tree. -func openProjectRepo(configFilePath string) (*gogit.Repository, error) { - repo, err := gogit.PlainOpenWithOptions(filepath.Dir(configFilePath), &gogit.PlainOpenOptions{ - DetectDotGit: true, - }) +// resolveProjectRepoDir returns the root directory of the git repository +// containing configFilePath by running 'git rev-parse --show-toplevel'. 
+func resolveProjectRepoDir( + ctx context.Context, cmdFactory opctx.CmdFactory, configFilePath string, +) (string, error) { + output, err := git.RunInDir(ctx, cmdFactory, filepath.Dir(configFilePath), "rev-parse", "--show-toplevel") if err != nil { - return nil, fmt.Errorf("failed to find project repository for config file %#q:\n%w", + return "", fmt.Errorf("failed to find project repository for config file %#q:\n%w", configFilePath, err) } - return repo, nil + return output, nil +} + +// collectUpstreamCommits returns commits in the repository in chronological +// order (oldest first), bounded by importCommit (inclusive start) and +// upstreamCommit (inclusive end). The walk stops as soon as the import-commit +// is reached to avoid traversing the entire history. +func collectUpstreamCommits( + repo *gogit.Repository, importCommit, upstreamCommit string, +) ([]*object.Commit, error) { + head, err := repo.Head() + if err != nil { + return nil, fmt.Errorf("failed to get HEAD reference:\n%w", err) + } + + iter, err := repo.Log(&gogit.LogOptions{From: head.Hash()}) + if err != nil { + return nil, fmt.Errorf("failed to iterate commit log:\n%w", err) + } + + // Walk newest-first. Collect commits until we pass the upstream-commit + // boundary, then keep collecting until we reach the import-commit. + var ( + commits []*object.Commit + foundUpstream bool + foundImport bool + collecting = upstreamCommit == "" // if no upper bound, collect from start. + ) + + err = iter.ForEach(func(commit *object.Commit) error { + hash := commit.Hash.String() + + // Start collecting once we see the upstream-commit (newest boundary). + if !collecting && hash == upstreamCommit { + collecting = true + } + + if collecting { + commits = append(commits, commit) + } + + if hash == upstreamCommit { + foundUpstream = true + } + + // Stop once we reach the import-commit (oldest boundary). 
+ if importCommit != "" && hash == importCommit { + foundImport = true + + return storer.ErrStop + } + + return nil + }) + if err != nil { + return nil, fmt.Errorf("failed to walk commit log:\n%w", err) + } + + if upstreamCommit != "" && !foundUpstream { + return nil, fmt.Errorf( + "upstream-commit %#q not found in dist-git history; "+ + "the lock file may reference a commit from a different branch", + upstreamCommit) + } + + if importCommit != "" && !foundImport { + slog.Warn("Import-commit not found in dist-git history; using all collected commits", + "importCommit", importCommit) + } + + // Walk was newest-first; reverse to chronological. + slices.Reverse(commits) + + return commits, nil } // unixToTime converts a Unix timestamp to a [time.Time] in UTC. func unixToTime(unix int64) time.Time { return time.Unix(unix, 0).UTC() } + +// --- git CLI helpers --- + +// gitLogFileMetadata returns commit metadata (newest-first) for all commits +// that touched the given file path in the repository at repoDir. Each commit's +// metadata is separated by a NUL byte in the git log output. +func gitLogFileMetadata( + ctx context.Context, cmdFactory opctx.CmdFactory, repoDir, filePath string, +) ([]CommitMetadata, error) { + output, err := git.RunInDir(ctx, cmdFactory, repoDir, + "log", "--format=%H%n%an%n%ae%n%at%n%s%x00", "--follow", "--", filePath) + if err != nil { + return nil, fmt.Errorf("failed to list commits for %#q:\n%w", filePath, err) + } + + if output == "" { + return nil, nil + } + + blocks := strings.Split(output, "\x00") + + var metas []CommitMetadata //nolint:prealloc // trailing empty block after split. 
+ + for _, block := range blocks { + block = strings.TrimSpace(block) + if block == "" { + continue + } + + meta, err := ParseCommitMetadata(block) + if err != nil { + return nil, fmt.Errorf("failed to parse commit metadata:\n%w", err) + } + + metas = append(metas, meta) + } + + return metas, nil +} + +// gitShowLockFile reads the lock file content at a specific commit and parses +// the 'upstream-commit' and 'input-fingerprint' TOML fields. +func gitShowLockFile( + ctx context.Context, cmdFactory opctx.CmdFactory, + repoDir, commitHash, lockFileRelPath string, +) (lockFileFields, error) { + ref := commitHash + ":" + lockFileRelPath + + output, err := git.RunInDir(ctx, cmdFactory, repoDir, "show", ref) + if err != nil { + return lockFileFields{}, fmt.Errorf("failed to read lock file at %#q:\n%w", ref, err) + } + + var fields lockFileFields + if err := toml.Unmarshal([]byte(output), &fields); err != nil { + return lockFileFields{}, fmt.Errorf("failed to parse lock file at %#q:\n%w", ref, err) + } + + return fields, nil +} + +// commitMetadataFieldCount is the number of fields expected in the output of +// 'git log -1 --format=%H%n%an%n%ae%n%at%n%s'. +const commitMetadataFieldCount = 5 + +// ParseCommitMetadata parses the output of 'git log -1 --format=%H%n%an%n%ae%n%at%n%s'. 
+func ParseCommitMetadata(output string) (CommitMetadata, error) { + lines := strings.SplitN(strings.TrimSpace(output), "\n", commitMetadataFieldCount) + + if len(lines) < commitMetadataFieldCount { + return CommitMetadata{}, fmt.Errorf( + "unexpected git log output (expected %d lines, got %d):\n%v", + commitMetadataFieldCount, len(lines), output) + } + + var timestamp int64 + if _, err := fmt.Sscanf(lines[3], "%d", &timestamp); err != nil { + return CommitMetadata{}, fmt.Errorf("failed to parse timestamp %#q:\n%w", lines[3], err) + } + + return CommitMetadata{ + Hash: lines[0], + Author: lines[1], + AuthorEmail: lines[2], + Timestamp: timestamp, + Message: lines[4], + }, nil +} + +// gitHeadHash returns the HEAD commit hash of the repository at repoDir. +func gitHeadHash( + ctx context.Context, cmdFactory opctx.CmdFactory, repoDir string, +) (string, error) { + return git.RunInDir(ctx, cmdFactory, repoDir, "rev-parse", "HEAD") +} diff --git a/internal/app/azldev/core/sources/synthistory_test.go b/internal/app/azldev/core/sources/synthistory_test.go index 24ef8209..d44cdc0b 100644 --- a/internal/app/azldev/core/sources/synthistory_test.go +++ b/internal/app/azldev/core/sources/synthistory_test.go @@ -4,7 +4,6 @@ package sources_test import ( - "fmt" "testing" "time" @@ -17,258 +16,170 @@ import ( "github.com/stretchr/testify/require" ) -// createInMemoryRepo creates an empty in-memory git repository. 
-func createInMemoryRepo(t *testing.T) *gogit.Repository { - t.Helper() - - repo, err := gogit.Init(memory.NewStorage(), memfs.New()) - require.NoError(t, err) +func TestLockFilePath(t *testing.T) { + tests := []struct { + name string + componentName string + want string + }{ + {"simple name", "curl", "specs/c/curl/curl.lock"}, + {"hyphenated name", "curl-minimal", "specs/c/curl-minimal/curl-minimal.lock"}, + {"uppercase first letter", "Kernel", "specs/k/Kernel/Kernel.lock"}, + {"single char name", "a", "specs/a/a/a.lock"}, + {"long name", "golang-github-example", "specs/g/golang-github-example/golang-github-example.lock"}, + } - return repo + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := sources.LockFilePath(tt.componentName) + assert.Equal(t, tt.want, got) + }) + } } -// addCommit creates a commit in the in-memory repository with the given message, author name, -// email, and timestamp. A dummy file change is added to ensure the commit is non-empty. -func addCommit( - t *testing.T, repo *gogit.Repository, message, authorName, authorEmail string, when time.Time, -) { - t.Helper() +func TestCommitInterleavedHistory_AllOnTop(t *testing.T) { + // When all fingerprint changes reference the latest upstream commit, + // all synthetic commits should be appended on top. + memFS := memfs.New() + storer := memory.NewStorage() - worktree, err := repo.Worktree() + repo, err := gogit.Init(storer, memFS) require.NoError(t, err) - fs := worktree.Filesystem - - // Write a unique file per commit to guarantee a non-empty diff. - fileName := fmt.Sprintf("file-%d.txt", when.UnixNano()) + worktree, err := repo.Worktree() + require.NoError(t, err) - f, err := fs.Create(fileName) + // Create an upstream commit. 
+ file, err := memFS.Create("package.spec") require.NoError(t, err) - _, err = f.Write([]byte(message)) + _, err = file.Write([]byte("Name: package\nVersion: 1.0\n")) require.NoError(t, err) - require.NoError(t, f.Close()) + require.NoError(t, file.Close()) - _, err = worktree.Add(fileName) + _, err = worktree.Add("package.spec") require.NoError(t, err) - _, err = worktree.Commit(message, &gogit.CommitOptions{ + upstreamCommit, err := worktree.Commit("upstream: initial", &gogit.CommitOptions{ Author: &object.Signature{ - Name: authorName, - Email: authorEmail, - When: when, + Name: "Upstream", + Email: "upstream@fedora.org", + When: time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC), }, }) require.NoError(t, err) -} - -func TestFindAffectsCommits(t *testing.T) { - repo := createInMemoryRepo(t) - - // Three commits: two mention curl, one does not. - addCommit(t, repo, - "Initial setup", - "Alice", "alice@example.com", - time.Date(2025, 1, 1, 10, 0, 0, 0, time.UTC)) - - addCommit(t, repo, - "Fix CVE-2025-1234\n\nAffects: curl", - "Bob", "bob@example.com", - time.Date(2025, 2, 1, 10, 0, 0, 0, time.UTC)) - - addCommit(t, repo, - "Bump release\n\nAffects: curl", - "Charlie", "charlie@example.com", - time.Date(2025, 3, 1, 10, 0, 0, 0, time.UTC)) - results, err := sources.FindAffectsCommits(repo, "curl") + // Simulate overlay modification. + specFile, err := memFS.Create("package.spec") require.NoError(t, err) - // Expect 2 matching commits, oldest first. - require.Len(t, results, 2) - - assert.Equal(t, "Bob", results[0].Author) - assert.Equal(t, "bob@example.com", results[0].AuthorEmail) - assert.Contains(t, results[0].Message, "Fix CVE-2025-1234") - - assert.Equal(t, "Charlie", results[1].Author) - assert.Equal(t, "charlie@example.com", results[1].AuthorEmail) - assert.Contains(t, results[1].Message, "Bump release") - - // Chronological order: Bob's timestamp < Charlie's timestamp. 
- assert.Less(t, results[0].Timestamp, results[1].Timestamp) -} - -func TestFindAffectsCommits_NoMatches(t *testing.T) { - repo := createInMemoryRepo(t) - - addCommit(t, repo, - "Unrelated change", - "Alice", "alice@example.com", - time.Date(2025, 1, 1, 10, 0, 0, 0, time.UTC)) - - results, err := sources.FindAffectsCommits(repo, "curl") + _, err = specFile.Write([]byte("Name: package\nVersion: 1.0\n# overlays applied\n")) require.NoError(t, err) - assert.Empty(t, results) -} - -func TestFindAffectsCommits_MultipleComponents(t *testing.T) { - repo := createInMemoryRepo(t) - - addCommit(t, repo, - "Fix curl issue\n\nAffects: curl", - "Alice", "alice@example.com", - time.Date(2025, 1, 1, 10, 0, 0, 0, time.UTC)) + require.NoError(t, specFile.Close()) - addCommit(t, repo, - "Fix wget issue\n\nAffects: wget", - "Bob", "bob@example.com", - time.Date(2025, 2, 1, 10, 0, 0, 0, time.UTC)) + upstreamHash := upstreamCommit.String() - addCommit(t, repo, - "Fix both\n\nAffects: curl\nAffects: wget", - "Charlie", "charlie@example.com", - time.Date(2025, 3, 1, 10, 0, 0, 0, time.UTC)) + changes := []sources.FingerprintChange{ + { + CommitMetadata: sources.CommitMetadata{ + Hash: "abc123", + Author: "Alice", + AuthorEmail: "alice@example.com", + Timestamp: time.Date(2025, 1, 10, 10, 0, 0, 0, time.UTC).Unix(), + Message: "Apply patch fix", + }, + UpstreamCommit: upstreamHash, + }, + { + CommitMetadata: sources.CommitMetadata{ + Hash: "def456", + Author: "Bob", + AuthorEmail: "bob@example.com", + Timestamp: time.Date(2025, 2, 20, 14, 0, 0, 0, time.UTC).Unix(), + Message: "Bump release", + }, + UpstreamCommit: upstreamHash, + }, + } - curlResults, err := sources.FindAffectsCommits(repo, "curl") + err = sources.CommitInterleavedHistory(repo, changes, "") require.NoError(t, err) - require.Len(t, curlResults, 2, "curl should match 2 commits") - assert.Equal(t, "Alice", curlResults[0].Author) - assert.Equal(t, "Charlie", curlResults[1].Author) - wgetResults, err := 
sources.FindAffectsCommits(repo, "wget") + // Verify the commit log: upstream + 2 synthetic = 3 commits. + head, err := repo.Head() require.NoError(t, err) - require.Len(t, wgetResults, 2, "wget should match 2 commits") - assert.Equal(t, "Bob", wgetResults[0].Author) - assert.Equal(t, "Charlie", wgetResults[1].Author) -} -func TestFindAffectsCommits_NoSubstringMatch(t *testing.T) { - repo := createInMemoryRepo(t) + commitIter, err := repo.Log(&gogit.LogOptions{From: head.Hash()}) + require.NoError(t, err) - // "Affects: curl-minimal" should NOT match when searching for "curl". - addCommit(t, repo, - "Update curl-minimal\n\nAffects: curl-minimal", - "Alice", "alice@example.com", - time.Date(2025, 1, 1, 10, 0, 0, 0, time.UTC)) + var logCommits []*object.Commit - addCommit(t, repo, - "Update curl itself\n\nAffects: curl", - "Bob", "bob@example.com", - time.Date(2025, 2, 1, 10, 0, 0, 0, time.UTC)) + err = commitIter.ForEach(func(c *object.Commit) error { + logCommits = append(logCommits, c) - // Searching for "curl" matches only Bob's commit (exact component name). - curlResults, err := sources.FindAffectsCommits(repo, "curl") + return nil + }) require.NoError(t, err) - require.Len(t, curlResults, 1, "exact match should not include curl-minimal commit") - assert.Equal(t, "Bob", curlResults[0].Author) - // Searching for "curl-minimal" matches only Alice's commit. - minimalResults, err := sources.FindAffectsCommits(repo, "curl-minimal") - require.NoError(t, err) - require.Len(t, minimalResults, 1) - assert.Equal(t, "Alice", minimalResults[0].Author) -} + require.Len(t, logCommits, 3, "should have upstream + 2 synthetic commits") -func TestFindAffectsCommits_AffectsInSubject(t *testing.T) { - repo := createInMemoryRepo(t) + // Most recent commit (Bob's) — this is the last synthetic commit. + assert.Contains(t, logCommits[0].Message, "Bump release") + assert.Equal(t, "Bob", logCommits[0].Author.Name) - // Affects marker in the subject line (not just the body). 
- addCommit(t, repo, - "Affects: curl", - "Alice", "alice@example.com", - time.Date(2025, 1, 1, 10, 0, 0, 0, time.UTC)) + // Second commit (Alice's). + assert.Contains(t, logCommits[1].Message, "Apply patch fix") + assert.Equal(t, "Alice", logCommits[1].Author.Name) - results, err := sources.FindAffectsCommits(repo, "curl") - require.NoError(t, err) - require.Len(t, results, 1) - assert.Equal(t, "Alice", results[0].Author) + // Original upstream commit. + assert.Equal(t, "upstream: initial", logCommits[2].Message) } -func TestFindAffectsCommits_CaseSensitive(t *testing.T) { - repo := createInMemoryRepo(t) - - addCommit(t, repo, - "Bump release\n\nAffects: Kernel", - "Alice", "alice@example.com", - time.Date(2025, 1, 1, 10, 0, 0, 0, time.UTC)) - - addCommit(t, repo, - "Fix CVE\n\nAFFECTS: KERNEL", - "Bob", "bob@example.com", - time.Date(2025, 2, 1, 10, 0, 0, 0, time.UTC)) - - addCommit(t, repo, - "Upstream fix\n\nAffects: kernel", - "Charlie", "charlie@example.com", - time.Date(2025, 3, 1, 10, 0, 0, 0, time.UTC)) +func TestCommitInterleavedHistory_Interleaved(t *testing.T) { + // Two upstream commits, one synthetic change for the first (older) upstream + // commit and one for the second (latest). The interleaved commit should + // appear between the two upstream commits. + memFS := memfs.New() + storer := memory.NewStorage() - // Matching is case-sensitive: searching for "kernel" only matches the exact-case commit. - results, err := sources.FindAffectsCommits(repo, "kernel") + repo, err := gogit.Init(storer, memFS) require.NoError(t, err) - require.Len(t, results, 1) - assert.Equal(t, "Charlie", results[0].Author) - // Searching for "Kernel" matches only Alice's commit (exact case on component name). 
- results, err = sources.FindAffectsCommits(repo, "Kernel") + worktree, err := repo.Worktree() require.NoError(t, err) - require.Len(t, results, 1) - assert.Equal(t, "Alice", results[0].Author) -} - -func TestMessageAffectsComponent(t *testing.T) { - tests := []struct { - name string - message string - component string - want bool - }{ - // Positive matches. - {"exact match in body", "Fix bug\n\nAffects: curl", "curl", true}, - {"trailing whitespace", "Fix bug\n\nAffects: curl ", "curl", true}, - {"leading whitespace on line", "Fix bug\n\n Affects: curl", "curl", true}, - {"subject line only", "Affects: curl", "curl", true}, - - // Negative matches. - {"different component", "Fix bug\n\nAffects: wget", "curl", false}, - {"no substring match", "Fix bug\n\nAffects: curl-minimal", "curl", false}, - {"comma separated", "Fix bug\n\nAffects: curl, wget", "curl", false}, - {"extra text after name", "Affects: curl - fix build failure", "curl", false}, - {"case sensitive", "Fix bug\n\nAffects: Curl", "curl", false}, - {"no match across newlines", "Fix bug\n\nAffects:\ncurl", "curl", false}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := sources.MessageAffectsComponent(tt.message, tt.component) - assert.Equal(t, tt.want, got) - }) - } -} + // Upstream commit 1. + file1, err := memFS.Create("package.spec") + require.NoError(t, err) -func TestCommitSyntheticHistory(t *testing.T) { - // Create an in-memory repo with an initial commit (simulating upstream). 
- memFS := memfs.New() - storer := memory.NewStorage() + _, err = file1.Write([]byte("Name: package\nVersion: 1.0\n")) + require.NoError(t, err) + require.NoError(t, file1.Close()) - repo, err := gogit.Init(storer, memFS) + _, err = worktree.Add("package.spec") require.NoError(t, err) - worktree, err := repo.Worktree() + upstream1, err := worktree.Commit("upstream: v1.0", &gogit.CommitOptions{ + Author: &object.Signature{ + Name: "Upstream", + Email: "upstream@fedora.org", + When: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC), + }, + }) require.NoError(t, err) - // Create an initial file (upstream). - file, err := memFS.Create("package.spec") + // Upstream commit 2. + file2, err := memFS.Create("package.spec") require.NoError(t, err) - _, err = file.Write([]byte("Name: package\nVersion: 1.0\n")) + _, err = file2.Write([]byte("Name: package\nVersion: 2.0\n")) require.NoError(t, err) - require.NoError(t, file.Close()) + require.NoError(t, file2.Close()) _, err = worktree.Add("package.spec") require.NoError(t, err) - _, err = worktree.Commit("upstream: initial", &gogit.CommitOptions{ + upstream2, err := worktree.Commit("upstream: v2.0", &gogit.CommitOptions{ Author: &object.Signature{ Name: "Upstream", Email: "upstream@fedora.org", @@ -277,36 +188,45 @@ func TestCommitSyntheticHistory(t *testing.T) { }) require.NoError(t, err) - // Simulate overlay application by modifying the working tree before committing. + // Simulate overlay modification in working tree. specFile, err := memFS.Create("package.spec") require.NoError(t, err) - _, err = specFile.Write([]byte("Name: package\nVersion: 1.0\n# overlays applied\n")) + _, err = specFile.Write([]byte("Name: package\nVersion: 2.0\n# overlays\n")) require.NoError(t, err) require.NoError(t, specFile.Close()) - // Define synthetic commits. 
- commits := []sources.CommitMetadata{ + changes := []sources.FingerprintChange{ { - Hash: "abc123def456", - Author: "Alice", - AuthorEmail: "alice@example.com", - Timestamp: time.Date(2025, 1, 10, 10, 0, 0, 0, time.UTC).Unix(), - Message: "Apply patch fix", + CommitMetadata: sources.CommitMetadata{ + Hash: "proj-aaa", + Author: "Alice", + AuthorEmail: "alice@example.com", + Timestamp: time.Date(2024, 3, 1, 0, 0, 0, 0, time.UTC).Unix(), + Message: "Fix for v1.0", + }, + UpstreamCommit: upstream1.String(), // references older upstream. }, { - Hash: "789abc012def", - Author: "Bob", - AuthorEmail: "bob@example.com", - Timestamp: time.Date(2025, 2, 20, 14, 0, 0, 0, time.UTC).Unix(), - Message: "Bump release", + CommitMetadata: sources.CommitMetadata{ + Hash: "proj-bbb", + Author: "Bob", + AuthorEmail: "bob@example.com", + Timestamp: time.Date(2024, 7, 1, 0, 0, 0, 0, time.UTC).Unix(), + Message: "Fix for v2.0", + }, + UpstreamCommit: upstream2.String(), // references latest upstream. }, } - err = sources.CommitSyntheticHistory(repo, commits) + err = sources.CommitInterleavedHistory(repo, changes, upstream1.String()) require.NoError(t, err) - // Verify the commit log has 3 commits: upstream + 2 synthetic. + // Expected order (newest first): + // 1. "Fix for v2.0" (synthetic, on top — latest upstream, with overlay) + // 2. "upstream: v2.0" (replayed with new parent) + // 3. "Fix for v1.0" (synthetic, interleaved after upstream v1.0) + // 4. "upstream: v1.0" (import-commit, kept as-is) head, err := repo.Head() require.NoError(t, err) @@ -322,22 +242,15 @@ func TestCommitSyntheticHistory(t *testing.T) { }) require.NoError(t, err) - require.Len(t, logCommits, 3, "should have upstream + 2 synthetic commits") - - // Most recent commit (Bob's) — empty commit. 
- assert.Contains(t, logCommits[0].Message, "Bump release") - assert.Equal(t, "Bob", logCommits[0].Author.Name) - assert.Equal(t, "bob@example.com", logCommits[0].Author.Email) + require.Len(t, logCommits, 4, "should have 2 upstream + 2 synthetic commits") - // Second commit (Alice's) — has the actual file changes. - assert.Contains(t, logCommits[1].Message, "Apply patch fix") - assert.Equal(t, "Alice", logCommits[1].Author.Name) - - // Original upstream commit. - assert.Equal(t, "upstream: initial", logCommits[2].Message) + assert.Contains(t, logCommits[0].Message, "Fix for v2.0") // top synthetic (latest) + assert.Contains(t, logCommits[1].Message, "upstream: v2.0") // replayed upstream 2 + assert.Contains(t, logCommits[2].Message, "Fix for v1.0") // interleaved synthetic + assert.Contains(t, logCommits[3].Message, "upstream: v1.0") // import-commit (kept) } -func TestCommitSyntheticHistory_SingleCommit(t *testing.T) { +func TestCommitInterleavedHistory_SingleCommit(t *testing.T) { memFS := memfs.New() storer := memory.NewStorage() @@ -357,7 +270,7 @@ func TestCommitSyntheticHistory_SingleCommit(t *testing.T) { _, err = worktree.Add("package.spec") require.NoError(t, err) - _, err = worktree.Commit("upstream: initial", &gogit.CommitOptions{ + upstream, err := worktree.Commit("upstream: initial", &gogit.CommitOptions{ Author: &object.Signature{ Name: "Upstream", Email: "upstream@fedora.org", @@ -374,17 +287,20 @@ func TestCommitSyntheticHistory_SingleCommit(t *testing.T) { require.NoError(t, err) require.NoError(t, specFile.Close()) - commits := []sources.CommitMetadata{ + changes := []sources.FingerprintChange{ { - Hash: "abc123", - Author: "Alice", - AuthorEmail: "alice@example.com", - Timestamp: time.Date(2025, 1, 10, 10, 0, 0, 0, time.UTC).Unix(), - Message: "Fix build", + CommitMetadata: sources.CommitMetadata{ + Hash: "abc123", + Author: "Alice", + AuthorEmail: "alice@example.com", + Timestamp: time.Date(2025, 1, 10, 10, 0, 0, 0, time.UTC).Unix(), + Message: 
"Fix build", + }, + UpstreamCommit: upstream.String(), }, } - err = sources.CommitSyntheticHistory(repo, commits) + err = sources.CommitInterleavedHistory(repo, changes, "") require.NoError(t, err) // Verify working tree changes are in the single synthetic commit. @@ -408,3 +324,126 @@ func TestCommitSyntheticHistory_SingleCommit(t *testing.T) { require.NoError(t, err) assert.Contains(t, content, "# modified") } + +func TestCommitInterleavedHistory_OrphanUpstreamCommit(t *testing.T) { + // When a fingerprint change references an upstream commit that doesn't + // exist in the dist-git history, it should be dropped (not appended). + memFS := memfs.New() + storer := memory.NewStorage() + + repo, err := gogit.Init(storer, memFS) + require.NoError(t, err) + + worktree, err := repo.Worktree() + require.NoError(t, err) + + file, err := memFS.Create("package.spec") + require.NoError(t, err) + + _, err = file.Write([]byte("Name: package\n")) + require.NoError(t, err) + require.NoError(t, file.Close()) + + _, err = worktree.Add("package.spec") + require.NoError(t, err) + + upstream, err := worktree.Commit("upstream: initial", &gogit.CommitOptions{ + Author: &object.Signature{ + Name: "Upstream", + Email: "upstream@fedora.org", + When: time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC), + }, + }) + require.NoError(t, err) + + changes := []sources.FingerprintChange{ + { + CommitMetadata: sources.CommitMetadata{ + Hash: "proj-orphan", + Author: "Alice", + AuthorEmail: "alice@example.com", + Timestamp: time.Date(2024, 3, 1, 0, 0, 0, 0, time.UTC).Unix(), + Message: "Fix for unknown upstream", + }, + UpstreamCommit: "deadbeefdeadbeef", // not in dist-git history. + }, + { + CommitMetadata: sources.CommitMetadata{ + Hash: "proj-latest", + Author: "Bob", + AuthorEmail: "bob@example.com", + Timestamp: time.Date(2024, 7, 1, 0, 0, 0, 0, time.UTC).Unix(), + Message: "Latest fix", + }, + UpstreamCommit: upstream.String(), // latest. 
+ }, + } + + err = sources.CommitInterleavedHistory(repo, changes, "") + require.NoError(t, err) + + head, err := repo.Head() + require.NoError(t, err) + + commitIter, err := repo.Log(&gogit.LogOptions{From: head.Hash()}) + require.NoError(t, err) + + var logCommits []*object.Commit + + err = commitIter.ForEach(func(c *object.Commit) error { + logCommits = append(logCommits, c) + + return nil + }) + require.NoError(t, err) + + // Only the latest-upstream synthetic commit is included; orphan is dropped. + require.Len(t, logCommits, 2) + assert.Contains(t, logCommits[0].Message, "Latest fix") + assert.Equal(t, "upstream: initial", logCommits[1].Message) +} + +func TestParseCommitMetadata(t *testing.T) { + tests := []struct { + name string + input string + want sources.CommitMetadata + wantErr bool + }{ + { + name: "valid output", + input: "abc123def456\nAlice\nalice@example.com\n1706100000\nFix CVE-2025-1234", + want: sources.CommitMetadata{ + Hash: "abc123def456", + Author: "Alice", + AuthorEmail: "alice@example.com", + Timestamp: 1706100000, + Message: "Fix CVE-2025-1234", + }, + }, + { + name: "too few lines", + input: "abc123\nAlice\nalice@example.com", + wantErr: true, + }, + { + name: "invalid timestamp", + input: "abc123\nAlice\nalice@example.com\nnot-a-number\nFix bug", + wantErr: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + got, err := sources.ParseCommitMetadata(test.input) + if test.wantErr { + assert.Error(t, err) + + return + } + + require.NoError(t, err) + assert.Equal(t, test.want, got) + }) + } +} diff --git a/internal/utils/git/git.go b/internal/utils/git/git.go index c8269292..4c859c95 100644 --- a/internal/utils/git/git.go +++ b/internal/utils/git/git.go @@ -6,6 +6,7 @@ package git import ( + "bytes" "context" "errors" "fmt" @@ -222,3 +223,30 @@ func WithMetadataOnly() GitOptions { opts.args = append(opts.args, "--no-checkout") } } + +// RunInDir executes a git command in the given directory and returns 
its +// trimmed stdout output. The dir argument is passed via 'git -C dir'. +func RunInDir( + ctx context.Context, cmdFactory opctx.CmdFactory, dir string, args ...string, +) (string, error) { + var stderr bytes.Buffer + + fullArgs := make([]string, 0, len(args)+2) + fullArgs = append(fullArgs, "-C", dir) + fullArgs = append(fullArgs, args...) + + rawCmd := exec.CommandContext(ctx, "git", fullArgs...) + rawCmd.Stderr = &stderr + + cmd, err := cmdFactory.Command(rawCmd) + if err != nil { + return "", fmt.Errorf("failed to create git command:\n%w", err) + } + + output, err := cmd.RunAndGetOutput(ctx) + if err != nil { + return "", fmt.Errorf("git %s failed:\n%v\n%w", args[0], stderr.String(), err) + } + + return strings.TrimSpace(output), nil +}