From 26a46d319502d3e1c28180768650684c01f9fbd9 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 15:55:21 -0700 Subject: [PATCH 1/2] feat(config): Add fingerprint ignore tags to some config fields In preparation for the upcoming component change detection feature, this commit selects which config fields should be ignored when computing the identity of a component using a fingerprinting mechanism. --- .github/instructions/go.instructions.md | 12 +++ internal/projectconfig/build.go | 6 +- internal/projectconfig/component.go | 8 +- internal/projectconfig/distro.go | 2 +- internal/projectconfig/fingerprint_test.go | 115 +++++++++++++++++++++ internal/projectconfig/overlay.go | 2 +- internal/projectconfig/package.go | 2 +- 7 files changed, 137 insertions(+), 10 deletions(-) create mode 100644 internal/projectconfig/fingerprint_test.go diff --git a/.github/instructions/go.instructions.md b/.github/instructions/go.instructions.md index 1753ba9e..07b50af5 100644 --- a/.github/instructions/go.instructions.md +++ b/.github/instructions/go.instructions.md @@ -96,6 +96,18 @@ description: "Instructions for working on the azldev Go codebase. IMPORTANT: Alw } ``` +## Component Fingerprinting — `fingerprint:"-"` Tags + +Structs in `internal/projectconfig/` are hashed by `hashstructure.Hash()` to detect component changes. Fields **included by default** (safe: false positive > false negative). + +When adding a new field to a fingerprinted struct, ask: **"Does changing this field change the build output?"** +- **Yes** (build flags, spec source, defines, etc.) → do nothing, included automatically. +- **No** (human docs, scheduling hints, CI policy, metadata, back-references) → add `fingerprint:"-"` to the struct tag and register the exclusion in `expectedExclusions` in `internal/projectconfig/fingerprint_test.go`. + +If a parent struct field is already excluded (e.g. `Failure ComponentBuildFailureConfig ... fingerprint:"-"`), do **not** also tag the inner struct's fields — `hashstructure` skips the entire subtree. + +Run `mage unit` to verify — the guard test will catch unregistered exclusions or missing tags. + ### Cmdline Returns CLI commands should return meaningful structured results. azldev has output formatting helpers to facilitate this (for example, `RunFunc*` wrappers handle formatting, so callers typically should not call `reflectable.FormatValue` directly). diff --git a/internal/projectconfig/build.go b/internal/projectconfig/build.go index 50ee2c79..ac144ab8 100644 --- a/internal/projectconfig/build.go +++ b/internal/projectconfig/build.go @@ -13,7 +13,7 @@ type CheckConfig struct { // Skip indicates whether the %check section should be disabled for this component. Skip bool `toml:"skip,omitempty" json:"skip,omitempty" jsonschema:"title=Skip check,description=Disables the %check section by prepending 'exit 0' when set to true"` // SkipReason provides a required justification when Skip is true. - SkipReason string `toml:"skip_reason,omitempty" json:"skipReason,omitempty" jsonschema:"title=Skip reason,description=Required justification for skipping the %check section"` + SkipReason string `toml:"skip_reason,omitempty" json:"skipReason,omitempty" jsonschema:"title=Skip reason,description=Required justification for skipping the %check section" fingerprint:"-"` } // Validate checks that required fields are set when Skip is true. @@ -43,9 +43,9 @@ type ComponentBuildConfig struct { // Check section configuration. Check CheckConfig `toml:"check,omitempty" json:"check,omitempty" jsonschema:"title=Check configuration,description=Configuration for the %check section"` // Failure configuration and policy for this component's build. - Failure ComponentBuildFailureConfig `toml:"failure,omitempty" json:"failure,omitempty" jsonschema:"title=Build failure configuration,description=Configuration and policy regarding build failures for this component."` + Failure ComponentBuildFailureConfig `toml:"failure,omitempty" json:"failure,omitempty" jsonschema:"title=Build failure configuration,description=Configuration and policy regarding build failures for this component." fingerprint:"-"` // Hints for how or when to build the component; must not be required for correctness of builds. - Hints ComponentBuildHints `toml:"hints,omitempty" json:"hints,omitempty" jsonschema:"title=Build hints,description=Non-essential hints for how or when to build the component."` + Hints ComponentBuildHints `toml:"hints,omitempty" json:"hints,omitempty" jsonschema:"title=Build hints,description=Non-essential hints for how or when to build the component." fingerprint:"-"` } // ComponentBuildFailureConfig encapsulates configuration and policy regarding a component's diff --git a/internal/projectconfig/component.go b/internal/projectconfig/component.go index 26bdee59..6803b69c 100644 --- a/internal/projectconfig/component.go +++ b/internal/projectconfig/component.go @@ -49,7 +49,7 @@ type Origin struct { // SourceFileReference encapsulates a reference to a specific source file artifact. type SourceFileReference struct { // Reference to the component to which the source file belongs. - Component ComponentReference `toml:"-" json:"-"` + Component ComponentReference `toml:"-" json:"-" fingerprint:"-"` // Name of the source file; must be non-empty. Filename string `toml:"filename" json:"filename"` @@ -61,7 +61,7 @@ type SourceFileReference struct { HashType fileutils.HashType `toml:"hash-type,omitempty" json:"hashType,omitempty" jsonschema:"enum=SHA256,enum=SHA512,title=Hash type,description=Hash algorithm used for the hash value"` // Origin for this source file. When omitted, the file is resolved via the lookaside cache. - Origin Origin `toml:"origin,omitempty" json:"origin,omitempty"` + Origin Origin `toml:"origin,omitempty" json:"origin,omitempty" fingerprint:"-"` } // Defines a component group. Component groups are logical groupings of components (see [ComponentConfig]). @@ -111,11 +111,11 @@ func (g ComponentGroupConfig) WithAbsolutePaths(referenceDir string) ComponentGr // Defines a component. type ComponentConfig struct { // The component's name; not actually present in serialized files. - Name string `toml:"-" json:"name" table:",sortkey"` + Name string `toml:"-" json:"name" table:",sortkey" fingerprint:"-"` // Reference to the source config file that this definition came from; not present // in serialized files. - SourceConfigFile *ConfigFile `toml:"-" json:"-" table:"-"` + SourceConfigFile *ConfigFile `toml:"-" json:"-" table:"-" fingerprint:"-"` // RenderedSpecDir is the output directory for this component's rendered spec files. // Derived at resolve time from the project's rendered-specs-dir setting; not present diff --git a/internal/projectconfig/distro.go b/internal/projectconfig/distro.go index 54094116..89fe7e22 100644 --- a/internal/projectconfig/distro.go +++ b/internal/projectconfig/distro.go @@ -18,7 +18,7 @@ type DistroReference struct { // Version of the referenced distro. Version string `toml:"version,omitempty" json:"version,omitempty" jsonschema:"title=Version,description=Version of the referenced distro"` // Snapshot date/time for source code if specified components will use source as it existed at this time. - Snapshot string `toml:"snapshot,omitempty" json:"snapshot,omitempty" jsonschema:"format=date-time,title=Snapshot,description=If specified use source code as it existed at this date/time"` + Snapshot string `toml:"snapshot,omitempty" json:"snapshot,omitempty" jsonschema:"format=date-time,title=Snapshot,description=If specified use source code as it existed at this date/time" fingerprint:"-"` } // Implements the [Stringer] interface for [DistroReference]. diff --git a/internal/projectconfig/fingerprint_test.go b/internal/projectconfig/fingerprint_test.go new file mode 100644 index 00000000..b3bcb866 --- /dev/null +++ b/internal/projectconfig/fingerprint_test.go @@ -0,0 +1,115 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package projectconfig_test + +import ( + "reflect" + "testing" + + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/stretchr/testify/assert" +) + +// TestAllFingerprintedFieldsHaveDecision verifies that every field in every +// fingerprinted struct has been consciously categorized as either included +// (no fingerprint tag) or excluded (`fingerprint:"-"`). +// +// This test serves two purposes: +// 1. It ensures that newly added fields default to **included** in the fingerprint +// (the safe default — you get a false positive, never a false negative). +// 2. It catches accidental removal of `fingerprint:"-"` tags from excluded fields, +// since all exclusions are tracked in expectedExclusions. +func TestAllFingerprintedFieldsHaveDecision(t *testing.T) { + // All struct types whose fields participate in component fingerprinting. + // When adding a new struct that feeds into the fingerprint, add it here. + fingerprintedStructs := []reflect.Type{ + reflect.TypeFor[projectconfig.ComponentConfig](), + reflect.TypeFor[projectconfig.ComponentBuildConfig](), + reflect.TypeFor[projectconfig.CheckConfig](), + reflect.TypeFor[projectconfig.PackageConfig](), + reflect.TypeFor[projectconfig.ComponentOverlay](), + reflect.TypeFor[projectconfig.SpecSource](), + reflect.TypeFor[projectconfig.DistroReference](), + reflect.TypeFor[projectconfig.SourceFileReference](), + } + + // Maps "StructName.FieldName" for every field that should carry a + // `fingerprint:"-"` tag. Catches accidental tag removal. + // + // Each entry documents WHY the field is excluded from the fingerprint: + expectedExclusions := map[string]bool{ + // ComponentConfig.Name — metadata, already the map key in project config. + "ComponentConfig.Name": true, + // ComponentConfig.SourceConfigFile — internal bookkeeping reference, not a build input. + "ComponentConfig.SourceConfigFile": true, + + // ComponentBuildConfig.Failure — CI policy (expected failure tracking), not a build input. + "ComponentBuildConfig.Failure": true, + // ComponentBuildConfig.Hints — scheduling hints (e.g. expensive), not a build input. + "ComponentBuildConfig.Hints": true, + + // CheckConfig.SkipReason — human documentation for why check is skipped, not a build input. + "CheckConfig.SkipReason": true, + + // PackageConfig.Publish — post-build routing (where to publish), not a build input. + "PackageConfig.Publish": true, + + // ComponentOverlay.Description — human-readable documentation for the overlay. + "ComponentOverlay.Description": true, + + // SourceFileReference.Component — back-reference to parent, not a build input. + "SourceFileReference.Component": true, + + // DistroReference.Snapshot — snapshot timestamp is not a build input; the resolved + // upstream commit hash (captured separately via SourceIdentity) is what matters. + // Excluding this prevents a snapshot bump from marking all upstream components as changed. + "DistroReference.Snapshot": true, + + // SourceFileReference.Origin — download location metadata (URI, type), not a build input. + // The file content is already captured by Filename + Hash; changing a CDN URL should not + // trigger a rebuild. + "SourceFileReference.Origin": true, + } + + // Collect all actual exclusions found via reflection, and flag invalid tag values. + actualExclusions := make(map[string]bool) + + for _, st := range fingerprintedStructs { + for i := range st.NumField() { + field := st.Field(i) + key := st.Name() + "." + field.Name + + tag := field.Tag.Get("fingerprint") + + switch tag { + case "": + // No tag — included by default (the safe default). + case "-": + actualExclusions[key] = true + default: + // hashstructure only recognises "" (include) and "-" (exclude). + // Any other value is silently treated as included, which is + // almost certainly a typo. + assert.Failf(t, "invalid fingerprint tag", + "field %q has unrecognised fingerprint tag value %q — "+ + "only `fingerprint:\"-\"` (exclude) is valid; "+ + "remove the tag to include the field", key, tag) + } + } + } + + // Verify every expected exclusion is actually present. + for key := range expectedExclusions { + assert.Truef(t, actualExclusions[key], + "expected field %q to have `fingerprint:\"-\"` tag, but it does not — "+ + "was the tag accidentally removed?", key) + } + + // Verify no unexpected exclusions exist. + for key := range actualExclusions { + assert.Truef(t, expectedExclusions[key], + "field %q has `fingerprint:\"-\"` tag but is not in expectedExclusions — "+ + "add it to expectedExclusions if the exclusion is intentional", key) + } +} diff --git a/internal/projectconfig/overlay.go b/internal/projectconfig/overlay.go index 3699f735..2c603b9e 100644 --- a/internal/projectconfig/overlay.go +++ b/internal/projectconfig/overlay.go @@ -17,7 +17,7 @@ type ComponentOverlay struct { // The type of overlay to apply. Type ComponentOverlayType `toml:"type" json:"type" validate:"required" jsonschema:"enum=spec-add-tag,enum=spec-insert-tag,enum=spec-set-tag,enum=spec-update-tag,enum=spec-remove-tag,enum=spec-prepend-lines,enum=spec-append-lines,enum=spec-search-replace,enum=spec-remove-section,enum=patch-add,enum=patch-remove,enum=file-prepend-lines,enum=file-search-replace,enum=file-add,enum=file-remove,enum=file-rename,title=Overlay type,description=The type of overlay to apply"` // Human readable description of overlay; primarily present to document the need for the change. - Description string `toml:"description,omitempty" json:"description,omitempty" jsonschema:"title=Description,description=Human readable description of overlay"` + Description string `toml:"description,omitempty" json:"description,omitempty" jsonschema:"title=Description,description=Human readable description of overlay" fingerprint:"-"` // For overlays that apply to non-spec files, indicates the filename. For overlays that can // apply to multiple files, supports glob patterns (including globstar). diff --git a/internal/projectconfig/package.go b/internal/projectconfig/package.go index 6c1d6b84..3575e1dd 100644 --- a/internal/projectconfig/package.go +++ b/internal/projectconfig/package.go @@ -24,7 +24,7 @@ type PackagePublishConfig struct { // Currently only publish settings are supported; additional fields may be added in the future. type PackageConfig struct { // Publish holds the publish settings for this package. - Publish PackagePublishConfig `toml:"publish,omitempty" json:"publish,omitempty" jsonschema:"title=Publish settings,description=Publishing settings for this binary package"` + Publish PackagePublishConfig `toml:"publish,omitempty" json:"publish,omitempty" jsonschema:"title=Publish settings,description=Publishing settings for this binary package" fingerprint:"-"` } // MergeUpdatesFrom updates the package config with non-zero values from other. From 7c7a218e759bf3a396123d24ec25b08804f5eead Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Fri, 10 Apr 2026 08:42:58 -0700 Subject: [PATCH 2/2] feedback: add doc --- .../component-identity-and-locking.md | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 docs/developer/reference/component-identity-and-locking.md diff --git a/docs/developer/reference/component-identity-and-locking.md b/docs/developer/reference/component-identity-and-locking.md new file mode 100644 index 00000000..37fe8685 --- /dev/null +++ b/docs/developer/reference/component-identity-and-locking.md @@ -0,0 +1,42 @@ +# Component Identity & Change Detection + +`azldev` computes a fingerprint for each component based on its config and sources. This enables: + +- **Change detection**: identify which components have changed between two commits or branches, even if the change is a non-obvious config inheritance. +- **Build optimization**: only rebuild changed components and their dependents, skipping unchanged ones. +- **Automatic release bumping**: increment the release tag of changed packages automatically, and include the commits that caused the change in the changelog. + +## Fingerprint Inputs + +A component's fingerprint is a SHA256 combining: + +1. **Config hash** — `hashstructure.Hash()` of the resolved `ComponentConfig` (after all merging). Fields tagged `fingerprint:"-"` are excluded. +2. **Source identity** — content hash for local specs (all files in the spec directory), commit hash for upstream. +3. **Overlay file hashes** — SHA256 of each file referenced by overlay `Source` fields. +4. **Distro name + version** +5. **Manual release bump counter** — increments with each manual release bump, ensuring a new fingerprint even if there are no config or source changes. + +Global change propagation works automatically: the fingerprint operates on the fully-merged config, so a change to a distro or group default changes the resolved config of every inheriting component. + +## `fingerprint:"-"` Tag System + +The `hashstructure` library uses `TagName: "fingerprint"`. Untagged fields are **included by default** (safe default: false positive > false negative). + +A guard test (`TestAllFingerprintedFieldsHaveDecision`) reflects over all fingerprinted structs and maintains a bi-directional allowlist of exclusions. It fails if a `fingerprint:"-"` tag is added without registering it, or if a registered exclusion's tag is removed. + +### Adding a New Config Field + +1. Add the field to the struct in `internal/projectconfig/`. +2. **If NOT a build input**: add `fingerprint:"-"` to the struct tag and register it in `expectedExclusions` in `internal/projectconfig/fingerprint_test.go`. +3. **If a build input**: do nothing — included by default. +4. Run `mage unit`. + +### Adding a New Source Type + +1. Implement `SourceIdentityProvider` on your provider (see `ResolveLocalSourceIdentity` in `localidentity.go` for a simple example). +2. Add a case to `sourceManager.ResolveSourceIdentity()` in `sourcemanager.go`. +3. Add tests in `identityprovider_test.go`. + +## Known Limitations + +- It is difficult to determine WHY a diff occurred (e.g., which specific field changed) since the fingerprint is a single opaque hash. The JSON output includes an `inputs` breakdown (`configHash`, `sourceIdentity`, `overlayFileHashes`, etc.) that can help narrow it down by comparing the two identity files manually.