diff --git a/acceptance/bundle/config-remote-sync/resolve_variables/.databricks/bundle/default/variable-overrides.json b/acceptance/bundle/config-remote-sync/resolve_variables/.databricks/bundle/default/variable-overrides.json new file mode 100644 index 0000000000..64e44e83ed --- /dev/null +++ b/acceptance/bundle/config-remote-sync/resolve_variables/.databricks/bundle/default/variable-overrides.json @@ -0,0 +1,3 @@ +{ + "file_override_var": "from-overrides-file" +} diff --git a/acceptance/bundle/config-remote-sync/resolve_variables/databricks.yml.tmpl b/acceptance/bundle/config-remote-sync/resolve_variables/databricks.yml.tmpl new file mode 100644 index 0000000000..26a6071b97 --- /dev/null +++ b/acceptance/bundle/config-remote-sync/resolve_variables/databricks.yml.tmpl @@ -0,0 +1,53 @@ +bundle: + name: test-bundle-$UNIQUE_NAME + +variables: + my_catalog: + default: main + # Both resolve to same value to test ambiguity (when we have multiple matches → skip) + landing_schema: + default: raw_data + curated_schema: + default: raw_data + # Variable with value set only in target (no default) + target_env: + description: "Environment name" + # Variable set via .databricks/bundle//variable-overrides.json + file_override_var: + description: "Set from variable-overrides.json" + # Complex variable to verify no panics + cluster_config: + type: complex + default: + node_type_id: Standard_DS3_v2 + num_workers: 2 + +resources: + jobs: + my_job: + parameters: + - name: catalog + default: ${var.my_catalog} + - name: env + default: ${var.target_env} + - name: file_val + default: ${var.file_override_var} + # Both use variables that resolve to the same value ("raw_data"). + # Tests disambiguation: original reference is preserved on Replace. + - name: landing + default: ${var.landing_schema} + - name: curated + default: ${var.curated_schema} + tasks: + - task_key: main + notebook_task: + notebook_path: /Users/{{workspace_user_name}}/notebook + base_parameters: + # Compound interpolation: variable embedded in a longer string. + source_path: /mnt/${var.my_catalog}/raw/landing + +targets: + default: + mode: development + variables: + target_env: production diff --git a/acceptance/bundle/config-remote-sync/resolve_variables/out.test.toml b/acceptance/bundle/config-remote-sync/resolve_variables/out.test.toml new file mode 100644 index 0000000000..152a1f10a9 --- /dev/null +++ b/acceptance/bundle/config-remote-sync/resolve_variables/out.test.toml @@ -0,0 +1,9 @@ +Local = true +Cloud = true +RequiresUnityCatalog = true + +[GOOS] + windows = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["direct", "terraform"] diff --git a/acceptance/bundle/config-remote-sync/resolve_variables/output.txt b/acceptance/bundle/config-remote-sync/resolve_variables/output.txt new file mode 100644 index 0000000000..fabad70819 --- /dev/null +++ b/acceptance/bundle/config-remote-sync/resolve_variables/output.txt @@ -0,0 +1,63 @@ +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +=== Add and replace parameters remotely +=== Detect and save changes +Detected changes in 1 resource(s): + +Resource: resources.jobs.my_job + parameters[name='catalog'].default: replace + parameters[name='data_catalog']: add + parameters[name='deploy_env']: add + parameters[name='file_sourced']: add + parameters[name='region']: add + parameters[name='some_schema']: add + tasks[task_key='main'].notebook_task.base_parameters['source_path']: replace + + + +=== Configuration changes + +>>> diff.py databricks.yml.backup databricks.yml +--- databricks.yml.backup ++++ databricks.yml +@@ -28,5 +28,5 @@ + parameters: + - name: catalog +- default: ${var.my_catalog} ++ default: staging_catalog + - name: env + default: ${var.target_env} +@@ -39,4 +39,14 @@ + - name: curated + default: ${var.curated_schema} ++ - default: ${var.my_catalog} ++ name: data_catalog ++ - default: ${var.target_env} ++ name: deploy_env ++ - default: ${var.file_override_var} ++ name: file_sourced ++ - default: us-west-2 ++ name: region ++ - default: raw_data ++ name: some_schema + tasks: + - task_key: main +@@ -45,5 +55,5 @@ + base_parameters: + # Compound interpolation: variable embedded in a longer string. +- source_path: /mnt/${var.my_catalog}/raw/landing ++ source_path: /mnt/${var.my_catalog}/raw/landing_v2 + + targets: + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/config-remote-sync/resolve_variables/script b/acceptance/bundle/config-remote-sync/resolve_variables/script new file mode 100755 index 0000000000..dbfb09f05b --- /dev/null +++ b/acceptance/bundle/config-remote-sync/resolve_variables/script @@ -0,0 +1,49 @@ +#!/bin/bash + +envsubst < databricks.yml.tmpl > databricks.yml + +cleanup() { + trace $CLI bundle destroy --auto-approve +} +trap cleanup EXIT + +$CLI bundle deploy +job_id="$(read_id.py my_job)" + +title "Add and replace parameters remotely" +edit_resource.py jobs $job_id < restored to \${var.my_catalog} +r["parameters"].append({"name": "data_catalog", "default": "main"}) +# "raw_data" matches two variables (landing_schema, curated_schema) -> ambiguous, stays hardcoded +r["parameters"].append({"name": "some_schema", "default": "raw_data"}) +# "us-west-2" matches no variable -> stays hardcoded +r["parameters"].append({"name": "region", "default": "us-west-2"}) +# "production" matches target_env (set in target, not default) -> restored to \${var.target_env} +r["parameters"].append({"name": "deploy_env", "default": "production"}) +# "from-overrides-file" matches file_override_var (set via variable-overrides.json) -> restored +r["parameters"].append({"name": "file_sourced", "default": "from-overrides-file"}) + +# --- Replace operations (original ref) --- +# Change "catalog" param (originally \${var.my_catalog} = "main") to unrelated value -> hardcoded +for p in r["parameters"]: + if p["name"] == "catalog": + p["default"] = "staging_catalog" + +# Compound interpolation: change only the suffix of source_path. +# The variable part (\${var.my_catalog} = "main") is unchanged -> preserved as \${var.my_catalog} +for t in r["tasks"]: + bp = t.get("notebook_task", {}).get("base_parameters", {}) + if "source_path" in bp: + bp["source_path"] = "/mnt/main/raw/landing_v2" +EOF + +title "Detect and save changes" +echo +cp databricks.yml databricks.yml.backup +$CLI bundle config-remote-sync --save + +title "Configuration changes" +echo +trace diff.py databricks.yml.backup databricks.yml +rm databricks.yml.backup diff --git a/acceptance/bundle/config-remote-sync/resolve_variables/test.toml b/acceptance/bundle/config-remote-sync/resolve_variables/test.toml new file mode 100644 index 0000000000..3174477f7d --- /dev/null +++ b/acceptance/bundle/config-remote-sync/resolve_variables/test.toml @@ -0,0 +1,11 @@ +Cloud = true +RequiresUnityCatalog = true + +RecordRequests = false +Ignore = [".databricks", "databricks.yml", "databricks.yml.backup"] + +[Env] +DATABRICKS_BUNDLE_ENABLE_EXPERIMENTAL_YAML_SYNC = "true" + +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = ["direct", "terraform"] diff --git a/bundle/configsync/variables.go b/bundle/configsync/variables.go new file mode 100644 index 0000000000..238ee36e2c --- /dev/null +++ b/bundle/configsync/variables.go @@ -0,0 +1,530 @@ +package configsync + +import ( + "bytes" + "os" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/dynvar" + "github.com/databricks/cli/libs/dyn/yamlloader" +) + +// varPrefix is the dyn.Path prefix for the ${var.X} shorthand. +var varPrefix = dyn.NewPath(dyn.Key("var")) + +// RestoreVariableReferences replaces hardcoded change values with variable +// references (e.g., ${var.foo}) when the value can be traced back to a +// variable in the original YAML. +// +// For Replace operations, restoration requires the pre-resolved YAML at the +// exact field position. Pure variable references (e.g., ${var.catalog}) are +// restored when their resolved value matches. Compound interpolation strings +// (e.g., "/mnt/${var.account}/raw/landing") are reconstructed by preserving +// variables whose resolved values still appear at their expected positions +// and updating only the literal segments. The reverse map is NOT used for +// Replace — this prevents false positives where a sibling's variable context +// would incorrectly rewrite an unrelated hardcoded field. +// +// For Add operations (new fields), the reverse map is used: if the parent +// subtree uses variables and the new value matches exactly one variable, the +// variable reference is substituted. +func RestoreVariableReferences(b *bundle.Bundle, fieldChanges []FieldChange) { + fileCache := map[string]dyn.Value{} + resolved := b.Config.Value() + + reverseMap := buildVariableReverseMap(resolved, fileCache, fieldChanges) + if len(reverseMap) == 0 { + return + } + + for i := range fieldChanges { + fc := &fieldChanges[i] + if fc.Change.Operation != OperationReplace && fc.Change.Operation != OperationAdd { + continue + } + + preResolved, hasContext := fieldVariableContext(fileCache, fc.FilePath, fc.FieldCandidates) + + var newValue any + if fc.Change.Operation == OperationReplace { + // Replace: only restore if the original field itself was a variable. + if !preResolved.IsValid() { + continue + } + newValue = restoreOriginalRefs(fc.Change.Value, preResolved, resolved) + } else { + // Add: use reverse map when parent context has variables. + if !hasContext { + continue + } + newValue = restoreFromReverseMap(fc.Change.Value, reverseMap) + } + + fc.Change = &ConfigChangeDesc{ + Operation: fc.Change.Operation, + Value: newValue, + } + } +} + +// buildVariableReverseMap discovers all pure variable references in the +// pre-resolved YAML files and pairs each with its resolved value from the +// bundle config. Returns a map from resolved value → reference strings. +func buildVariableReverseMap(resolved dyn.Value, fileCache map[string]dyn.Value, fieldChanges []FieldChange) map[any][]string { + m := map[any][]string{} + seen := map[string]bool{} + + files := map[string]bool{} + for _, fc := range fieldChanges { + files[fc.FilePath] = true + } + + for filePath := range files { + preResolved := loadCachedYAML(fileCache, filePath) + if !preResolved.IsValid() { + continue + } + collectReferences(preResolved, resolved, m, seen) + } + + return m +} + +// collectReferences walks a pre-resolved dyn.Value to find pure variable +// references (e.g., ${var.foo}, ${bundle.name}) and adds them to the reverse +// map keyed by their resolved value. +func collectReferences(preResolved, resolved dyn.Value, m map[any][]string, seen map[string]bool) { + // The callback never returns an error, so WalkReadOnly always returns nil. + _ = dyn.WalkReadOnly(preResolved, func(_ dyn.Path, v dyn.Value) error { + if v.Kind() != dyn.KindString { + return nil + } + s := v.MustString() + if !dynvar.IsPureVariableReference(s) || seen[s] { + return nil + } + seen[s] = true + + resolvedPath, ok := resolveReferencePath(s) + if !ok { + return nil + } + + resolvedV, lookupErr := dyn.GetByPath(resolved, resolvedPath) + if lookupErr != nil { + return nil + } + + switch resolvedV.Kind() { + case dyn.KindString, dyn.KindBool, dyn.KindInt: + m[resolvedV.AsAny()] = append(m[resolvedV.AsAny()], s) + case dyn.KindInvalid, dyn.KindMap, dyn.KindSequence, dyn.KindFloat, dyn.KindTime, dyn.KindNil: + // Skip non-scalar and non-comparable types. + } + + return nil + }) +} + +// resolveReferencePath converts a variable reference string to the dyn.Path +// where its resolved value can be found in the bundle config. It applies the +// same ${var.X} → variables.X.value shorthand rewriting as the variable +// resolution mutator. +func resolveReferencePath(refStr string) (dyn.Path, bool) { + p, ok := dynvar.PureReferenceToPath(refStr) + if !ok { + return nil, false + } + + if p.HasPrefix(varPrefix) && len(p) >= 2 { + newPath := dyn.NewPath( + dyn.Key("variables"), + p[1], + dyn.Key("value"), + ) + if len(p) > 2 { + newPath = newPath.Append(p[2:]...) + } + return newPath, true + } + + return p, true +} + +// restoreOriginalRefs recursively restores variable references for Replace +// operations. For pure variable references, restores when the resolved value +// matches. For compound interpolation (e.g., "${var.X}_suffix"), preserves +// variables whose resolved values still appear at their expected positions. +func restoreOriginalRefs(value any, preResolved, resolved dyn.Value) any { + switch v := value.(type) { + case string, bool, int64: + if ref, ok := matchOriginalRef(value, preResolved, resolved); ok { + return ref + } + if s, ok := value.(string); ok { + if restored, ok := restoreCompoundInterpolation(s, preResolved, resolved); ok { + return restored + } + } + return value + + case map[string]any: + preMap, _ := preResolved.AsMap() + for key, val := range v { + var childPre dyn.Value + if preMap.Len() > 0 { + if p, ok := preMap.GetPairByString(key); ok { + childPre = p.Value + } + } + v[key] = restoreOriginalRefs(val, childPre, resolved) + } + return v + + case []any: + preSeq, _ := preResolved.AsSequence() + for i, val := range v { + var childPre dyn.Value + if i < len(preSeq) { + childPre = preSeq[i] + } + v[i] = restoreOriginalRefs(val, childPre, resolved) + } + return v + + default: + return value + } +} + +// restoreFromReverseMap recursively replaces leaf values with variable +// references for Add operations. Requires exactly one matching variable. +func restoreFromReverseMap(value any, reverseMap map[any][]string) any { + switch v := value.(type) { + case string, bool, int64: + if refs := reverseMap[value]; len(refs) == 1 { + return refs[0] + } + return value + + case map[string]any: + for key, val := range v { + v[key] = restoreFromReverseMap(val, reverseMap) + } + return v + + case []any: + for i, val := range v { + v[i] = restoreFromReverseMap(val, reverseMap) + } + return v + + default: + return value + } +} + +// matchOriginalRef checks if the pre-resolved config value at this position +// was a pure variable reference whose resolved value equals remoteValue. +func matchOriginalRef(remoteValue any, preResolved, resolved dyn.Value) (string, bool) { + if !preResolved.IsValid() { + return "", false + } + s, ok := preResolved.AsString() + if !ok || !dynvar.IsPureVariableReference(s) { + return "", false + } + + resolvedPath, ok := resolveReferencePath(s) + if !ok { + return "", false + } + + resolvedV, err := dyn.GetByPath(resolved, resolvedPath) + if err != nil { + return "", false + } + + if resolvedV.AsAny() == remoteValue { + return s, true + } + return "", false +} + +// restoreCompoundInterpolation handles strings with mixed variable references +// and literal text, e.g., "/mnt/${var.account}/raw/landing". It checks whether +// each variable's resolved value still appears at its expected position in the +// remote string. Variables that match are preserved; changed literal segments +// are updated. Falls back to false if the template can't be aligned. +// +// Known limitation: variable matching is prefix-greedy. If ${var.X}="foo" and +// the remote starts with "footbar...", HasPrefix matches "foo" and the leftover +// "tbar" becomes literal garbage. Adjacent variables without a literal separator +// (e.g., "${var.A}${var.B}") cannot be reliably split if either value changes. +func restoreCompoundInterpolation(remoteValue string, preResolved, resolved dyn.Value) (string, bool) { + if !preResolved.IsValid() { + return "", false + } + template, ok := preResolved.AsString() + if !ok || !dynvar.ContainsVariableReference(template) || dynvar.IsPureVariableReference(template) { + return "", false + } + + // Parse the template into segments: alternating literal and variable parts. + segments := parseTemplateSegments(template, resolved) + if segments == nil { + return "", false + } + + // Walk the remote string, aligning each segment against the template. + // For each segment we try an exact match first. On mismatch we search + // ahead for the next literal anchor to determine the boundary. + // The last segment always consumes all remaining text. + var result strings.Builder + pos := 0 + + for i, seg := range segments { + if pos > len(remoteValue) { + return "", false + } + + remaining := remoteValue[pos:] + isLast := i == len(segments)-1 + + if seg.isVariable { + if seg.resolvedValue == "" { + return "", false + } + if isLast { + // Last segment: variable must match the entire remainder. + if remaining == seg.resolvedValue { + result.WriteString(seg.raw) + } else { + result.WriteString(remaining) + } + pos = len(remoteValue) + } else if strings.HasPrefix(remaining, seg.resolvedValue) { + result.WriteString(seg.raw) + pos += len(seg.resolvedValue) + } else { + end := findAnchorOffset(segments, i+1, remaining) + if end < 0 { + return "", false + } + result.WriteString(remaining[:end]) + pos += end + } + } else { + if isLast { + // Last literal: take the entire remainder (may include suffix changes). + result.WriteString(remaining) + pos = len(remoteValue) + } else if strings.HasPrefix(remaining, seg.raw) { + result.WriteString(seg.raw) + pos += len(seg.raw) + } else { + end := findAnchorOffset(segments, i+1, remaining) + if end < 0 { + return "", false + } + result.WriteString(remaining[:end]) + pos += end + } + } + } + + restored := result.String() + if restored == template { + // Nothing changed — keep the original template as-is. + return template, true + } + if !dynvar.ContainsVariableReference(restored) { + // All variables were lost — no benefit over hardcoding. + return "", false + } + return restored, true +} + +// templateSegment represents either a literal string or a variable reference +// within a template string. +type templateSegment struct { + raw string // as it appears in the template (literal text or "${var.X}") + isVariable bool + resolvedValue string // only set for variable segments +} + +// parseTemplateSegments splits a template string like "/mnt/${var.X}/raw" +// into alternating literal and variable segments, resolving each variable. +// Returns nil if any variable can't be resolved. +func parseTemplateSegments(template string, resolved dyn.Value) []templateSegment { + ref, ok := dynvar.NewRef(dyn.V(template)) + if !ok { + return nil + } + + // Build full match strings: each Matches[i][0] is the "${...}" text. + var segments []templateSegment + cursor := 0 + + for _, m := range ref.Matches { + fullMatch := m[0] // e.g., "${var.catalog}" + + // Find this match in the template starting from cursor. + idx := strings.Index(template[cursor:], fullMatch) + if idx < 0 { + return nil + } + + // Literal before this variable. + if idx > 0 { + segments = append(segments, templateSegment{ + raw: template[cursor : cursor+idx], + }) + } + + resolvedPath, ok := resolveReferencePath(fullMatch) + if !ok { + return nil + } + + resolvedV, err := dyn.GetByPath(resolved, resolvedPath) + if err != nil { + return nil + } + + resolvedStr, ok := resolvedV.AsString() + if !ok { + return nil + } + + segments = append(segments, templateSegment{ + raw: fullMatch, + isVariable: true, + resolvedValue: resolvedStr, + }) + + cursor += idx + len(fullMatch) + } + + // Trailing literal. + if cursor < len(template) { + segments = append(segments, templateSegment{ + raw: template[cursor:], + }) + } + + return segments +} + +// findAnchorOffset searches for the next literal segment's text in remaining +// and returns the offset where it starts. This is used to determine boundaries +// when a variable or literal segment doesn't match at the current position. +// Returns len(remaining) if no subsequent literal exists (last segment case). +// Returns -1 if a subsequent literal exists but can't be found. +func findAnchorOffset(segments []templateSegment, from int, remaining string) int { + for i := from; i < len(segments); i++ { + if !segments[i].isVariable { + idx := strings.Index(remaining, segments[i].raw) + if idx < 0 { + return -1 + } + return idx + } + } + return len(remaining) +} + +// fieldVariableContext returns the pre-resolved dyn.Value at the field path +// and whether the field's parent subtree contains any variable reference. +// The returned dyn.Value is valid only when the field itself was found in the +// pre-resolved YAML (used for Replace). The bool is true when any ancestor +// uses variables (used for Add). +func fieldVariableContext(cache map[string]dyn.Value, filePath string, candidates []string) (dyn.Value, bool) { + configValue := loadCachedYAML(cache, filePath) + if !configValue.IsValid() { + return dyn.InvalidValue, false + } + + for _, candidate := range candidates { + candidate = stripBracketStars(candidate) + + p, err := dyn.NewPathFromString(candidate) + if err != nil { + continue + } + + v, err := dyn.GetByPath(configValue, p) + if err == nil { + if subtreeHasVariableRef(v) { + return v, true + } + } + + if len(p) > 0 { + parent, err := dyn.GetByPath(configValue, p[:len(p)-1]) + if err == nil && subtreeHasVariableRef(parent) { + return dyn.InvalidValue, true + } + } + } + + return dyn.InvalidValue, false +} + +// stripBracketStars removes all [*] segments from a structpath string. +// resolveSelectors inserts [*] at any array position for Add operations +// where the target element doesn't exist yet. +func stripBracketStars(candidate string) string { + return strings.ReplaceAll(candidate, "[*]", "") +} + +// loadCachedYAML parses a YAML file and caches the result. Returns the +// pre-resolved dyn.Value (variable references are still literal strings). +func loadCachedYAML(cache map[string]dyn.Value, filePath string) dyn.Value { + if v, ok := cache[filePath]; ok { + return v + } + + raw, err := os.ReadFile(filePath) + if err != nil { + cache[filePath] = dyn.InvalidValue + return dyn.InvalidValue + } + + v, err := yamlloader.LoadYAML(filePath, bytes.NewBuffer(raw)) + if err != nil { + cache[filePath] = dyn.InvalidValue + return dyn.InvalidValue + } + + cache[filePath] = v + return v +} + +// subtreeHasVariableRef recursively checks whether any string leaf in the +// dyn.Value subtree contains a variable reference. Short-circuits on first find. +func subtreeHasVariableRef(v dyn.Value) bool { + switch v.Kind() { + case dyn.KindString: + return dynvar.ContainsVariableReference(v.MustString()) + case dyn.KindMap: + m, _ := v.AsMap() + for _, p := range m.Pairs() { + if subtreeHasVariableRef(p.Value) { + return true + } + } + case dyn.KindSequence: + s, _ := v.AsSequence() + for _, elem := range s { + if subtreeHasVariableRef(elem) { + return true + } + } + case dyn.KindInvalid, dyn.KindBool, dyn.KindInt, dyn.KindFloat, dyn.KindTime, dyn.KindNil: + // Leaf types that cannot contain variable references. + } + return false +} diff --git a/bundle/configsync/variables_test.go b/bundle/configsync/variables_test.go new file mode 100644 index 0000000000..5ce568c037 --- /dev/null +++ b/bundle/configsync/variables_test.go @@ -0,0 +1,263 @@ +package configsync + +import ( + "testing" + + "github.com/databricks/cli/libs/dyn" + "github.com/stretchr/testify/assert" +) + +func TestRestoreOriginalRefs_ScalarMatchesOriginalVariable(t *testing.T) { + preResolved := dyn.V("${var.catalog}") + resolved := dyn.V(map[string]dyn.Value{ + "variables": dyn.V(map[string]dyn.Value{ + "catalog": dyn.V(map[string]dyn.Value{ + "value": dyn.V("main"), + }), + }), + }) + + result := restoreOriginalRefs("main", preResolved, resolved) + assert.Equal(t, "${var.catalog}", result) +} + +func TestRestoreOriginalRefs_ScalarDoesNotMatchOriginalVariable(t *testing.T) { + preResolved := dyn.V("${var.catalog}") + resolved := dyn.V(map[string]dyn.Value{ + "variables": dyn.V(map[string]dyn.Value{ + "catalog": dyn.V(map[string]dyn.Value{ + "value": dyn.V("main"), + }), + }), + }) + + // Remote changed to a different value — no restoration. + result := restoreOriginalRefs("staging", preResolved, resolved) + assert.Equal(t, "staging", result) +} + +func TestRestoreOriginalRefs_HardcodedFieldNotRewritten(t *testing.T) { + // Pre-resolved field was a plain string, not a variable reference. + preResolved := dyn.V("us-east-1") + resolved := dyn.V(map[string]dyn.Value{ + "variables": dyn.V(map[string]dyn.Value{ + "region": dyn.V(map[string]dyn.Value{ + "value": dyn.V("main"), + }), + }), + }) + + // Even though "main" might match a variable, restoreOriginalRefs must NOT + // rewrite it — the original was hardcoded. + result := restoreOriginalRefs("main", preResolved, resolved) + assert.Equal(t, "main", result) +} + +func TestRestoreOriginalRefs_InvalidPreResolvedSkipped(t *testing.T) { + // When preResolved is invalid (field didn't exist in original YAML), + // restoreOriginalRefs does not modify the value. + result := restoreOriginalRefs("main", dyn.InvalidValue, dyn.V(map[string]dyn.Value{})) + assert.Equal(t, "main", result) +} + +func TestRestoreOriginalRefs_MapRecursesIntoChildren(t *testing.T) { + preResolved := dyn.V(map[string]dyn.Value{ + "catalog": dyn.V("${var.catalog}"), + "region": dyn.V("hardcoded"), + }) + resolved := dyn.V(map[string]dyn.Value{ + "variables": dyn.V(map[string]dyn.Value{ + "catalog": dyn.V(map[string]dyn.Value{ + "value": dyn.V("main"), + }), + }), + }) + + value := map[string]any{ + "catalog": "main", + "region": "main", // same value but original was hardcoded — must NOT rewrite + } + result := restoreOriginalRefs(value, preResolved, resolved) + m := result.(map[string]any) + assert.Equal(t, "${var.catalog}", m["catalog"]) + assert.Equal(t, "main", m["region"]) +} + +func TestRestoreFromReverseMap_UniqueMatch(t *testing.T) { + reverseMap := map[any][]string{ + "main": {"${var.catalog}"}, + } + result := restoreFromReverseMap("main", reverseMap) + assert.Equal(t, "${var.catalog}", result) +} + +func TestRestoreFromReverseMap_AmbiguousSkipped(t *testing.T) { + reverseMap := map[any][]string{ + "raw_data": {"${var.landing_schema}", "${var.curated_schema}"}, + } + result := restoreFromReverseMap("raw_data", reverseMap) + assert.Equal(t, "raw_data", result) +} + +func TestRestoreFromReverseMap_NoMatch(t *testing.T) { + reverseMap := map[any][]string{ + "main": {"${var.catalog}"}, + } + result := restoreFromReverseMap("us-west-2", reverseMap) + assert.Equal(t, "us-west-2", result) +} + +func TestRestoreFromReverseMap_NestedMap(t *testing.T) { + reverseMap := map[any][]string{ + "main": {"${var.catalog}"}, + "dev": {"${bundle.target}"}, + } + value := map[string]any{ + "catalog": "main", + "environment": "dev", + "region": "us-west-2", + } + result := restoreFromReverseMap(value, reverseMap) + m := result.(map[string]any) + assert.Equal(t, "${var.catalog}", m["catalog"]) + assert.Equal(t, "${bundle.target}", m["environment"]) + assert.Equal(t, "us-west-2", m["region"]) +} + +func TestRestoreCompoundInterpolation_SuffixChanged(t *testing.T) { + preResolved := dyn.V("/mnt/${var.storage_account}/raw/landing") + resolved := dyn.V(map[string]dyn.Value{ + "variables": dyn.V(map[string]dyn.Value{ + "storage_account": dyn.V(map[string]dyn.Value{ + "value": dyn.V("devstorageacct"), + }), + }), + }) + + result := restoreOriginalRefs("/mnt/devstorageacct/raw/landing_v2", preResolved, resolved) + assert.Equal(t, "/mnt/${var.storage_account}/raw/landing_v2", result) +} + +func TestRestoreCompoundInterpolation_PrefixVariable(t *testing.T) { + preResolved := dyn.V("${bundle.name}_landing_to_raw") + resolved := dyn.V(map[string]dyn.Value{ + "bundle": dyn.V(map[string]dyn.Value{ + "name": dyn.V("analytics_pipeline"), + }), + }) + + result := restoreOriginalRefs("analytics_pipeline_landing_to_raw_v2", preResolved, resolved) + assert.Equal(t, "${bundle.name}_landing_to_raw_v2", result) +} + +func TestRestoreCompoundInterpolation_MultipleVars(t *testing.T) { + preResolved := dyn.V("jdbc:sqlserver://${var.db_host}:${var.db_port};database=${var.db_name}") + resolved := dyn.V(map[string]dyn.Value{ + "variables": dyn.V(map[string]dyn.Value{ + "db_host": dyn.V(map[string]dyn.Value{"value": dyn.V("dev-sql.example.com")}), + "db_port": dyn.V(map[string]dyn.Value{"value": dyn.V("1433")}), + "db_name": dyn.V(map[string]dyn.Value{"value": dyn.V("analytics_dev")}), + }), + }) + + // Only port changed — host and db_name preserved. + result := restoreOriginalRefs( + "jdbc:sqlserver://dev-sql.example.com:5432;database=analytics_dev", + preResolved, resolved, + ) + assert.Equal(t, "jdbc:sqlserver://${var.db_host}:5432;database=${var.db_name}", result) +} + +func TestRestoreCompoundInterpolation_AllVarsMatch(t *testing.T) { + preResolved := dyn.V("${var.org}-${bundle.name}-job") + resolved := dyn.V(map[string]dyn.Value{ + "variables": dyn.V(map[string]dyn.Value{ + "org": dyn.V(map[string]dyn.Value{"value": dyn.V("acme")}), + }), + "bundle": dyn.V(map[string]dyn.Value{ + "name": dyn.V("etl"), + }), + }) + + // Nothing changed — template restored as-is. + result := restoreOriginalRefs("acme-etl-job", preResolved, resolved) + assert.Equal(t, "${var.org}-${bundle.name}-job", result) +} + +func TestRestoreCompoundInterpolation_NoVarsInOriginal(t *testing.T) { + preResolved := dyn.V("just_a_plain_string") + resolved := dyn.V(map[string]dyn.Value{}) + + result := restoreOriginalRefs("something_else", preResolved, resolved) + assert.Equal(t, "something_else", result) +} + +func TestRestoreCompoundInterpolation_ValueCompletelyDifferent(t *testing.T) { + preResolved := dyn.V("${var.org_prefix}-phi-encryption-key") + resolved := dyn.V(map[string]dyn.Value{ + "variables": dyn.V(map[string]dyn.Value{ + "org_prefix": dyn.V(map[string]dyn.Value{"value": dyn.V("hc-dev")}), + }), + }) + + // New value has no relationship to the variable. + result := restoreOriginalRefs("master-encryption-key-v2", preResolved, resolved) + // Variable not found in remote string — can't align, falls back to hardcoded. + assert.Equal(t, "master-encryption-key-v2", result) +} + +func TestStripBracketStars(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"resources.jobs.my_job.parameters[*]", "resources.jobs.my_job.parameters"}, + {"resources.jobs.my_job.tasks[*].notebook_task", "resources.jobs.my_job.tasks.notebook_task"}, + {"resources.jobs.my_job.name", "resources.jobs.my_job.name"}, + {"[*].field[*]", ".field"}, + } + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + assert.Equal(t, tt.want, stripBracketStars(tt.input)) + }) + } +} + +func TestFieldVariableContext_FieldHasVariable(t *testing.T) { + cache := map[string]dyn.Value{ + "test.yml": dyn.V(map[string]dyn.Value{ + "name": dyn.V("${var.job_name}"), + }), + } + v, hasCtx := fieldVariableContext(cache, "test.yml", []string{"name"}) + assert.True(t, hasCtx) + assert.True(t, v.IsValid()) + assert.Equal(t, "${var.job_name}", v.MustString()) +} + +func TestFieldVariableContext_NoVariablesAnywhere(t *testing.T) { + cache := map[string]dyn.Value{ + "test.yml": dyn.V(map[string]dyn.Value{ + "name": dyn.V("hardcoded"), + "retries": dyn.V(3), + }), + } + _, hasCtx := fieldVariableContext(cache, "test.yml", []string{"name"}) + assert.False(t, hasCtx) +} + +func TestFieldVariableContext_ParentHasVariableButFieldDoesNot(t *testing.T) { + cache := map[string]dyn.Value{ + "test.yml": dyn.V(map[string]dyn.Value{ + "params": dyn.V(map[string]dyn.Value{ + "catalog": dyn.V("${var.catalog}"), + "region": dyn.V("us-east-1"), + }), + }), + } + // Field "params.region" is hardcoded, but parent "params" has a variable sibling. + // hasContext is true (for Add), but the value is invalid (field itself has no variable). + v, hasCtx := fieldVariableContext(cache, "test.yml", []string{"params.region"}) + assert.True(t, hasCtx) + assert.False(t, v.IsValid()) +} diff --git a/cmd/bundle/config_remote_sync.go b/cmd/bundle/config_remote_sync.go index b172223a83..9b43c6af0d 100644 --- a/cmd/bundle/config_remote_sync.go +++ b/cmd/bundle/config_remote_sync.go @@ -63,6 +63,8 @@ Examples: return fmt.Errorf("failed to resolve field changes: %w", err) } + configsync.RestoreVariableReferences(b, fieldChanges) + files, err := configsync.ApplyChangesToYAML(ctx, b, fieldChanges) if err != nil { return fmt.Errorf("failed to generate YAML files: %w", err)