Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
bundle:
name: config-remote-sync-telemetry-error

resources:
jobs:
foo:
name: test job
tasks:
- task_key: main
notebook_task:
notebook_path: /Workspace/Users/tester@databricks.com/notebook

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions acceptance/bundle/telemetry/config-remote-sync-error/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

>>> errcode [CLI] bundle config-remote-sync
Error: failed to detect changes: state snapshot not available: resources state snapshot not found remotely at resources-config-sync-snapshot.json: state snapshot not found

Exit code: 1

>>> cat out.requests.txt
{
"engine": "terraform",
"error_category": "STATE_NOT_FOUND"
}
7 changes: 7 additions & 0 deletions acceptance/bundle/telemetry/config-remote-sync-error/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Running config-remote-sync without a prior deploy: the state snapshot does
# not exist, so the command fails and telemetry reports STATE_NOT_FOUND.
# The expected output records the failure as "Exit code: 1" and the script
# carries on with the next command.
trace errcode $CLI bundle config-remote-sync

# Print only the config-remote-sync telemetry event: keep recorded requests
# whose path is /telemetry-ext, decode every protoLogs entry from its JSON
# string form, and drop entries without a bundle_config_remote_sync_event.
trace cat out.requests.txt | jq 'select(has("path") and .path == "/telemetry-ext") | .body.protoLogs[] | fromjson | .entry.databricks_cli_log.bundle_config_remote_sync_event | select(. != null)'

# Delete the recorded requests once the event has been printed.
rm out.requests.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Running without a prior deploy only fails with STATE_NOT_FOUND on the
# terraform engine (the snapshot pull path); pin the matrix accordingly.
[EnvMatrix]
DATABRICKS_BUNDLE_ENGINE = ["terraform"]
11 changes: 11 additions & 0 deletions acceptance/bundle/telemetry/config-remote-sync/databricks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
bundle:
name: config-remote-sync-telemetry

resources:
jobs:
foo:
name: test job
tasks:
- task_key: main
notebook_task:
notebook_path: /Workspace/Users/tester@databricks.com/notebook
3 changes: 3 additions & 0 deletions acceptance/bundle/telemetry/config-remote-sync/out.test.toml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions acceptance/bundle/telemetry/config-remote-sync/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@

>>> [CLI] bundle deploy
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/config-remote-sync-telemetry/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!

>>> [CLI] bundle config-remote-sync
Detected changes in 1 resource(s):

Resource: resources.jobs.foo
name: replace



>>> cat out.requests.txt
{
"engine": "direct",
"changes_total": 1,
"replace_count": 1,
"resource_changes": [
{
"resource_type": "jobs",
"changes_count": 1,
"replace_count": 1
}
],
"files_changed_count": 1
}
12 changes: 12 additions & 0 deletions acceptance/bundle/telemetry/config-remote-sync/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Deploy the bundle, change job "foo" remotely, then run config-remote-sync
# and print the telemetry event it emits.
trace $CLI bundle deploy

# Rename the deployed job outside of the bundle so that config-remote-sync has
# one remote change to detect (the expected output shows "name: replace").
# NOTE(review): read_id.py and edit_resource.py are helpers defined outside
# this script; presumably they look up the deployed job ID and apply the
# snippet below to the remote job. Confirm against the helpers.
job_id="$(read_id.py foo)"
edit_resource.py jobs $job_id <<EOF
r["name"] = "renamed remotely"
EOF

trace $CLI bundle config-remote-sync

# Print only the config-remote-sync telemetry event: keep recorded requests
# whose path is /telemetry-ext, decode every protoLogs entry from its JSON
# string form, and drop entries without a bundle_config_remote_sync_event.
trace cat out.requests.txt | jq 'select(has("path") and .path == "/telemetry-ext") | .body.protoLogs[] | fromjson | .entry.databricks_cli_log.bundle_config_remote_sync_event | select(. != null)'

# Delete the recorded requests once the event has been printed.
rm out.requests.txt
4 changes: 4 additions & 0 deletions acceptance/bundle/telemetry/config-remote-sync/test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# The telemetry event includes the engine name, so outputs diverge between
# engines; pin to direct (the default for new bundles).
[EnvMatrix]
DATABRICKS_BUNDLE_ENGINE = ["direct"]
4 changes: 3 additions & 1 deletion bundle/configsync/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,9 @@ func ensureSnapshotAvailable(ctx context.Context, b *bundle.Bundle, engine engin
r, err := f.Read(ctx, remotePathSnapshot)
if err != nil {
if errors.Is(err, fs.ErrNotExist) {
return fmt.Errorf("resources state snapshot not found remotely at %s", remotePathSnapshot)
// Wrap the sentinel so callers can classify this failure
// (telemetry reports it as STATE_NOT_FOUND).
return fmt.Errorf("resources state snapshot not found remotely at %s: %w", remotePathSnapshot, ErrStateSnapshotNotFound)
}
return fmt.Errorf("reading remote snapshot: %w", err)
}
Expand Down
149 changes: 149 additions & 0 deletions bundle/configsync/telemetry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
package configsync

import (
"context"
"errors"
"slices"
"strings"

"github.com/databricks/cli/bundle/config/engine"
"github.com/databricks/cli/libs/telemetry"
"github.com/databricks/cli/libs/telemetry/protos"
)

// ErrStateSnapshotNotFound indicates the deployed state snapshot required for
// change detection does not exist (the bundle was likely never deployed).
//
// It is a sentinel error: ensureSnapshotAvailable wraps it with %w when the
// remote snapshot file is missing, so callers can match it with errors.Is
// (telemetry reports this failure as STATE_NOT_FOUND).
var ErrStateSnapshotNotFound = errors.New("state snapshot not found")

// Stats accumulates aggregate counters for a single config-remote-sync run.
// All values are counts, booleans, or enumerated categories; no resource
// names, paths, or configuration values are ever recorded.
//
// CollectChangeStats fills the change counters, and LogTelemetry copies every
// field into the emitted BundleConfigRemoteSyncEvent.
type Stats struct {
// Save is reported as the event's Save field.
// NOTE(review): presumably mirrors the command's save option; it is not set
// in this file, so confirm at the call site.
Save bool
// Engine is the deployment engine in use; reported in its string form
// (e.g. "direct" or "terraform").
Engine engine.EngineType

// Totals over all detected changes, plus the breakdown by operation.
// Populated by CollectChangeStats.
ChangesTotal int64
AddCount int64
ReplaceCount int64
RemoveCount int64

// Keyed by resource type (e.g. "jobs"), as parsed from change keys like
// "resources.jobs.foo".
PerResourceType map[string]*protos.BundleConfigRemoteSyncResourceChanges

// File counters, reported as-is. They are not populated in this file.
// NOTE(review): presumably the number of config files with pending changes
// and the number actually written; confirm with the caller.
FilesChangedCount int64
FilesWrittenCount int64

// Restore holds the variable-reference restoration counters.
Restore RestoreStats

// RawValuesWithVarSyntax counts string leaves of change values that contain
// the literal "${" sequence. Populated by CollectChangeStats.
RawValuesWithVarSyntax int64

// ErrorCategory is the enumerated failure category reported with the event
// (e.g. STATE_NOT_FOUND). It is not set in this file.
ErrorCategory protos.BundleConfigRemoteSyncErrorCategory
}

// RestoreStats counts variable-reference restorations by mechanism.
// LogTelemetry reports the fields as RefsKept, RefsCompound, RefsRetargeted
// and RefsFromSiblings.
type RestoreStats struct {
// Kept counts original pure references that still match the new value.
Kept int64
// NOTE(review): Compound is not exercised by the visible tests; presumably it
// counts references embedded in a larger string. Confirm where it is
// incremented.
Compound int64
// Retargeted counts pure references whose value changed to another
// variable's value, so the reference was pointed at that variable.
Retargeted int64
// FromSiblings counts leaves of new sequence elements restored from a
// reference found in a sibling element.
FromSiblings int64
}

// CollectChangeStats tallies the raw (pre-restoration) detected changes into
// the receiver: overall totals, the per-operation breakdown, one bucket per
// resource type, and the number of string leaves containing "${".
// Counters are added to, never reset, so repeated calls accumulate.
func (s *Stats) CollectChangeStats(changes Changes) {
	if s.PerResourceType == nil {
		s.PerResourceType = make(map[string]*protos.BundleConfigRemoteSyncResourceChanges)
	}

	for key, fieldChanges := range changes {
		// Only the type segment of the key is kept; the resource name is not.
		resourceType := resourceTypeFromKey(key)
		bucket := s.PerResourceType[resourceType]
		if bucket == nil {
			bucket = &protos.BundleConfigRemoteSyncResourceChanges{ResourceType: resourceType}
			s.PerResourceType[resourceType] = bucket
		}

		for _, fieldChange := range fieldChanges {
			// DetectChanges drops Skip operations and convertChangeDesc never
			// yields Unknown, so in practice every change lands in one of the
			// Add/Replace/Remove arms and the totals equal the breakdown.
			switch fieldChange.Operation {
			case OperationAdd:
				s.AddCount++
				bucket.AddCount++
			case OperationReplace:
				s.ReplaceCount++
				bucket.ReplaceCount++
			case OperationRemove:
				s.RemoveCount++
				bucket.RemoveCount++
			case OperationUnknown, OperationSkip:
				// Counted in the totals below only.
			}

			s.ChangesTotal++
			bucket.ChangesCount++
			s.RawValuesWithVarSyntax += countVarSyntax(fieldChange.Value)
		}
	}
}

// resourceTypeFromKey extracts the resource type from a change key like
// "resources.jobs.foo". Only the type segment is recorded; resource keys are
// never logged.
//
// It returns "unknown" when the key does not start with the "resources."
// prefix or when the type segment is empty (e.g. "resources." or
// "resources..foo"), so an entry never ends up with a blank resource type.
func resourceTypeFromKey(resourceKey string) string {
	const unknown = "unknown"

	rest, ok := strings.CutPrefix(resourceKey, "resources.")
	if !ok {
		return unknown
	}
	// The type is everything up to the next dot (or the whole remainder).
	resourceType, _, _ := strings.Cut(rest, ".")
	if resourceType == "" {
		return unknown
	}
	return resourceType
}

// countVarSyntax reports how many string leaves of value contain the literal
// "${" sequence. Such values are written to YAML verbatim and are subject to
// interpolation on the next deploy, so the count measures exposure to
// escaping issues.
//
// A string counts at most once no matter how many "${" it holds. Only
// map[string]any and []any containers are descended into; every other type
// contributes zero.
func countVarSyntax(value any) int64 {
	switch node := value.(type) {
	case string:
		if strings.Contains(node, "${") {
			return 1
		}
		return 0
	case map[string]any:
		var total int64
		for _, child := range node {
			total += countVarSyntax(child)
		}
		return total
	case []any:
		var total int64
		for _, child := range node {
			total += countVarSyntax(child)
		}
		return total
	default:
		return 0
	}
}

// LogTelemetry emits the BundleConfigRemoteSyncEvent for this run, copying
// every counter from the receiver into the event.
func (s *Stats) LogTelemetry(ctx context.Context) {
	// Flatten the per-type buckets and order them by resource type so the
	// payload does not depend on map iteration order.
	perType := make([]protos.BundleConfigRemoteSyncResourceChanges, 0, len(s.PerResourceType))
	for _, bucket := range s.PerResourceType {
		perType = append(perType, *bucket)
	}
	byResourceType := func(x, y protos.BundleConfigRemoteSyncResourceChanges) int {
		return strings.Compare(x.ResourceType, y.ResourceType)
	}
	slices.SortFunc(perType, byResourceType)

	event := &protos.BundleConfigRemoteSyncEvent{
		Save:   s.Save,
		Engine: string(s.Engine),

		ChangesTotal:    s.ChangesTotal,
		AddCount:        s.AddCount,
		ReplaceCount:    s.ReplaceCount,
		RemoveCount:     s.RemoveCount,
		ResourceChanges: perType,

		FilesChangedCount: s.FilesChangedCount,
		FilesWrittenCount: s.FilesWrittenCount,

		RefsKept:         s.Restore.Kept,
		RefsCompound:     s.Restore.Compound,
		RefsRetargeted:   s.Restore.Retargeted,
		RefsFromSiblings: s.Restore.FromSiblings,

		RawValuesWithVarSyntax: s.RawValuesWithVarSyntax,
		ErrorCategory:          s.ErrorCategory,
	}
	telemetry.Log(ctx, protos.DatabricksCliLog{BundleConfigRemoteSyncEvent: event})
}
96 changes: 96 additions & 0 deletions bundle/configsync/telemetry_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package configsync

import (
"testing"

"github.com/databricks/cli/libs/dyn"
"github.com/stretchr/testify/assert"
)

func TestCollectChangeStats(t *testing.T) {
changes := Changes{
"resources.jobs.foo": {
"name": {Operation: OperationReplace, Value: "new name"},
"tasks[0].notebook_task": {Operation: OperationAdd, Value: map[string]any{"base_parameters": map[string]any{"p": "${workspace.file_path}/x"}}},
"timeout_seconds": {Operation: OperationRemove},
},
"resources.jobs.bar": {
"name": {Operation: OperationReplace, Value: "other"},
},
"resources.dashboards.dash": {
"etag": {Operation: OperationAdd, Value: "123456"},
},
}

var stats Stats
stats.CollectChangeStats(changes)

assert.Equal(t, int64(5), stats.ChangesTotal)
assert.Equal(t, int64(2), stats.AddCount)
assert.Equal(t, int64(2), stats.ReplaceCount)
assert.Equal(t, int64(1), stats.RemoveCount)
assert.Equal(t, int64(1), stats.RawValuesWithVarSyntax)

jobs := stats.PerResourceType["jobs"]
assert.Equal(t, int64(4), jobs.ChangesCount)
assert.Equal(t, int64(1), jobs.AddCount)
assert.Equal(t, int64(2), jobs.ReplaceCount)
assert.Equal(t, int64(1), jobs.RemoveCount)

dashboards := stats.PerResourceType["dashboards"]
assert.Equal(t, int64(1), dashboards.ChangesCount)
assert.Equal(t, int64(1), dashboards.AddCount)
}

// TestResourceTypeFromKey checks that only the type segment of a
// "resources.<type>.<name>" key is returned and that anything else maps to
// "unknown".
func TestResourceTypeFromKey(t *testing.T) {
	cases := []struct {
		key  string
		want string
	}{
		{key: "resources.jobs.foo", want: "jobs"},
		{key: "resources.dashboards.a.b", want: "dashboards"},
		{key: "variables.foo", want: "unknown"},
		{key: "resources", want: "unknown"},
	}

	for _, tc := range cases {
		assert.Equal(t, tc.want, resourceTypeFromKey(tc.key), tc.key)
	}
}

// TestCountVarSyntax checks that string leaves containing "${" are counted,
// including unterminated ones and leaves nested inside maps and slices.
func TestCountVarSyntax(t *testing.T) {
	cases := []struct {
		name  string
		value any
		want  int64
	}{
		{name: "plain string", value: "plain", want: 0},
		{name: "pure reference", value: "${var.x}", want: 1},
		{name: "unterminated sequence", value: "prefix ${ suffix", want: 1},
		{
			name: "nested containers",
			value: map[string]any{
				"a": "${var.x}",
				"b": []any{"${incomplete", "ok", int64(5)},
			},
			want: 2,
		},
	}

	for _, tc := range cases {
		assert.Equal(t, tc.want, countVarSyntax(tc.value), tc.name)
	}
}

// TestRestoreStatsCounters checks which RestoreStats counter each restoration
// mechanism increments, using two resolved variables ("region" and "other").
func TestRestoreStatsCounters(t *testing.T) {
	// variable builds the resolved form of one variable: {"value": <v>}.
	variable := func(v string) dyn.Value {
		return dyn.V(map[string]dyn.Value{"value": dyn.V(v)})
	}
	resolved := dyn.V(map[string]dyn.Value{
		"variables": dyn.V(map[string]dyn.Value{
			"region": variable("us-east-1"),
			"other":  variable("eu-west-1"),
		}),
	})

	originalRefCases := []struct {
		name      string
		remote    string
		original  dyn.Value
		want      string
		wantStats RestoreStats
	}{
		{
			// Original pure ref still matching: counted as kept.
			name:      "kept",
			remote:    "us-east-1",
			original:  dyn.V("${var.region}"),
			want:      "${var.region}",
			wantStats: RestoreStats{Kept: 1},
		},
		{
			// Pure ref whose value changed to another variable's value:
			// re-targeted.
			name:      "retargeted",
			remote:    "eu-west-1",
			original:  dyn.V("${var.region}"),
			want:      "${var.other}",
			wantStats: RestoreStats{Retargeted: 1},
		},
		{
			// Hardcoded value: nothing restored, nothing counted.
			name:      "hardcoded",
			remote:    "hardcoded",
			original:  dyn.V("hardcoded"),
			want:      "hardcoded",
			wantStats: RestoreStats{},
		},
	}
	for _, tc := range originalRefCases {
		t.Run(tc.name, func(t *testing.T) {
			var stats RestoreStats
			got := restoreOriginalRefs(tc.remote, tc.original, resolved, &stats)
			assert.Equal(t, tc.want, got)
			assert.Equal(t, tc.wantStats, stats)
		})
	}

	// New sequence element leaf restored from a sibling reference.
	t.Run("from siblings", func(t *testing.T) {
		siblings := []dyn.Value{
			dyn.V(map[string]dyn.Value{"region": dyn.V("${var.region}")}),
		}
		var stats RestoreStats
		restored := restoreFromSiblings(map[string]any{"region": "us-east-1"}, siblings, resolved, &stats).(map[string]any)
		assert.Equal(t, "${var.region}", restored["region"])
		assert.Equal(t, RestoreStats{FromSiblings: 1}, stats)
	})
}
Loading
Loading