diff --git a/acceptance/experimental/genie/ask-endpoint-gone/out.test.toml b/acceptance/experimental/genie/ask-endpoint-gone/out.test.toml new file mode 100644 index 0000000000..d6187dcb04 --- /dev/null +++ b/acceptance/experimental/genie/ask-endpoint-gone/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = [] diff --git a/acceptance/experimental/genie/ask-endpoint-gone/output.txt b/acceptance/experimental/genie/ask-endpoint-gone/output.txt new file mode 100644 index 0000000000..e16f43614e --- /dev/null +++ b/acceptance/experimental/genie/ask-endpoint-gone/output.txt @@ -0,0 +1,6 @@ + +=== a removed endpoint tells the user to update the CLI +>>> [CLI] experimental genie ask What are total sales by franchise? +Error: the Genie API is not available on this workspace: No API found for 'POST /data-rooms/tools/onechat/responses'; the endpoint may have moved since this CLI release: update the Databricks CLI to the latest version (run 'databricks version --check') + +Exit code: 1 diff --git a/acceptance/experimental/genie/ask-endpoint-gone/script b/acceptance/experimental/genie/ask-endpoint-gone/script new file mode 100644 index 0000000000..71522bd23c --- /dev/null +++ b/acceptance/experimental/genie/ask-endpoint-gone/script @@ -0,0 +1,2 @@ +title "a removed endpoint tells the user to update the CLI" +errcode trace $CLI experimental genie ask "What are total sales by franchise?" diff --git a/acceptance/experimental/genie/ask-endpoint-gone/test.toml b/acceptance/experimental/genie/ask-endpoint-gone/test.toml new file mode 100644 index 0000000000..577d5f6c7d --- /dev/null +++ b/acceptance/experimental/genie/ask-endpoint-gone/test.toml @@ -0,0 +1,12 @@ +# No bundle engine needed for this command. +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = [] + +# The genie route is an undocumented API that can disappear between releases. +# This is the wire shape a live workspace gateway returns for a removed route. 
+[[Server]] +Pattern = "POST /api/2.0/data-rooms/tools/onechat/responses" +Response.StatusCode = 404 +Response.Body = ''' +{"error_code":"ENDPOINT_NOT_FOUND","message":"No API found for 'POST /data-rooms/tools/onechat/responses'"} +''' diff --git a/acceptance/experimental/genie/ask-protocol-drift/out.test.toml b/acceptance/experimental/genie/ask-protocol-drift/out.test.toml new file mode 100644 index 0000000000..d6187dcb04 --- /dev/null +++ b/acceptance/experimental/genie/ask-protocol-drift/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = [] diff --git a/acceptance/experimental/genie/ask-protocol-drift/output.txt b/acceptance/experimental/genie/ask-protocol-drift/output.txt new file mode 100644 index 0000000000..c838495d84 --- /dev/null +++ b/acceptance/experimental/genie/ask-protocol-drift/output.txt @@ -0,0 +1,16 @@ + +=== a drifted protocol with no renderable answer tells the user to update the CLI +>>> [CLI] experimental genie ask What are total sales by franchise? +Error: the stream ended without an answer (received 2 events); the API may have changed: update the Databricks CLI to the latest version (run 'databricks version --check'), or re-run with --raw to inspect the raw stream + +Exit code: 1 + +=== json output also reports the drift +>>> [CLI] experimental genie ask What are total sales by franchise? 
--output json +{ + "status": "error", + "error": "the stream ended without an answer (received 2 events); the API may have changed: update the Databricks CLI to the latest version (run 'databricks version --check'), or re-run with --raw to inspect the raw stream" +} +Error: the stream ended without an answer (received 2 events); the API may have changed: update the Databricks CLI to the latest version (run 'databricks version --check'), or re-run with --raw to inspect the raw stream + +Exit code: 1 diff --git a/acceptance/experimental/genie/ask-protocol-drift/script b/acceptance/experimental/genie/ask-protocol-drift/script new file mode 100644 index 0000000000..1cc2a7db0d --- /dev/null +++ b/acceptance/experimental/genie/ask-protocol-drift/script @@ -0,0 +1,5 @@ +title "a drifted protocol with no renderable answer tells the user to update the CLI" +errcode trace $CLI experimental genie ask "What are total sales by franchise?" + +title "json output also reports the drift" +errcode trace $CLI experimental genie ask "What are total sales by franchise?" --output json diff --git a/acceptance/experimental/genie/ask-protocol-drift/test.toml b/acceptance/experimental/genie/ask-protocol-drift/test.toml new file mode 100644 index 0000000000..65cd1d283b --- /dev/null +++ b/acceptance/experimental/genie/ask-protocol-drift/test.toml @@ -0,0 +1,15 @@ +# No bundle engine needed for this command. +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = [] + +# A protocol change that renames item types (or moves the answer elsewhere) +# leaves the stream syntactically valid but free of anything this build can +# render. The command must fail with update advice, not exit 0 with no output. 
+[[Server]] +Pattern = "POST /api/2.0/data-rooms/tools/onechat/responses" +Response.Body = ''' +data: {"type":"response.output_item.added","output_index":0,"item":{"type":"agent_step_v2","id":"s1","status":"completed"}} + +data: {"type":"response.completed","response":{"id":"resp_1","status":"completed","conversation_id":"conv_1"}} + +''' diff --git a/acceptance/experimental/genie/ask-request-drift/out.test.toml b/acceptance/experimental/genie/ask-request-drift/out.test.toml new file mode 100644 index 0000000000..d6187dcb04 --- /dev/null +++ b/acceptance/experimental/genie/ask-request-drift/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = [] diff --git a/acceptance/experimental/genie/ask-request-drift/output.txt b/acceptance/experimental/genie/ask-request-drift/output.txt new file mode 100644 index 0000000000..adc75fec7d --- /dev/null +++ b/acceptance/experimental/genie/ask-request-drift/output.txt @@ -0,0 +1,6 @@ + +=== a 500 with no message points at a possible request format change +>>> [CLI] experimental genie ask What are total sales by franchise? +Error: the Genie backend could not process the request (500 with no details); if this keeps happening, the request format may have changed since this CLI release: update the Databricks CLI to the latest version (run 'databricks version --check') + +Exit code: 1 diff --git a/acceptance/experimental/genie/ask-request-drift/script b/acceptance/experimental/genie/ask-request-drift/script new file mode 100644 index 0000000000..c180e2cfeb --- /dev/null +++ b/acceptance/experimental/genie/ask-request-drift/script @@ -0,0 +1,2 @@ +title "a 500 with no message points at a possible request format change" +errcode trace $CLI experimental genie ask "What are total sales by franchise?" 
diff --git a/acceptance/experimental/genie/ask-request-drift/test.toml b/acceptance/experimental/genie/ask-request-drift/test.toml new file mode 100644 index 0000000000..59339f04f5 --- /dev/null +++ b/acceptance/experimental/genie/ask-request-drift/test.toml @@ -0,0 +1,13 @@ +# No bundle engine needed for this command. +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = [] + +# Wire shape observed live when the backend cannot interpret the request body +# (e.g. the expected request shape changed): a 500 INTERNAL_ERROR with an +# empty message, which would otherwise render as a blank "Error: ". +[[Server]] +Pattern = "POST /api/2.0/data-rooms/tools/onechat/responses" +Response.StatusCode = 500 +Response.Body = ''' +{"error_code":"INTERNAL_ERROR","message":""} +''' diff --git a/experimental/genie/agentstream/renderer.go b/experimental/genie/agentstream/renderer.go index eab474fea0..d9c3c4fec6 100644 --- a/experimental/genie/agentstream/renderer.go +++ b/experimental/genie/agentstream/renderer.go @@ -27,6 +27,11 @@ const defaultChartWidth = 80 // artifacts behind when the spinner erases it. const maxStatusRunes = 100 +// UpdateCLIAdvice tells the user how to recover when the undocumented API +// behind an experimental command has changed or moved: a newer CLI built +// against the current wire format is the only user-side fix. +const UpdateCLIAdvice = "update the Databricks CLI to the latest version (run 'databricks version --check')" + // RenderDebug prints every raw SSE data line to w as-is. 
func RenderDebug(r io.Reader, w io.Writer) error { reader := NewSSEReader(r) @@ -156,7 +161,7 @@ func RenderText(ctx context.Context, r io.Reader, stdout, stderr io.Writer, adap warnUnparsed(stderr, unparsed) if !answered { - return fmt.Errorf("the stream ended without an answer (received %d events); re-run with --raw to inspect the raw stream", events) + return noAnswerError(events) } if !sawDone { fmt.Fprintln(stderr, "Warning: the stream ended without a completion event; the answer may be incomplete.") @@ -244,7 +249,7 @@ loop: if apiErr == nil { if result.Text == "" && len(result.ToolCalls) == 0 { result.Status = statusError - apiErr = fmt.Errorf("the stream ended without an answer (received %d events); re-run with --raw to inspect the raw stream", events) + apiErr = noAnswerError(events) } else if result.Status == statusIncomplete { // Keep status "incomplete": an answer was produced, the server // just never confirmed completion. @@ -279,12 +284,19 @@ func apiError(se StreamEvent) error { return fmt.Errorf("API error: %s: %s", se.ErrorCode, se.Text) } +// noAnswerError reports a stream that ended without any user-visible answer. +// Short of a server bug, this means the wire format drifted away from what +// this build understands, so the message gives the CLI update advice ahead of the --raw hint. +func noAnswerError(events int) error { + return fmt.Errorf("the stream ended without an answer (received %d events); the API may have changed: %s, or re-run with --raw to inspect the raw stream", events, UpdateCLIAdvice) +} + // warnUnparsed reports events the adapter recognized but could not decode. // These are dropped from rendering, and a wire format drift that drops // everything must be visible rather than an empty success. 
func warnUnparsed(stderr io.Writer, unparsed int) { if unparsed > 0 { - fmt.Fprintf(stderr, "Warning: %d stream event(s) could not be parsed and were ignored; re-run with --raw to inspect the raw stream.\n", unparsed) + fmt.Fprintf(stderr, "Warning: %d stream event(s) could not be parsed and were ignored; the API may have changed: %s, or re-run with --raw to inspect the raw stream.\n", unparsed, UpdateCLIAdvice) } } diff --git a/experimental/genie/agentstream/renderer_test.go b/experimental/genie/agentstream/renderer_test.go index 86f0c45bbe..a9e08d089b 100644 --- a/experimental/genie/agentstream/renderer_test.go +++ b/experimental/genie/agentstream/renderer_test.go @@ -233,6 +233,7 @@ func TestRenderText_NoAnswerFails(t *testing.T) { err := RenderText(testCtx(t), strings.NewReader(input), &stdout, &stderr, adapt, RenderOptions{}) require.Error(t, err) assert.Contains(t, err.Error(), "without an answer") + assert.Contains(t, err.Error(), "update the Databricks CLI to the latest version") assert.Contains(t, err.Error(), "--raw") } @@ -259,6 +260,7 @@ func TestRenderText_UnparsedEventsWarn(t *testing.T) { err := RenderText(testCtx(t), strings.NewReader(input), &stdout, &stderr, adapt, RenderOptions{}) require.NoError(t, err) assert.Contains(t, stderr.String(), "2 stream event(s) could not be parsed") + assert.Contains(t, stderr.String(), "update the Databricks CLI to the latest version") } func TestRenderText_VizChartAfterText(t *testing.T) { @@ -401,6 +403,7 @@ func TestRenderJSON_EmptyStreamFails(t *testing.T) { err := RenderJSON(strings.NewReader(""), &buf, &stderr, adapt) require.Error(t, err) assert.Contains(t, err.Error(), "without an answer") + assert.Contains(t, err.Error(), "update the Databricks CLI to the latest version") var result StreamResult require.NoError(t, json.Unmarshal(buf.Bytes(), &result)) diff --git a/experimental/genie/client.go b/experimental/genie/client.go index 0e4cffa9c9..6cea22c1ba 100644 --- a/experimental/genie/client.go +++ 
b/experimental/genie/client.go @@ -2,8 +2,12 @@ package genie import ( "context" + "errors" + "fmt" "io" + "github.com/databricks/cli/experimental/genie/agentstream" + "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/client" "github.com/databricks/databricks-sdk-go/config" ) @@ -55,6 +59,29 @@ func PostStream(ctx context.Context, cfg *config.Config, req GenieRequest) (io.R "Accept": "text/event-stream", } err = api.Do(ctx, "POST", genieResponsesPath, headers, nil, req, &body) + // The route is fixed and carries no resource IDs, so a 404 normally means + // the endpoint itself is gone: the backend route is undocumented and can + // move or be disabled between Databricks releases (a removed route returns + // 404 ENDPOINT_NOT_FOUND, "No API found for ...", which the SDK maps to + // plain ErrNotFound). A 404 RESOURCE_DOES_NOT_EXIST is excluded: it refers + // to something the request named (e.g. the warehouse) and must keep the + // backend's own message instead of blaming the endpoint. + if errors.Is(err, apierr.ErrNotFound) && !errors.Is(err, apierr.ErrResourceDoesNotExist) { + return nil, fmt.Errorf("the Genie API is not available on this workspace: %w; the endpoint may have moved since this CLI release: %s", err, agentstream.UpdateCLIAdvice) + } + // A request body the backend cannot interpret (e.g. after its expected + // request shape changed) surfaces as a 500 INTERNAL_ERROR with an empty + // message (observed live), leaving the user a blank error. Transient + // backend faults share the status code, hence the hedged advice. + if errors.Is(err, apierr.ErrInternalError) { + if apiErr, ok := errors.AsType[*apierr.APIError](err); ok && apiErr.Message == "" { + // An empty message would render as "request: ;" mid-sentence, so + // the observed no-details shape gets its own wording. The %w + // keeps the error chain and renders as nothing. 
+ return nil, fmt.Errorf("the Genie backend could not process the request (500 with no details)%w; if this keeps happening, the request format may have changed since this CLI release: %s", err, agentstream.UpdateCLIAdvice) + } + return nil, fmt.Errorf("the Genie backend could not process the request: %w; if this keeps happening, the request format may have changed since this CLI release: %s", err, agentstream.UpdateCLIAdvice) + } if err != nil { return nil, err } diff --git a/experimental/genie/client_test.go b/experimental/genie/client_test.go index 5530e56a45..f31804e85c 100644 --- a/experimental/genie/client_test.go +++ b/experimental/genie/client_test.go @@ -9,6 +9,7 @@ import ( "testing" "github.com/databricks/cli/experimental/genie/agentstream" + "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/config" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -67,6 +68,45 @@ func TestPostStream(t *testing.T) { assert.JSONEq(t, `{"type":"response.completed"}`, ev.Data) } +func TestPostStream_EndpointGone(t *testing.T) { + // Wire shape a live workspace gateway returns for a route that does not + // exist. The genie route is undocumented and can disappear between + // releases; the error must point at a CLI update instead of leaking a + // bare "No API found". 
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + fmt.Fprint(w, `{"error_code":"ENDPOINT_NOT_FOUND","message":"No API found for 'POST /data-rooms/tools/onechat/responses'"}`) + })) + defer srv.Close() + + cfg := &config.Config{Host: srv.URL, Token: "dummy"} + _, err := PostStream(t.Context(), cfg, BuildRequest("q", "")) + require.Error(t, err) + assert.ErrorIs(t, err, apierr.ErrNotFound) + assert.Contains(t, err.Error(), "No API found") + assert.Contains(t, err.Error(), "update the Databricks CLI to the latest version") +} + +func TestPostStream_ResourceNotFound(t *testing.T) { + // A 404 RESOURCE_DOES_NOT_EXIST refers to a resource the request named + // (the warehouse), not the route: it must keep the backend's message and + // not claim the endpoint moved. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + fmt.Fprint(w, `{"error_code":"RESOURCE_DOES_NOT_EXIST","message":"Warehouse wh-missing does not exist"}`) + })) + defer srv.Close() + + cfg := &config.Config{Host: srv.URL, Token: "dummy"} + _, err := PostStream(t.Context(), cfg, BuildRequest("q", "wh-missing")) + require.Error(t, err) + assert.ErrorIs(t, err, apierr.ErrResourceDoesNotExist) + assert.Contains(t, err.Error(), "Warehouse wh-missing does not exist") + assert.NotContains(t, err.Error(), "update the Databricks CLI") +} + func TestPostStream_HTTPError(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") @@ -79,4 +119,24 @@ func TestPostStream_HTTPError(t *testing.T) { _, err := PostStream(t.Context(), cfg, BuildRequest("q", "")) require.Error(t, err) assert.Contains(t, err.Error(), "backend exploded") + assert.Contains(t, err.Error(), 
"update the Databricks CLI to the latest version") +} + +func TestPostStream_InternalErrorEmptyMessage(t *testing.T) { + // Wire shape observed live for a request body the backend cannot + // interpret: 500 INTERNAL_ERROR with an empty message. Without the wrap + // the user sees a blank "Error: ". + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusInternalServerError) + fmt.Fprint(w, `{"error_code":"INTERNAL_ERROR","message":""}`) + })) + defer srv.Close() + + cfg := &config.Config{Host: srv.URL, Token: "dummy"} + _, err := PostStream(t.Context(), cfg, BuildRequest("q", "")) + require.Error(t, err) + assert.ErrorIs(t, err, apierr.ErrInternalError) + assert.Contains(t, err.Error(), "could not process the request (500 with no details)") + assert.Contains(t, err.Error(), "update the Databricks CLI to the latest version") }