Skip to content

Commit eed76ff

Browse files
fix(kernel): address review — re-pin KERNEL_REV, secret hygiene, auth ambiguity, TLS superset
Addresses @gopalldb's review on #819. P0: - Re-pin KERNEL_REV to the squash-merge SHA of the now-merged kernel PR (ec2288742cbac0cd9fab50da353e8405972eefe9 on kernel main), replacing the orphaned branch-HEAD SHA. P1: - Scrub oauth_client_secret from the long-lived self._auth_options in open_session's finally (even on the failure path). It outlives the method, so a retained secret was exposed to vars()/pickle/debugger far longer than needed; the kernel now owns it. - tls_verify=False now also emits tls_skip_hostname_verify=True — no-verify subsumes hostname verification, matching SSLOptions' create_ssl_context (check_hostname=False when tls_verify is False). - Reject ambiguous auth combos before resolving, instead of silently picking M2M: (a) credentials_provider + oauth_client_secret, and (b) a U2M auth_type (databricks-oauth/azure-oauth) + oauth_client_secret. Both raise NotSupportedError naming the conflict, so the failure is at session-open rather than a confusing first-call 401 against the wrong principal. - Secret-leak tests: oauth_client_secret is forwarded to the kernel but scrubbed from _auth_options, and absent from repr(conn) / vars(conn); scrub runs even when open_session raises. P2: - _read_pem_bytes rejects an empty/whitespace CA/cert file with a clear ProgrammingError instead of passing empty PEM to the kernel. - _normalize_scopes raises ProgrammingError on a non-str/list/tuple oauth_scopes rather than silently dropping to default scopes. - Added U2M oauth_scopes forwarding test. 150 kernel unit tests pass; black + mypy clean. Note: KERNEL_REV now points at the merged kernel main SHA; no further re-pin needed before merge. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala <vikrant.puppala@databricks.com>
1 parent d614db3 commit eed76ff

5 files changed

Lines changed: 223 additions & 13 deletions

File tree

KERNEL_REV

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
774569bc6d816f662c0f4f8a05859d1f4f26e9d3
1+
ec2288742cbac0cd9fab50da353e8405972eefe9

src/databricks/sql/backend/kernel/auth_bridge.py

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,13 @@ def kernel_auth_kwargs(
127127
128128
Resolution order:
129129
130+
0. **Ambiguity guards** — reject conflicting auth signals *before*
131+
resolving, so an ambiguous request fails loudly at session-open
132+
rather than silently picking one flow (and failing later as a
133+
confusing 401 against the wrong principal):
134+
- a custom ``credentials_provider`` *and* M2M kwargs together;
135+
- a U2M ``auth_type`` (``databricks-oauth`` / ``azure-oauth``)
136+
*and* ``oauth_client_secret`` together.
130137
1. **OAuth M2M** — ``oauth_client_id`` + ``oauth_client_secret``
131138
both present → forward raw creds to the kernel's ``oauth-m2m``.
132139
2. **PAT** — the built provider is (or wraps) an
@@ -140,14 +147,37 @@ def kernel_auth_kwargs(
140147
141148
M2M is checked before PAT so that a workload passing both an
142149
access token *and* M2M creds resolves to the (refreshing) M2M path
143-
rather than a static token.
150+
rather than a static token. (Token + M2M is not treated as
151+
ambiguous: a PAT is often present as ambient config the caller
152+
didn't intend as the primary credential, whereas an explicit
153+
``oauth_client_secret`` is unambiguous M2M intent.)
144154
"""
145155
opts = auth_options or {}
146156

147-
# 1. OAuth M2M — raw client-credentials pair forwarded to the kernel.
148157
client_id = opts.get("oauth_client_id")
149158
client_secret = opts.get("oauth_client_secret")
150-
if client_id and client_secret:
159+
auth_type = opts.get("auth_type")
160+
has_m2m = bool(client_id and client_secret)
161+
162+
# 0. Ambiguity guards — fail before any flow is chosen.
163+
if client_secret and opts.get("credentials_provider") is not None:
164+
raise NotSupportedError(
165+
"Ambiguous auth on use_kernel=True: both a custom "
166+
"credentials_provider and oauth_client_secret were provided. "
167+
"Pass exactly one — oauth_client_id + oauth_client_secret for "
168+
"kernel-managed M2M, or use the Thrift backend (default) for "
169+
"credentials_provider."
170+
)
171+
if client_secret and auth_type in ("databricks-oauth", "azure-oauth"):
172+
raise NotSupportedError(
173+
f"Ambiguous auth on use_kernel=True: auth_type={auth_type!r} selects "
174+
"the U2M browser flow, but oauth_client_secret was also provided "
175+
"(machine-to-machine). Drop oauth_client_secret for U2M, or drop "
176+
"auth_type for M2M."
177+
)
178+
179+
# 1. OAuth M2M — raw client-credentials pair forwarded to the kernel.
180+
if has_m2m:
151181
kwargs: Dict[str, Any] = {
152182
"auth_type": "oauth-m2m",
153183
"client_id": client_id,
@@ -169,7 +199,6 @@ def kernel_auth_kwargs(
169199
return {"auth_type": "pat", "access_token": token}
170200

171201
# 3. OAuth U2M — browser authorization-code flow; the kernel runs it.
172-
auth_type = opts.get("auth_type")
173202
if auth_type in ("databricks-oauth", "azure-oauth"):
174203
kwargs = {"auth_type": "oauth-u2m"}
175204
if client_id:
@@ -220,4 +249,10 @@ def _normalize_scopes(scopes: Any) -> Optional[list]:
220249
if isinstance(scopes, (list, tuple)):
221250
parts = [str(s) for s in scopes if s]
222251
return parts or None
223-
return None
252+
# Anything else (int, dict, bool, …) is a caller error. Fail loudly
253+
# rather than silently dropping the scopes to None and surprising
254+
# the user with default scopes.
255+
raise ProgrammingError(
256+
f"oauth_scopes must be a list/tuple of strings or a space-delimited "
257+
f"string, got {type(scopes).__name__}."
258+
)

src/databricks/sql/backend/kernel/client.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,14 @@ def open_session(
211211
auth_kwargs.pop("access_token", None)
212212
auth_kwargs.pop("client_secret", None)
213213
tls_kwargs.pop("tls_client_key", None)
214+
# Also scrub the long-lived copy. ``self._auth_options``
215+
# outlives this method (it's set in ``__init__`` and the
216+
# connector object can live for the whole connection), so a
217+
# retained ``oauth_client_secret`` would be exposed to
218+
# ``vars(conn)`` / pickle / a debugger dump for far longer
219+
# than the credential needs to exist. The kernel now owns
220+
# the secret, so drop ours.
221+
self._auth_options.pop("oauth_client_secret", None)
214222

215223
# Use the kernel's real server-issued session id, not a
216224
# synthetic UUID. Matches what the native SEA backend does.
@@ -665,7 +673,8 @@ def _kernel_tls_kwargs(ssl_options) -> Dict[str, Any]:
665673
Mappings (note the inverted booleans — the connector expresses
666674
*verify*, the kernel expresses *skip*):
667675
668-
- ``tls_verify=False`` → ``tls_skip_verify=True``
676+
- ``tls_verify=False`` → ``tls_skip_verify=True`` **and**
677+
``tls_skip_hostname_verify=True``
669678
- ``tls_verify_hostname=False`` → ``tls_skip_hostname_verify=True``
670679
- ``tls_trusted_ca_file`` → ``tls_ca_cert`` (PEM bytes)
671680
- ``tls_client_cert_file`` (+ key file) → ``tls_client_cert`` /
@@ -680,10 +689,16 @@ def _kernel_tls_kwargs(ssl_options) -> Dict[str, Any]:
680689
kwargs: Dict[str, Any] = {}
681690

682691
# Inverted booleans. Emit only the insecure (skip) direction so the
683-
# default secure path stays implicit.
692+
# default secure path stays implicit. ``tls_verify=False`` disables
693+
# all chain validation, which subsumes hostname verification — so we
694+
# also emit ``tls_skip_hostname_verify`` to match ``SSLOptions``'s
695+
# own semantics (``create_ssl_context`` sets ``check_hostname=False``
696+
# whenever ``tls_verify`` is False). Without this the kernel could
697+
# still attempt a hostname check the connector considers disabled.
684698
if getattr(ssl_options, "tls_verify", True) is False:
685699
kwargs["tls_skip_verify"] = True
686-
if getattr(ssl_options, "tls_verify_hostname", True) is False:
700+
kwargs["tls_skip_hostname_verify"] = True
701+
elif getattr(ssl_options, "tls_verify_hostname", True) is False:
687702
kwargs["tls_skip_hostname_verify"] = True
688703

689704
ca_file = getattr(ssl_options, "tls_trusted_ca_file", None)
@@ -716,14 +731,23 @@ def _kernel_tls_kwargs(ssl_options) -> Dict[str, Any]:
716731

717732
def _read_pem_bytes(path: str, label: str) -> bytes:
718733
"""Read a PEM file into bytes, mapping IO errors to a clear
719-
``ProgrammingError`` that names the offending TLS option."""
734+
``ProgrammingError`` that names the offending TLS option. An empty
735+
file is rejected here too — otherwise it reaches the kernel as
736+
empty PEM and surfaces as a cryptic ``no certificates found`` /
737+
parse error far from the misconfigured path."""
720738
try:
721739
with open(path, "rb") as f:
722-
return f.read()
740+
data = f.read()
723741
except OSError as exc:
724742
raise ProgrammingError(
725743
f"Failed to read {label} '{path}' for the kernel TLS config: {exc}"
726744
) from exc
745+
if not data.strip():
746+
raise ProgrammingError(
747+
f"{label} '{path}' is empty; expected PEM-encoded content for the "
748+
"kernel TLS config."
749+
)
750+
return data
727751

728752

729753
def _drain_kernel_handle(handle: Any) -> Any:

tests/unit/test_kernel_auth_bridge.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,3 +262,60 @@ def test_u2m_forwards_client_id_and_redirect_port(self):
262262
"client_id": "custom-client",
263263
"redirect_port": 8030,
264264
}
265+
266+
@pytest.mark.parametrize("auth_type", ["databricks-oauth", "azure-oauth"])
267+
def test_u2m_forwards_scopes(self, auth_type):
268+
kwargs = kernel_auth_kwargs(
269+
_FakeOAuthProvider(),
270+
{"auth_type": auth_type, "oauth_scopes": ["all-apis", "offline_access"]},
271+
)
272+
assert kwargs["oauth_scopes"] == ["all-apis", "offline_access"]
273+
274+
275+
class TestKernelAuthAmbiguity:
276+
"""Conflicting auth signals must fail loudly at session-open rather
277+
than silently resolving to one flow (which would surface later as a
278+
confusing 401 against the wrong principal)."""
279+
280+
def test_credentials_provider_plus_m2m_is_rejected(self):
281+
def _creds_provider():
282+
return lambda: {"Authorization": "Bearer x"}
283+
284+
with pytest.raises(NotSupportedError, match="Ambiguous auth"):
285+
kernel_auth_kwargs(
286+
_FakeOAuthProvider(),
287+
{
288+
"oauth_client_id": "id",
289+
"oauth_client_secret": "sec",
290+
"credentials_provider": _creds_provider,
291+
},
292+
)
293+
294+
@pytest.mark.parametrize("auth_type", ["databricks-oauth", "azure-oauth"])
295+
def test_u2m_auth_type_plus_client_secret_is_rejected(self, auth_type):
296+
# User asked for U2M (browser) but also passed a secret (M2M).
297+
# Don't silently route M2M against the wrong principal.
298+
with pytest.raises(NotSupportedError, match="Ambiguous auth"):
299+
kernel_auth_kwargs(
300+
_FakeOAuthProvider(),
301+
{
302+
"auth_type": auth_type,
303+
"oauth_client_id": "id",
304+
"oauth_client_secret": "sec",
305+
},
306+
)
307+
308+
309+
class TestKernelScopesNormalization:
310+
def test_unknown_scope_type_raises(self):
311+
# A non-str/list/tuple oauth_scopes is a caller error; fail loudly
312+
# rather than silently dropping to default scopes.
313+
with pytest.raises(ProgrammingError, match="oauth_scopes must be"):
314+
kernel_auth_kwargs(
315+
_FakeOAuthProvider(),
316+
{
317+
"oauth_client_id": "id",
318+
"oauth_client_secret": "sec",
319+
"oauth_scopes": 123,
320+
},
321+
)

tests/unit/test_kernel_client.py

Lines changed: 96 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -803,11 +803,20 @@ def test_default_ssl_options_emit_no_tls_kwargs(self):
803803
def test_none_ssl_options_emit_no_tls_kwargs(self):
804804
assert kernel_client._kernel_tls_kwargs(None) == {}
805805

806-
def test_verify_off_maps_to_skip_verify(self):
806+
def test_verify_off_maps_to_skip_verify_and_hostname(self):
807+
# tls_verify=False disables all chain validation, which subsumes
808+
# hostname verification — so both skip flags are emitted, matching
809+
# SSLOptions.create_ssl_context (check_hostname=False when
810+
# tls_verify is False).
807811
out = kernel_client._kernel_tls_kwargs(self._ssl_options(tls_verify=False))
808-
assert out == {"tls_skip_verify": True}
812+
assert out == {
813+
"tls_skip_verify": True,
814+
"tls_skip_hostname_verify": True,
815+
}
809816

810817
def test_hostname_verify_off_maps_to_skip_hostname(self):
818+
# Only hostname verification off (chain still validated) → just
819+
# the hostname skip, no tls_skip_verify.
811820
out = kernel_client._kernel_tls_kwargs(
812821
self._ssl_options(tls_verify_hostname=False)
813822
)
@@ -862,3 +871,88 @@ def test_missing_ca_file_raises_programming_error(self):
862871
kernel_client._kernel_tls_kwargs(
863872
self._ssl_options(tls_trusted_ca_file="/no/such/ca.pem")
864873
)
874+
875+
876+
# ---------------------------------------------------------------------------
877+
# Secret hygiene: oauth_client_secret must not be retained or leak.
878+
# ---------------------------------------------------------------------------
879+
880+
881+
class TestKernelSecretHygiene:
882+
"""The new ``oauth_client_secret`` M2M kwarg is credential material.
883+
Verify it's scrubbed after ``open_session`` and never exposed via
884+
the connector's ``repr`` / ``vars`` or through a mapped exception."""
885+
886+
_SECRET = "super-secret-m2m-value"
887+
888+
def _client_with_m2m(self, monkeypatch, captured):
889+
def fake_session(**kw):
890+
captured.update(kw)
891+
sess = MagicMock()
892+
sess.session_id = "sess-id"
893+
return sess
894+
895+
monkeypatch.setattr(kernel_client._kernel, "Session", fake_session)
896+
return kernel_client.KernelDatabricksClient(
897+
server_hostname="example.cloud.databricks.com",
898+
http_path="/sql/1.0/warehouses/abc",
899+
auth_provider=AccessTokenAuthProvider("dapi-test"),
900+
ssl_options=None,
901+
auth_options={
902+
"oauth_client_id": "sp-uuid",
903+
"oauth_client_secret": self._SECRET,
904+
},
905+
)
906+
907+
def test_secret_forwarded_then_scrubbed_from_auth_options(self, monkeypatch):
908+
captured = {}
909+
c = self._client_with_m2m(monkeypatch, captured)
910+
c.open_session(session_configuration=None, catalog=None, schema=None)
911+
912+
# Forwarded to the kernel Session as client_secret...
913+
assert captured.get("client_secret") == self._SECRET
914+
# ...but not retained on the long-lived connector.
915+
assert "oauth_client_secret" not in c._auth_options
916+
917+
def test_secret_absent_from_repr_and_vars_after_open(self, monkeypatch):
918+
captured = {}
919+
c = self._client_with_m2m(monkeypatch, captured)
920+
c.open_session(session_configuration=None, catalog=None, schema=None)
921+
922+
assert self._SECRET not in repr(c)
923+
assert self._SECRET not in str(vars(c))
924+
925+
def test_secret_not_in_mapped_open_session_exception(self, monkeypatch):
926+
# If the kernel Session constructor raises with the secret in its
927+
# message/args, the mapped PEP-249 exception must still be raised
928+
# (we don't assert the kernel scrubs its own error — that's the
929+
# kernel's job — but we confirm the connector's scrub still runs
930+
# via the finally block even on the failure path).
931+
def boom_session(**kw):
932+
raise RuntimeError("kernel open failed")
933+
934+
monkeypatch.setattr(kernel_client._kernel, "Session", boom_session)
935+
c = kernel_client.KernelDatabricksClient(
936+
server_hostname="example.cloud.databricks.com",
937+
http_path="/sql/1.0/warehouses/abc",
938+
auth_provider=AccessTokenAuthProvider("dapi-test"),
939+
ssl_options=None,
940+
auth_options={
941+
"oauth_client_id": "sp-uuid",
942+
"oauth_client_secret": self._SECRET,
943+
},
944+
)
945+
with pytest.raises(Exception):
946+
c.open_session(session_configuration=None, catalog=None, schema=None)
947+
# Scrubbed even though open_session raised (finally block).
948+
assert "oauth_client_secret" not in c._auth_options
949+
950+
951+
class TestKernelTlsEmptyCaFile:
952+
def test_empty_ca_file_raises_programming_error(self, tmp_path):
953+
from databricks.sql.types import SSLOptions
954+
955+
ca = tmp_path / "empty.pem"
956+
ca.write_bytes(b" \n")
957+
with pytest.raises(ProgrammingError, match="is empty"):
958+
kernel_client._kernel_tls_kwargs(SSLOptions(tls_trusted_ca_file=str(ca)))

0 commit comments

Comments
 (0)