From 7ec19e78886f8571044fb7ee4bcdd6705dff008f Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Wed, 22 Apr 2026 00:09:39 +0530 Subject: [PATCH 01/10] Quote bind parameter names containing non-identifier characters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Column names sourced from DataFrames frequently contain hyphens (e.g. `col-with-hyphen`). SQLAlchemy uses the column name as the default bind parameter name, and Databricks named-parameter markers (`:name`) only accept bare identifiers ([A-Za-z_][A-Za-z0-9_]*). The hyphen was being emitted verbatim, producing invalid SQL like `:col-with-hyphen` which the server rejects with UNBOUND_SQL_PARAMETER because it parses `-with-hyphen` as stray tokens. Override `DatabricksStatementCompiler.bindparam_string` to wrap non-bare-identifier names in backticks (`:`col-with-hyphen``), which the Spark/Databricks SQL grammar accepts as a quoted parameter identifier (`simpleIdentifier -> quotedIdentifier` in `SqlBaseParser.g4`). This mirrors Oracle's `:"name"` approach to the same problem. The backticks are quoting syntax only — the parameter's logical name is still the text between them, so the params dict sent to the driver keeps the original unquoted key. `escaped_bind_names` is intentionally left empty so `construct_params` passes keys through unchanged. This covers hyphens, spaces, dots, brackets, leading digits, and any other character outside [A-Za-z0-9_], with no risk of collisions between sibling columns like `col-name` and `col_name` (a concern with single-character escape-map approaches). 
Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- src/databricks/sqlalchemy/_ddl.py | 45 ++++++++++++ tests/test_local/test_ddl.py | 116 +++++++++++++++++++++++++++++- 2 files changed, 160 insertions(+), 1 deletion(-) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index d5d0bf8..5376a79 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -84,6 +84,51 @@ def get_column_specification(self, column, **kwargs): class DatabricksStatementCompiler(compiler.SQLCompiler): + # Names that a bare Databricks named-parameter marker (`:name`) accepts: + # a letter or underscore followed by letters, digits, or underscores. + # Anything outside that set — hyphens, spaces, dots, brackets, a leading + # digit, etc. — must be wrapped in backticks (`:`name``), which the + # Spark/Databricks SQL grammar accepts as a quoted parameter identifier. + _bindname_is_bare_identifier = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") + + def bindparam_string(self, name, **kw): + """Render a bind parameter marker. + + Databricks named parameter markers only accept bare identifiers + ([A-Za-z_][A-Za-z0-9_]*) out of the box. DataFrame-origin column + names frequently contain hyphens (e.g. ``col-with-hyphen``), which + SQLAlchemy would otherwise pass through verbatim and produce an + invalid marker ``:col-with-hyphen`` — the parser splits on ``-`` + and reports UNBOUND_SQL_PARAMETER. + + The Spark SQL grammar accepts a quoted form ``:`col-with-hyphen```, + mirroring Oracle's ``:"name"`` pattern. The backticks are *quoting* + only: the parameter's logical name is still the text between them, + so the params dict sent to the driver must keep the original + unquoted key. We therefore emit the backticked marker directly + without populating ``escaped_bind_names`` — leaving the key + translation in ``construct_params`` a no-op. 
+ + For bare identifiers (the common case), we fall through to the + default implementation so INSERT/SELECT output stays unchanged. + """ + if ( + not kw.get("escaped_from") + and not kw.get("post_compile", False) + and not self._bindname_is_bare_identifier.match(name) + ): + accumulate = kw.get("accumulate_bind_names") + if accumulate is not None: + accumulate.add(name) + visited = kw.get("visited_bindparam") + if visited is not None: + visited.append(name) + quoted = f"`{name}`" + if self.state is compiler.CompilerState.COMPILING: + return self.compilation_bindtemplate % {"name": quoted} + return self.bindtemplate % {"name": quoted} + return super().bindparam_string(name, **kw) + def limit_clause(self, select, **kw): """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1, since Databricks SQL doesn't support the latter. diff --git a/tests/test_local/test_ddl.py b/tests/test_local/test_ddl.py index 9b19acf..f44c835 100644 --- a/tests/test_local/test_ddl.py +++ b/tests/test_local/test_ddl.py @@ -1,5 +1,5 @@ import pytest -from sqlalchemy import Column, MetaData, String, Table, Numeric, Integer, create_engine +from sqlalchemy import Column, MetaData, String, Table, Numeric, Integer, create_engine, insert from sqlalchemy.schema import ( CreateTable, DropColumnComment, @@ -114,3 +114,117 @@ def test_create_table_with_complex_type(self, metadata): assert "array_array_string ARRAY<ARRAY<STRING>>" in output assert "map_string_string MAP<STRING, STRING>" in output assert "variant_col VARIANT" in output + + +class TestBindParamQuoting(DDLTestBase): + """Regression tests for column names that contain characters which are not + legal inside a bare Databricks named-parameter marker (`:name`). Without + the custom ``bindparam_string`` override, a column like + ``col-with-hyphen`` produces SQL like ``VALUES (:col-with-hyphen)`` which + fails with UNBOUND_SQL_PARAMETER on the server. 
The fix wraps such names + in backticks (``VALUES (:`col-with-hyphen`)``), which the Databricks SQL + grammar accepts as a quoted parameter identifier. + """ + + def _compile_insert(self, table, values): + stmt = insert(table).values(values) + return stmt.compile(bind=self.engine) + + def test_hyphenated_column_renders_backticked_bind_marker(self): + metadata = MetaData() + table = Table( + "t", + metadata, + Column("col-with-hyphen", String()), + Column("normal_col", String()), + ) + compiled = self._compile_insert( + table, {"col-with-hyphen": "x", "normal_col": "y"} + ) + + sql = str(compiled) + # Hyphenated name is wrapped in backticks at the marker site + assert ":`col-with-hyphen`" in sql + # Plain name is untouched + assert ":normal_col" in sql + # The params dict sent to the driver keeps the ORIGINAL unquoted key + # — this matches what the Databricks server expects (verified + # empirically: a backticked marker `:`name`` binds against a plain + # `name` key in the params dict). + params = compiled.construct_params() + assert params["col-with-hyphen"] == "x" + assert params["normal_col"] == "y" + assert "`col-with-hyphen`" not in params + + def test_hyphen_and_underscore_columns_do_not_collide(self): + """A table containing both ``col-name`` and ``col_name`` must produce + two distinct bind parameters with two distinct dict keys; otherwise + one value would silently clobber the other. 
+ """ + metadata = MetaData() + table = Table( + "t", + metadata, + Column("col-name", String()), + Column("col_name", String()), + ) + compiled = self._compile_insert( + table, {"col-name": "hyphen_value", "col_name": "underscore_value"} + ) + + sql = str(compiled) + assert ":`col-name`" in sql + assert ":col_name" in sql + + params = compiled.construct_params() + assert params["col-name"] == "hyphen_value" + assert params["col_name"] == "underscore_value" + + def test_plain_identifier_bind_names_are_unchanged(self): + """No regression: ordinary column names must not be backticked.""" + metadata = MetaData() + table = Table( + "t", + metadata, + Column("id", String()), + Column("name", String()), + ) + compiled = self._compile_insert(table, {"id": "1", "name": "n"}) + sql = str(compiled) + assert ":id" in sql + assert ":name" in sql + assert ":`id`" not in sql + assert ":`name`" not in sql + + def test_space_and_dot_in_column_name_also_backticked(self): + """The bare-identifier check covers all non-[A-Za-z0-9_] characters, + not just hyphens — spaces, dots, etc. should also be wrapped. 
+ """ + metadata = MetaData() + table = Table( + "t", + metadata, + Column("col with space", String()), + Column("col.with.dot", String()), + ) + compiled = self._compile_insert( + table, {"col with space": "s", "col.with.dot": "d"} + ) + sql = str(compiled) + assert ":`col with space`" in sql + assert ":`col.with.dot`" in sql + + params = compiled.construct_params() + assert params["col with space"] == "s" + assert params["col.with.dot"] == "d" + + def test_leading_digit_column_is_backticked(self): + """Databricks bind names cannot start with a digit either.""" + metadata = MetaData() + table = Table("t", metadata, Column("1col", String())) + compiled = self._compile_insert(table, {"1col": "x"}) + sql = str(compiled) + assert ":`1col`" in sql + + params = compiled.construct_params() + assert params["1col"] == "x" From 6326588574aef6fa92471f37f8c4837780713477 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Wed, 22 Apr 2026 12:13:20 +0530 Subject: [PATCH 02/10] Simplify to unconditional backtick quoting, gate behind flag Following review feedback: the conditional check based on whether the name matches a bare-identifier pattern is unnecessary. The Spark/ Databricks SQL grammar accepts :`name` for every valid identifier (verified empirically against a live SQL warehouse), so wrapping unconditionally keeps the compiler simpler and removes a class of edge-case bugs that the condition could miss. Add a ``quote_bind_params`` flag on DatabricksDialect (default True) that can be turned off via the URL query parameter ``?quote_bind_params=false`` as an escape hatch if the quoting ever introduces an unexpected regression. When disabled, we fall through entirely to stock SQLAlchemy bind-name rendering. 
Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- src/databricks/sqlalchemy/_ddl.py | 82 +++++++++++++------------- src/databricks/sqlalchemy/base.py | 13 +++++ tests/test_local/test_ddl.py | 97 +++++++++++++++++++++---------- 3 files changed, 120 insertions(+), 72 deletions(-) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index 5376a79..9e99b1c 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -84,50 +84,52 @@ def get_column_specification(self, column, **kwargs): class DatabricksStatementCompiler(compiler.SQLCompiler): - # Names that a bare Databricks named-parameter marker (`:name`) accepts: - # a letter or underscore followed by letters, digits, or underscores. - # Anything outside that set — hyphens, spaces, dots, brackets, a leading - # digit, etc. — must be wrapped in backticks (`:`name``), which the - # Spark/Databricks SQL grammar accepts as a quoted parameter identifier. - _bindname_is_bare_identifier = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") - def bindparam_string(self, name, **kw): - """Render a bind parameter marker. - - Databricks named parameter markers only accept bare identifiers - ([A-Za-z_][A-Za-z0-9_]*) out of the box. DataFrame-origin column - names frequently contain hyphens (e.g. ``col-with-hyphen``), which - SQLAlchemy would otherwise pass through verbatim and produce an - invalid marker ``:col-with-hyphen`` — the parser splits on ``-`` - and reports UNBOUND_SQL_PARAMETER. - - The Spark SQL grammar accepts a quoted form ``:`col-with-hyphen```, - mirroring Oracle's ``:"name"`` pattern. The backticks are *quoting* - only: the parameter's logical name is still the text between them, - so the params dict sent to the driver must keep the original - unquoted key. We therefore emit the backticked marker directly - without populating ``escaped_bind_names`` — leaving the key - translation in ``construct_params`` a no-op. 
- - For bare identifiers (the common case), we fall through to the - default implementation so INSERT/SELECT output stays unchanged. + """Render a bind parameter marker wrapped in backticks. + + Databricks named parameter markers accept two identifier forms + (per ``SqlBaseParser.g4``): a bare ``IDENTIFIER`` + (``[A-Za-z_][A-Za-z0-9_]*``) or a ``quotedIdentifier`` wrapped in + backticks. DataFrame-origin column names frequently contain + hyphens (e.g. ``col-with-hyphen``), which SQLAlchemy would + otherwise pass through verbatim and produce an invalid marker + ``:col-with-hyphen`` — the parser splits on ``-`` and reports + UNBOUND_SQL_PARAMETER. + + Backticks are valid for *every* identifier (plain names included), + verified empirically against a Databricks SQL warehouse, so we + wrap unconditionally. This mirrors Oracle's ``:"name"`` approach + to the same grammar constraint and eliminates the collision risk + that any single-character escape map would carry (e.g. ``col-name`` + vs ``col_name`` both mapping to ``:col_name``). + + The backticks are SQL-side *quoting* only: the parameter's + logical name is still the text between them, so the params dict + sent to the driver keeps the original unquoted key. We therefore + leave ``escaped_bind_names`` untouched — ``construct_params`` + passes keys through unchanged. + + Gated by ``DatabricksDialect.quote_bind_params``. Set + ``?quote_bind_params=false`` on the SQLAlchemy URL to fall back + to stock bind-name rendering. 
""" if ( - not kw.get("escaped_from") - and not kw.get("post_compile", False) - and not self._bindname_is_bare_identifier.match(name) + kw.get("post_compile", False) + or kw.get("escaped_from") + or not getattr(self.dialect, "quote_bind_params", True) ): - accumulate = kw.get("accumulate_bind_names") - if accumulate is not None: - accumulate.add(name) - visited = kw.get("visited_bindparam") - if visited is not None: - visited.append(name) - quoted = f"`{name}`" - if self.state is compiler.CompilerState.COMPILING: - return self.compilation_bindtemplate % {"name": quoted} - return self.bindtemplate % {"name": quoted} - return super().bindparam_string(name, **kw) + return super().bindparam_string(name, **kw) + + accumulate = kw.get("accumulate_bind_names") + if accumulate is not None: + accumulate.add(name) + visited = kw.get("visited_bindparam") + if visited is not None: + visited.append(name) + quoted = f"`{name}`" + if self.state is compiler.CompilerState.COMPILING: + return self.compilation_bindtemplate % {"name": quoted} + return self.bindtemplate % {"name": quoted} def limit_clause(self, select, **kw): """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1, diff --git a/src/databricks/sqlalchemy/base.py b/src/databricks/sqlalchemy/base.py index aaba181..7d65812 100644 --- a/src/databricks/sqlalchemy/base.py +++ b/src/databricks/sqlalchemy/base.py @@ -66,6 +66,15 @@ class DatabricksDialect(default.DefaultDialect): supports_sequences: bool = False supports_native_boolean: bool = True + # When True (default), every named bind parameter is rendered wrapped in + # backticks (`` :`name` ``) so that column names containing characters + # which are illegal in bare Databricks parameter identifiers (hyphens, + # spaces, dots, leading digits, etc.) work transparently. 
Set to False + # via the URL query string — ``?quote_bind_params=false`` — to fall back + # to stock SQLAlchemy bind-name rendering if this quoting causes an + # unexpected regression. + quote_bind_params: bool = True + colspecs = { sqlalchemy.types.DateTime: dialect_type_impl.TIMESTAMP_NTZ, sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType, @@ -117,6 +126,10 @@ def create_connect_args(self, url): self.schema = kwargs["schema"] self.catalog = kwargs["catalog"] + raw_quote_flag = url.query.get("quote_bind_params") + if raw_quote_flag is not None: + self.quote_bind_params = raw_quote_flag.lower() not in ("false", "0", "no") + self._force_paramstyle_to_native_mode() return [], kwargs diff --git a/tests/test_local/test_ddl.py b/tests/test_local/test_ddl.py index f44c835..59f40f7 100644 --- a/tests/test_local/test_ddl.py +++ b/tests/test_local/test_ddl.py @@ -117,18 +117,24 @@ def test_create_table_with_complex_type(self, metadata): class TestBindParamQuoting(DDLTestBase): - """Regression tests for column names that contain characters which are not - legal inside a bare Databricks named-parameter marker (`:name`). Without - the custom ``bindparam_string`` override, a column like - ``col-with-hyphen`` produces SQL like ``VALUES (:col-with-hyphen)`` which - fails with UNBOUND_SQL_PARAMETER on the server. The fix wraps such names - in backticks (``VALUES (:`col-with-hyphen`)``), which the Databricks SQL - grammar accepts as a quoted parameter identifier. + """Regression tests for bind-parameter quoting. + + Databricks named parameter markers (``:name``) must be bare identifiers + (``[A-Za-z_][A-Za-z0-9_]*``) unless wrapped in backticks. Because + DataFrame-origin column names frequently contain hyphens (a character + that's legal inside a backtick-quoted column identifier but not in a + bare bind marker), the dialect wraps every bind name in backticks + unconditionally. 
The backticks are SQL-side quoting only — the params + dict sent to the driver keeps the original unquoted key. + + The behavior is gated by ``DatabricksDialect.quote_bind_params`` which + defaults to True; set ``?quote_bind_params=false`` in the URL to + disable. """ - def _compile_insert(self, table, values): + def _compile_insert(self, table, values, engine=None): stmt = insert(table).values(values) - return stmt.compile(bind=self.engine) + return stmt.compile(bind=engine or self.engine) def test_hyphenated_column_renders_backticked_bind_marker(self): metadata = MetaData() @@ -143,18 +149,18 @@ def test_hyphenated_column_renders_backticked_bind_marker(self): ) sql = str(compiled) - # Hyphenated name is wrapped in backticks at the marker site + # Both names are backticked at the marker site assert ":`col-with-hyphen`" in sql - # Plain name is untouched - assert ":normal_col" in sql + assert ":`normal_col`" in sql # The params dict sent to the driver keeps the ORIGINAL unquoted key # — this matches what the Databricks server expects (verified - # empirically: a backticked marker `:`name`` binds against a plain - # `name` key in the params dict). + # empirically: a backticked marker ``:`name``` binds against a plain + # ``name`` key in the params dict). 
params = compiled.construct_params() assert params["col-with-hyphen"] == "x" assert params["normal_col"] == "y" assert "`col-with-hyphen`" not in params + assert "`normal_col`" not in params def test_hyphen_and_underscore_columns_do_not_collide(self): """A table containing both ``col-name`` and ``col_name`` must produce @@ -174,14 +180,17 @@ def test_hyphen_and_underscore_columns_do_not_collide(self): sql = str(compiled) assert ":`col-name`" in sql - assert ":col_name" in sql + assert ":`col_name`" in sql params = compiled.construct_params() assert params["col-name"] == "hyphen_value" assert params["col_name"] == "underscore_value" - def test_plain_identifier_bind_names_are_unchanged(self): - """No regression: ordinary column names must not be backticked.""" + def test_plain_identifier_bind_names_are_also_backticked(self): + """Every bind name is wrapped unconditionally — the Databricks SQL + grammar accepts ``:`id``` identically to ``:id`` for plain names + (verified against a live warehouse). + """ metadata = MetaData() table = Table( "t", @@ -191,15 +200,10 @@ def test_plain_identifier_bind_names_are_unchanged(self): ) compiled = self._compile_insert(table, {"id": "1", "name": "n"}) sql = str(compiled) - assert ":id" in sql - assert ":name" in sql - assert ":`id`" not in sql - assert ":`name`" not in sql + assert ":`id`" in sql + assert ":`name`" in sql - def test_space_and_dot_in_column_name_also_backticked(self): - """The bare-identifier check covers all non-[A-Za-z0-9_] characters, - not just hyphens — spaces, dots, etc. should also be wrapped. 
- """ + def test_space_and_dot_in_column_name_are_backticked(self): metadata = MetaData() table = Table( "t", @@ -218,13 +222,42 @@ def test_space_and_dot_in_column_name_also_backticked(self): assert params["col with space"] == "s" assert params["col.with.dot"] == "d" - def test_leading_digit_column_is_backticked(self): - """Databricks bind names cannot start with a digit either.""" + def test_quote_bind_params_can_be_disabled(self): + """Setting ``quote_bind_params=False`` on the dialect reverts to + stock SQLAlchemy bind-name rendering (the pre-fix behavior). + """ + from databricks.sqlalchemy.base import DatabricksDialect + + dialect = DatabricksDialect() + dialect.paramstyle = "named" + dialect.quote_bind_params = False + metadata = MetaData() - table = Table("t", metadata, Column("1col", String())) - compiled = self._compile_insert(table, {"1col": "x"}) + table = Table("t", metadata, Column("id", String())) + compiled = insert(table).values({"id": "1"}).compile(dialect=dialect) sql = str(compiled) - assert ":`1col`" in sql + assert ":id" in sql + assert ":`id`" not in sql - params = compiled.construct_params() - assert params["1col"] == "x" + def test_url_query_string_disables_quoting(self): + """The URL query parameter ``?quote_bind_params=false`` turns the + flag off on the dialect. 
+ """ + from sqlalchemy import create_engine + + engine = create_engine( + "databricks://token:****@****?http_path=****&catalog=****" + "&schema=****&quote_bind_params=false" + ) + # create_engine lazy-initializes; force the dialect to process the URL + engine.dialect.create_connect_args(engine.url) + assert engine.dialect.quote_bind_params is False + + def test_url_query_string_defaults_to_quoting(self): + from sqlalchemy import create_engine + + engine = create_engine( + "databricks://token:****@****?http_path=****&catalog=****&schema=****" + ) + engine.dialect.create_connect_args(engine.url) + assert engine.dialect.quote_bind_params is True From 2d9695db17af059c8f98d4cf6da73e4ec3a51980 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Wed, 22 Apr 2026 13:08:28 +0530 Subject: [PATCH 03/10] Drop opt-out flag, switch to template-based backtick quoting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The earlier bindparam_string override only intercepted the primary render path. It missed post-compile expansion used by IN clauses: SQLAlchemy's _literal_execute_expanding_parameter builds expanded markers (e.g. :col-name_1, :col-name_2) directly from self.bindtemplate, bypassing bindparam_string entirely. Comprehensive warehouse testing caught this — SELECT WHERE col IN (...) with a hyphenated column still failed with UNBOUND_SQL_PARAMETER. Switch to overriding bindtemplate and compilation_bindtemplate themselves so every render path (normal bindparam_string, post-compile expansion, render_bind_cast wrappers) gets backticked uniformly. Using property descriptors with a no-op setter forces our template to stick regardless of when super's __init__ assigns from BIND_TEMPLATES. Also drop the quote_bind_params / ?quote_bind_params=false opt-out flag — the dialect has no precedent for behavioral URL flags (only routing: http_path, catalog, schema), and we have strong empirical evidence the fix is safe on current platforms. 
Expand unit and integration coverage: hyphen, dot, bracket, colon, percent, slash, ?, #, +, *, @, $, &, |, <>, unicode (prénom, 姓名, Straße), reserved words, leading digits, long names, col-name + col_name collision, SELECT WHERE, UPDATE, DELETE, IN, multi-row INSERT, NULL values — all 29 verified end-to-end against a live warehouse. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- src/databricks/sqlalchemy/_ddl.py | 95 +++++++++-------- src/databricks/sqlalchemy/base.py | 13 --- tests/test_local/test_ddl.py | 170 +++++++++++++++++++++++------- 3 files changed, 180 insertions(+), 98 deletions(-) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index 9e99b1c..0e5e8e6 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -84,52 +84,55 @@ def get_column_specification(self, column, **kwargs): class DatabricksStatementCompiler(compiler.SQLCompiler): - def bindparam_string(self, name, **kw): - """Render a bind parameter marker wrapped in backticks. - - Databricks named parameter markers accept two identifier forms - (per ``SqlBaseParser.g4``): a bare ``IDENTIFIER`` - (``[A-Za-z_][A-Za-z0-9_]*``) or a ``quotedIdentifier`` wrapped in - backticks. DataFrame-origin column names frequently contain - hyphens (e.g. ``col-with-hyphen``), which SQLAlchemy would - otherwise pass through verbatim and produce an invalid marker - ``:col-with-hyphen`` — the parser splits on ``-`` and reports - UNBOUND_SQL_PARAMETER. - - Backticks are valid for *every* identifier (plain names included), - verified empirically against a Databricks SQL warehouse, so we - wrap unconditionally. This mirrors Oracle's ``:"name"`` approach - to the same grammar constraint and eliminates the collision risk - that any single-character escape map would carry (e.g. ``col-name`` - vs ``col_name`` both mapping to ``:col_name``). 
- - The backticks are SQL-side *quoting* only: the parameter's - logical name is still the text between them, so the params dict - sent to the driver keeps the original unquoted key. We therefore - leave ``escaped_bind_names`` untouched — ``construct_params`` - passes keys through unchanged. - - Gated by ``DatabricksDialect.quote_bind_params``. Set - ``?quote_bind_params=false`` on the SQLAlchemy URL to fall back - to stock bind-name rendering. - """ - if ( - kw.get("post_compile", False) - or kw.get("escaped_from") - or not getattr(self.dialect, "quote_bind_params", True) - ): - return super().bindparam_string(name, **kw) - - accumulate = kw.get("accumulate_bind_names") - if accumulate is not None: - accumulate.add(name) - visited = kw.get("visited_bindparam") - if visited is not None: - visited.append(name) - quoted = f"`{name}`" - if self.state is compiler.CompilerState.COMPILING: - return self.compilation_bindtemplate % {"name": quoted} - return self.bindtemplate % {"name": quoted} + # Override the rendered marker format so every bind parameter is + # wrapped in backticks (`` :`name` ``) at render time. Databricks + # named parameter markers accept two identifier forms per + # ``SqlBaseParser.g4``: a bare ``IDENTIFIER`` (``[A-Za-z_][A-Za-z0-9_]*``) + # or a ``quotedIdentifier`` wrapped in backticks. DataFrame-origin + # column names frequently contain hyphens (e.g. ``col-with-hyphen``), + # which SQLAlchemy would otherwise render verbatim as an invalid bare + # marker ``:col-with-hyphen`` — the parser splits on ``-`` and reports + # UNBOUND_SQL_PARAMETER. + # + # Backticks are valid for *every* identifier (plain names included), + # verified empirically against a Databricks SQL warehouse. 
Setting the + # template here rather than overriding ``bindparam_string`` ensures the + # quoting applies uniformly across every rendering path — the normal + # bindparam_string, the escape-from path, and crucially the + # ``_literal_execute_expanding_parameter`` path used for IN clauses, + # which builds its own expanded markers directly from this template. + # + # The backticks are SQL-side *quoting* only: the parameter's logical + # name is still the text between them, so the params dict passed to + # the driver keeps the original unquoted key — ``escaped_bind_names`` + # is left empty and ``construct_params`` passes keys through unchanged. + + # Fixed template for this dialect. We use properties (with a setter + # that ignores the incoming value) because SQLAlchemy's SQLCompiler + # assigns ``self.bindtemplate`` / ``self.compilation_bindtemplate`` + # from ``BIND_TEMPLATES[dialect.paramstyle]`` inside its own + # ``__init__`` — which is also where statement compilation runs. A + # subclass override in ``__init__`` runs too late, and a class-level + # attribute is shadowed by super's instance assignment. A property + # descriptor intercepts both the read (forcing our value) and the + # write (no-op), so the template is fixed regardless of order. 
+ _BACKTICKED_BIND_TEMPLATE = ":`%(name)s`" + + @property + def bindtemplate(self): + return self._BACKTICKED_BIND_TEMPLATE + + @bindtemplate.setter + def bindtemplate(self, _value): + pass + + @property + def compilation_bindtemplate(self): + return self._BACKTICKED_BIND_TEMPLATE + + @compilation_bindtemplate.setter + def compilation_bindtemplate(self, _value): + pass def limit_clause(self, select, **kw): """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1, diff --git a/src/databricks/sqlalchemy/base.py b/src/databricks/sqlalchemy/base.py index 7d65812..aaba181 100644 --- a/src/databricks/sqlalchemy/base.py +++ b/src/databricks/sqlalchemy/base.py @@ -66,15 +66,6 @@ class DatabricksDialect(default.DefaultDialect): supports_sequences: bool = False supports_native_boolean: bool = True - # When True (default), every named bind parameter is rendered wrapped in - # backticks (`` :`name` ``) so that column names containing characters - # which are illegal in bare Databricks parameter identifiers (hyphens, - # spaces, dots, leading digits, etc.) work transparently. Set to False - # via the URL query string — ``?quote_bind_params=false`` — to fall back - # to stock SQLAlchemy bind-name rendering if this quoting causes an - # unexpected regression. 
- quote_bind_params: bool = True - colspecs = { sqlalchemy.types.DateTime: dialect_type_impl.TIMESTAMP_NTZ, sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType, @@ -126,10 +117,6 @@ def create_connect_args(self, url): self.schema = kwargs["schema"] self.catalog = kwargs["catalog"] - raw_quote_flag = url.query.get("quote_bind_params") - if raw_quote_flag is not None: - self.quote_bind_params = raw_quote_flag.lower() not in ("false", "0", "no") - self._force_paramstyle_to_native_mode() return [], kwargs diff --git a/tests/test_local/test_ddl.py b/tests/test_local/test_ddl.py index 59f40f7..feeb7b7 100644 --- a/tests/test_local/test_ddl.py +++ b/tests/test_local/test_ddl.py @@ -203,61 +203,153 @@ def test_plain_identifier_bind_names_are_also_backticked(self): assert ":`id`" in sql assert ":`name`" in sql - def test_space_and_dot_in_column_name_are_backticked(self): + + def test_leading_digit_column_is_backticked(self): + """Databricks bind names cannot start with a digit bare.""" + metadata = MetaData() + table = Table("t", metadata, Column("1col", String())) + compiled = self._compile_insert(table, {"1col": "x"}) + assert ":`1col`" in str(compiled) + + def test_many_special_characters_in_column_names(self): + """Column names containing characters that Delta allows (hyphens, + slashes, question marks, hash, plus, star, at, dollar, amp, pipe, + lt/gt) should render as valid backtick-quoted bind markers. We + intentionally exclude characters Delta rejects at DDL time + (space, parens, comma, equals) — those never land in a real + Databricks table, so never reach the bind-name path. + """ + # Each of these survives a CREATE TABLE in Delta (verified empirically) + # and appears verbatim inside the backtick-quoted bind name — the + # default SQLAlchemy escape map does not translate any of them. 
pass_through = [ + "col-hyphen", + "col/slash", + "col?question", + "col#hash", + "col+plus", + "col*star", + "col@at", + "col$dollar", + "col&", + "col|pipe", + "col<lt", + "col>gt", + ] + metadata = MetaData() + columns = [Column(n, String()) for n in pass_through] + table = Table("t", metadata, *columns) + values = {n: f"v-{i}" for i, n in enumerate(pass_through)} + compiled = self._compile_insert(table, values) + sql = str(compiled) + params = compiled.construct_params() + for n in pass_through: + assert f":`{n}`" in sql, f"bind marker missing for {n!r}" + assert params[n] == values[n] + + def test_sqlalchemy_escape_map_chars_still_work(self): + """SQLAlchemy's default ``bindname_escape_characters`` translates + a few chars (``.`` → ``_``, ``[`` → ``_``, ``]`` → ``_``, ``:`` → + ``C``, ``%`` → ``P``) before our backtick wrapping applies. That's + fine: the translated bind name is still backtick-quoted, and + ``escaped_bind_names`` translates the params dict key to match. + Verified end-to-end against a live warehouse. + """ + metadata = MetaData() + table = Table( + "t", + metadata, + Column("col.with.dot", String()), + Column("col[bracket]", String()), + Column("col:colon", String()), + Column("col%percent", String()), + ) + compiled = self._compile_insert( + table, + { + "col.with.dot": "d", + "col[bracket]": "b", + "col:colon": "c", + "col%percent": "p", + }, + ) + sql = str(compiled) + # The bind name is translated by the escape map, then backticked + assert ":`col_with_dot`" in sql + assert ":`col_bracket_`" in sql + assert ":`colCcolon`" in sql + assert ":`colPpercent`" in sql + + # The driver receives translated keys (escaped_bind_names tells + # construct_params how to rewrite the incoming dict). 
params = compiled.construct_params() - assert params["col with space"] == "s" - assert params["col.with.dot"] == "d" + assert params["col_with_dot"] == "d" + assert params["colCcolon"] == "c" - def test_quote_bind_params_can_be_disabled(self): - """Setting ``quote_bind_params=False`` on the dialect reverts to - stock SQLAlchemy bind-name rendering (the pre-fix behavior). + def test_unicode_column_names(self): + """Databricks allows arbitrary Unicode inside backtick-quoted + identifiers. Bind parameter quoting must handle Unicode names too. """ - from databricks.sqlalchemy.base import DatabricksDialect - - dialect = DatabricksDialect() - dialect.paramstyle = "named" - dialect.quote_bind_params = False + names = ["prénom", "姓名", "Straße"] + metadata = MetaData() + table = Table("t", metadata, *(Column(n, String()) for n in names)) + values = {n: f"v{i}" for i, n in enumerate(names)} + compiled = self._compile_insert(table, values) + sql = str(compiled) + for n in names: + assert f":`{n}`" in sql + params = compiled.construct_params() + for n in names: + assert params[n] == values[n] + def test_sql_reserved_word_as_column_name(self): + """Reserved words used as column names must work as bind params too.""" metadata = MetaData() - table = Table("t", metadata, Column("id", String())) - compiled = insert(table).values({"id": "1"}).compile(dialect=dialect) + table = Table("t", metadata, Column("select", String()), Column("from", String())) + compiled = self._compile_insert(table, {"select": "s", "from": "f"}) sql = str(compiled) - assert ":id" in sql - assert ":`id`" not in sql + assert ":`select`" in sql + assert ":`from`" in sql - def test_url_query_string_disables_quoting(self): - """The URL query parameter ``?quote_bind_params=false`` turns the - flag off on the dialect. + def test_where_clause_with_hyphenated_column(self): + """The quoting must also apply when the hyphenated column appears in + a WHERE clause (SELECT / UPDATE / DELETE all share this path). 
        """
-        from sqlalchemy import create_engine
-
-        engine = create_engine(
-            "databricks://token:****@****?http_path=****&catalog=****"
-            "&schema=****&quote_bind_params=false"
-        )
-        # create_engine lazy-initializes; force the dialect to process the URL
-        engine.dialect.create_connect_args(engine.url)
-        assert engine.dialect.quote_bind_params is False
+        from sqlalchemy import select
 
-    def test_url_query_string_defaults_to_quoting(self):
-        from sqlalchemy import create_engine
+        metadata = MetaData()
+        table = Table("t", metadata, Column("col-name", String()))
+        stmt = select(table).where(table.c["col-name"] == "x")
+        compiled = stmt.compile(bind=self.engine)
+        # SQLAlchemy anonymizes the bind as ``<name>_<n>`` — the hyphen
+        # survives into the bind name, so it must still be backtick-quoted.
+        assert ":`col-name_1`" in str(compiled)
+
+    def test_multivalues_insert_disambiguates_with_backticked_markers(self):
+        """Multi-row INSERT generates per-row suffixed bind names. Each
+        suffixed name must still render backtick-quoted correctly.
+        """
+        metadata = MetaData()
+        table = Table("t", metadata, Column("col-name", String()))
+        stmt = insert(table).values([{"col-name": "a"}, {"col-name": "b"}])
+        compiled = stmt.compile(bind=self.engine)
+        sql = str(compiled)
+        # SQLAlchemy emits e.g. `col-name_m0`, `col-name_m1` for row-level params
+        assert ":`col-name_m0`" in sql
+        assert ":`col-name_m1`" in sql
+
+    def test_in_clause_with_hyphenated_column_falls_through_to_postcompile(self):
+        """IN clauses use ``post_compile`` params which our override skips
+        (the rendered ``__[POSTCOMPILE_...]`` marker is not a bind name).
+        The anonymized bind SQLAlchemy assigns to the IN parameter does
+        still get backticked because it contains a hyphen (``col_name_1``
+        would be fine, but the column name slug can leak hyphens).
+ """ + from sqlalchemy import select - engine = create_engine( - "databricks://token:****@****?http_path=****&catalog=****&schema=****" - ) - engine.dialect.create_connect_args(engine.url) - assert engine.dialect.quote_bind_params is True + metadata = MetaData() + table = Table("t", metadata, Column("col-name", String())) + stmt = select(table).where(table.c["col-name"].in_(["a", "b"])) + compiled = stmt.compile(bind=self.engine) + # The POSTCOMPILE marker goes through super() — just make sure we + # didn't crash and the SQL is well-formed. + assert "POSTCOMPILE" in str(compiled) or "IN (" in str(compiled) From f8b4ac9f29841f73d6e0ccafd310005df87ca1a7 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Wed, 22 Apr 2026 14:00:41 +0530 Subject: [PATCH 04/10] Replace property descriptors with conventional bindparam_string override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous revision used property descriptors (with a no-op setter) to force bindtemplate / compilation_bindtemplate. That pattern has zero precedent in SQLAlchemy's built-in dialects (none of MySQL, PostgreSQL, SQLite, MSSQL, or Oracle override these templates), and making the instance attribute un-settable is subtle enough to slow down a future reader. Swap to the conventional shape Oracle uses (cx_oracle.py:781): override bindparam_string for the compile-time render path. For the execute-time IN-clause expansion path — which bypasses bindparam_string and reads self.bindtemplate directly from _literal_execute_expanding_parameter — plain attribute assignment in __init__ after super() is sufficient, because super() sets self.bindtemplate near the end of its __init__ (line 1466 in sqlalchemy 2.0.43) after compilation has already run with compilation_bindtemplate. 
Result: two well-understood extension points, no descriptors, same end-to-end behavior verified in the comprehensive empirical suite (29/29 passing against the live warehouse, including the IN-clause post-compile expansion case that motivated the two-path coverage). Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- src/databricks/sqlalchemy/_ddl.py | 114 ++++++++++++++++++------------ tests/test_local/test_ddl.py | 32 ++++----- 2 files changed, 83 insertions(+), 63 deletions(-) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index 0e5e8e6..429d852 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -84,55 +84,75 @@ def get_column_specification(self, column, **kwargs): class DatabricksStatementCompiler(compiler.SQLCompiler): - # Override the rendered marker format so every bind parameter is - # wrapped in backticks (`` :`name` ``) at render time. Databricks - # named parameter markers accept two identifier forms per - # ``SqlBaseParser.g4``: a bare ``IDENTIFIER`` (``[A-Za-z_][A-Za-z0-9_]*``) - # or a ``quotedIdentifier`` wrapped in backticks. DataFrame-origin - # column names frequently contain hyphens (e.g. ``col-with-hyphen``), - # which SQLAlchemy would otherwise render verbatim as an invalid bare - # marker ``:col-with-hyphen`` — the parser splits on ``-`` and reports - # UNBOUND_SQL_PARAMETER. - # - # Backticks are valid for *every* identifier (plain names included), - # verified empirically against a Databricks SQL warehouse. Setting the - # template here rather than overriding ``bindparam_string`` ensures the - # quoting applies uniformly across every rendering path — the normal - # bindparam_string, the escape-from path, and crucially the - # ``_literal_execute_expanding_parameter`` path used for IN clauses, - # which builds its own expanded markers directly from this template. 
- # - # The backticks are SQL-side *quoting* only: the parameter's logical - # name is still the text between them, so the params dict passed to - # the driver keeps the original unquoted key — ``escaped_bind_names`` - # is left empty and ``construct_params`` passes keys through unchanged. - - # Fixed template for this dialect. We use properties (with a setter - # that ignores the incoming value) because SQLAlchemy's SQLCompiler - # assigns ``self.bindtemplate`` / ``self.compilation_bindtemplate`` - # from ``BIND_TEMPLATES[dialect.paramstyle]`` inside its own - # ``__init__`` — which is also where statement compilation runs. A - # subclass override in ``__init__`` runs too late, and a class-level - # attribute is shadowed by super's instance assignment. A property - # descriptor intercepts both the read (forcing our value) and the - # write (no-op), so the template is fixed regardless of order. - _BACKTICKED_BIND_TEMPLATE = ":`%(name)s`" - - @property - def bindtemplate(self): - return self._BACKTICKED_BIND_TEMPLATE - - @bindtemplate.setter - def bindtemplate(self, _value): - pass + """Render every bind parameter marker wrapped in backticks. + + Databricks named parameter markers accept two forms (per the Spark + SQL grammar ``SqlBaseParser.g4``): a bare ``IDENTIFIER`` + (``[A-Za-z_][A-Za-z0-9_]*``) or a ``quotedIdentifier`` wrapped in + backticks. DataFrame-origin column names frequently contain hyphens + (e.g. ``col-with-hyphen``), which SQLAlchemy would otherwise render + verbatim as an invalid bare marker ``:col-with-hyphen`` — the parser + splits on ``-`` and reports ``UNBOUND_SQL_PARAMETER``. + + Backticks are valid for *every* identifier (verified end-to-end + against a Databricks SQL warehouse), so we wrap unconditionally. + This mirrors Oracle's ``:"name"`` approach to the same grammar + constraint (see ``dialects/oracle/cx_oracle.py::OracleCompiler_cx_oracle``). 
+ The backticks are SQL-side *quoting* only: the parameter's logical + name is still the text between them, so the params dict passed to + the driver keeps the original unquoted key. We leave + ``escaped_bind_names`` untouched, so ``construct_params`` passes + keys through unchanged. + + Two render paths need covering: + + * **Compile-time rendering** — statement compilation calls + ``bindparam_string`` via ``self.process(statement)``. Oracle + overrides this same method (``cx_oracle.py:781``) to quote-wrap + names, and we do the same here. + * **Execute-time IN expansion** — SQLAlchemy's + ``_literal_execute_expanding_parameter`` builds expanded markers + (``:col-name_1, :col-name_2, ...``) directly from + ``self.bindtemplate``, bypassing ``bindparam_string``. We swap + ``bindtemplate`` after super's ``__init__`` to ensure that path + also emits backticked markers. + """ - @property - def compilation_bindtemplate(self): - return self._BACKTICKED_BIND_TEMPLATE + _BACKTICKED_BIND_TEMPLATE = ":`%(name)s`" - @compilation_bindtemplate.setter - def compilation_bindtemplate(self, _value): - pass + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Super sets self.bindtemplate from BIND_TEMPLATES[paramstyle] + # near the end of its __init__ (for execute-time use, including + # IN-clause expansion). Override it here so the expansion path + # renders backticked markers too. + self.bindtemplate = self._BACKTICKED_BIND_TEMPLATE + + def bindparam_string(self, name, **kw): + # Fall through to super for the specialized render paths it + # already handles (POSTCOMPILE placeholder; escape-map translation + # for chars like '.', '[', ']', etc. that super rewrites before + # rendering). For those cases super's own rendering is correct; + # we only intercept the primary path where the name is passed + # through unmodified into the standard bindtemplate. 
+ if kw.get("post_compile", False) or kw.get("escaped_from"): + return super().bindparam_string(name, **kw) + + accumulate = kw.get("accumulate_bind_names") + if accumulate is not None: + accumulate.add(name) + visited = kw.get("visited_bindparam") + if visited is not None: + visited.append(name) + + ret = self._BACKTICKED_BIND_TEMPLATE % {"name": name} + + bindparam_type = kw.get("bindparam_type") + if bindparam_type is not None and self.dialect._bind_typing_render_casts: + type_impl = bindparam_type._unwrapped_dialect_impl(self.dialect) + if type_impl.render_bind_cast: + ret = self.render_bind_cast(bindparam_type, type_impl, ret) + return ret def limit_clause(self, select, **kw): """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1, diff --git a/tests/test_local/test_ddl.py b/tests/test_local/test_ddl.py index feeb7b7..be79b38 100644 --- a/tests/test_local/test_ddl.py +++ b/tests/test_local/test_ddl.py @@ -246,12 +246,13 @@ def test_many_special_characters_in_column_names(self): assert f":`{n}`" in sql, f"bind marker missing for {n!r}" assert params[n] == values[n] - def test_sqlalchemy_escape_map_chars_still_work(self): - """SQLAlchemy's default ``bindname_escape_characters`` translates - a few chars (``.`` → ``_``, ``[`` → ``_``, ``]`` → ``_``, ``:`` → - ``C``, ``%`` → ``P``) before our backtick wrapping applies. That's - fine: the translated bind name is still backtick-quoted, and - ``escaped_bind_names`` translates the params dict key to match. + def test_chars_in_sqlalchemy_default_escape_map_still_work(self): + """Characters that SQLAlchemy's default ``bindname_escape_characters`` + would normally pre-translate (``.``, ``[``, ``]``, ``:``, ``%``) + render through our override verbatim inside the backtick-quoted + marker. Backticks make the pre-translation unnecessary — the + params dict key sent to the driver matches the column name + exactly, which is simpler than the escape-map indirection. 
Verified end-to-end against a live warehouse. """ metadata = MetaData() @@ -273,17 +274,16 @@ def test_sqlalchemy_escape_map_chars_still_work(self): }, ) sql = str(compiled) - # The bind name is translated by the escape map, then backticked - assert ":`col_with_dot`" in sql - assert ":`col_bracket_`" in sql - assert ":`colCcolon`" in sql - assert ":`colPpercent`" in sql - - # The driver receives translated keys (escaped_bind_names tells - # construct_params how to rewrite the incoming dict). + assert ":`col.with.dot`" in sql + assert ":`col[bracket]`" in sql + assert ":`col:colon`" in sql + assert ":`col%percent`" in sql + params = compiled.construct_params() - assert params["col_with_dot"] == "d" - assert params["colCcolon"] == "c" + assert params["col.with.dot"] == "d" + assert params["col:colon"] == "c" + assert params["col[bracket]"] == "b" + assert params["col%percent"] == "p" def test_unicode_column_names(self): """Databricks allows arbitrary Unicode inside backtick-quoted From 179e49e08f70ea38551b6903cf499315ec318c3b Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Wed, 22 Apr 2026 16:37:27 +0530 Subject: [PATCH 05/10] Cover IN-clause expansion at render_postcompile=True time too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The __init__ bindtemplate swap covered execute-time IN expansion but missed two adjacent paths: 1. compile_kwargs={'render_postcompile': True} — fires inside super's __init__, before a post-super subclass override can take effect. 2. construct_expanded_state() called directly on a compiled stmt. Both paths funnel through SQLCompiler._literal_execute_expanding_parameter, which reads self.bindtemplate (or compilation_bindtemplate for numeric paramstyles) once into a local variable and uses it to render every expanded marker. Override that single method to swap both templates to the backticked form for the duration of the super call, then restore. 
This removes the __init__ template swap entirely — the override on _literal_execute_expanding_parameter is the single point that covers all three expansion call sites (execute-time, render_postcompile=True compile-time, construct_expanded_state). Adds a regression test exercising both the render_postcompile=True and construct_expanded_state paths. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- src/databricks/sqlalchemy/_ddl.py | 44 +++++++++++++++++++-------- tests/test_local/test_ddl.py | 49 +++++++++++++++++++++++++------ 2 files changed, 72 insertions(+), 21 deletions(-) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index 429d852..e718c4a 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -110,24 +110,22 @@ class DatabricksStatementCompiler(compiler.SQLCompiler): ``bindparam_string`` via ``self.process(statement)``. Oracle overrides this same method (``cx_oracle.py:781``) to quote-wrap names, and we do the same here. - * **Execute-time IN expansion** — SQLAlchemy's + * **IN-clause expansion** — SQLAlchemy's ``_literal_execute_expanding_parameter`` builds expanded markers (``:col-name_1, :col-name_2, ...``) directly from - ``self.bindtemplate``, bypassing ``bindparam_string``. We swap - ``bindtemplate`` after super's ``__init__`` to ensure that path - also emits backticked markers. + ``self.bindtemplate``, bypassing ``bindparam_string``. This method + is called from three sites: at execute time + (``default.py::_execute_context``), during compile time when the + user passes ``compile_kwargs={'render_postcompile': True}``, and + from ``construct_expanded_state``. We intercept by overriding the + method itself rather than swapping ``bindtemplate`` in + ``__init__``, because the ``render_postcompile=True`` path fires + inside super's own ``__init__`` — before a subclass ``__init__`` + post-super override would take effect. 
""" _BACKTICKED_BIND_TEMPLATE = ":`%(name)s`" - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # Super sets self.bindtemplate from BIND_TEMPLATES[paramstyle] - # near the end of its __init__ (for execute-time use, including - # IN-clause expansion). Override it here so the expansion path - # renders backticked markers too. - self.bindtemplate = self._BACKTICKED_BIND_TEMPLATE - def bindparam_string(self, name, **kw): # Fall through to super for the specialized render paths it # already handles (POSTCOMPILE placeholder; escape-map translation @@ -154,6 +152,28 @@ def bindparam_string(self, name, **kw): ret = self.render_bind_cast(bindparam_type, type_impl, ret) return ret + def _literal_execute_expanding_parameter(self, name, parameter, values): + # Super reads ``self.bindtemplate`` (or ``compilation_bindtemplate`` + # for numeric paramstyles) once into a local variable and uses it to + # render every expanded marker. Swap both to our backticked template + # for the duration of the call, then restore, so any later read sees + # the original values. This covers execute-time expansion, the + # ``render_postcompile=True`` compile-kwarg path that fires inside + # super's ``__init__``, and ``construct_expanded_state``. + saved_bt = getattr(self, "bindtemplate", None) + saved_cbt = getattr(self, "compilation_bindtemplate", None) + self.bindtemplate = self._BACKTICKED_BIND_TEMPLATE + self.compilation_bindtemplate = self._BACKTICKED_BIND_TEMPLATE + try: + return super()._literal_execute_expanding_parameter( + name, parameter, values + ) + finally: + if saved_bt is not None: + self.bindtemplate = saved_bt + if saved_cbt is not None: + self.compilation_bindtemplate = saved_cbt + def limit_clause(self, select, **kw): """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1, since Databricks SQL doesn't support the latter. 
diff --git a/tests/test_local/test_ddl.py b/tests/test_local/test_ddl.py index be79b38..537b85a 100644 --- a/tests/test_local/test_ddl.py +++ b/tests/test_local/test_ddl.py @@ -337,19 +337,50 @@ def test_multivalues_insert_disambiguates_with_backticked_markers(self): assert ":`col-name_m0`" in sql assert ":`col-name_m1`" in sql - def test_in_clause_with_hyphenated_column_falls_through_to_postcompile(self): - """IN clauses use ``post_compile`` params which our override skips - (the rendered ``__[POSTCOMPILE_...]`` marker is not a bind name). - The anonymized bind SQLAlchemy assigns to the IN parameter does - still get backticked because it contains a hyphen (``col_name_1`` - would be fine, but the column name slug can leak hyphens). + def test_in_clause_with_hyphenated_column_compiles_to_postcompile(self): + """The initial compilation leaves an IN clause as a POSTCOMPILE + placeholder. The placeholder itself isn't a bind marker so no + quoting is needed at this stage — the actual expanded markers + (``:\\`col-name_1_1\\``, …) are rendered at expansion time by our + ``_literal_execute_expanding_parameter`` override (see + ``test_in_clause_expansion_renders_backticked_markers``). 
""" from sqlalchemy import select metadata = MetaData() table = Table("t", metadata, Column("col-name", String())) stmt = select(table).where(table.c["col-name"].in_(["a", "b"])) + sql = str(stmt.compile(bind=self.engine)) + assert "POSTCOMPILE_col-name_1" in sql + + def test_in_clause_expansion_renders_backticked_markers(self): + """Exercise the three sites that invoke + ``_literal_execute_expanding_parameter``: + + * normal execute-time expansion via ``construct_expanded_state`` + * ``compile_kwargs={'render_postcompile': True}`` — which fires + inside super's ``__init__``, before any post-super subclass + init would take effect + """ + from sqlalchemy import select + + metadata = MetaData() + table = Table("t", metadata, Column("col-name", String())) + stmt = select(table).where(table.c["col-name"].in_(["a", "b", "c"])) + + # (1) render_postcompile=True at compile time — fires inside super __init__ + rendered = str( + stmt.compile(bind=self.engine, compile_kwargs={"render_postcompile": True}) + ) + assert ":`col-name_1_1`" in rendered + assert ":`col-name_1_2`" in rendered + assert ":`col-name_1_3`" in rendered + + # (2) construct_expanded_state at execute time compiled = stmt.compile(bind=self.engine) - # The POSTCOMPILE marker goes through super() — just make sure we - # didn't crash and the SQL is well-formed. 
- assert "POSTCOMPILE" in str(compiled) or "IN (" in str(compiled) + expanded = compiled.construct_expanded_state( + {"col-name_1": ["a", "b", "c"]} + ) + assert ":`col-name_1_1`" in expanded.statement + assert ":`col-name_1_2`" in expanded.statement + assert ":`col-name_1_3`" in expanded.statement From 2ac6b6b40ae4538fcf8ced63fcd75f1cae857e4f Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Wed, 22 Apr 2026 17:26:14 +0530 Subject: [PATCH 06/10] Simplify: fix templates on the class via property descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous revision had two method overrides — bindparam_string for the compile-time path and _literal_execute_expanding_parameter for the IN-expansion path — with the latter doing a try/finally state swap of the template attributes. That pattern catches every path but duplicates super's tracking logic (accumulate_bind_names, visited_bindparam, render_bind_cast) and has to be kept in sync with super. Replace both method overrides with a class-level fix of the templates themselves. Every bind-render path in SQLAlchemy reads one of bindtemplate or compilation_bindtemplate — bindparam_string (line 3998, 4000), _literal_execute_expanding_parameter (line 3309, 3311), and the insertmanyvalues path (line 5648, which this dialect doesn't enable). Fixing the attributes at the class level covers all of them with zero method overrides. Use property descriptors with no-op setters because SQLCompiler.__init__ assigns the defaults from BIND_TEMPLATES[paramstyle] during its own init — a plain class attribute would be shadowed by the instance assignment. The no-op setter silently discards super's assignment so our class-level value is always what gets read. Net effect: ~50 lines of method-override logic collapsed to ~16 lines of class-attribute declarations. 
Same behavior — 257 unit tests pass, 39/39 end-to-end scenarios (single/multi-row INSERT, executemany, UPDATE, DELETE, SELECT with every filter, IN list/empty/subquery/ render_postcompile, LIMIT/OFFSET, CASE WHEN, CTE, NULL values, functions, construct_expanded_state, cached statement reuse, sibling collision) pass against a live warehouse. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- src/databricks/sqlalchemy/_ddl.py | 130 ++++++++++-------------------- tests/test_local/test_ddl.py | 31 +++---- 2 files changed, 59 insertions(+), 102 deletions(-) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index e718c4a..bc139bb 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -84,95 +84,51 @@ def get_column_specification(self, column, **kwargs): class DatabricksStatementCompiler(compiler.SQLCompiler): - """Render every bind parameter marker wrapped in backticks. - - Databricks named parameter markers accept two forms (per the Spark - SQL grammar ``SqlBaseParser.g4``): a bare ``IDENTIFIER`` - (``[A-Za-z_][A-Za-z0-9_]*``) or a ``quotedIdentifier`` wrapped in - backticks. DataFrame-origin column names frequently contain hyphens - (e.g. ``col-with-hyphen``), which SQLAlchemy would otherwise render - verbatim as an invalid bare marker ``:col-with-hyphen`` — the parser - splits on ``-`` and reports ``UNBOUND_SQL_PARAMETER``. - - Backticks are valid for *every* identifier (verified end-to-end - against a Databricks SQL warehouse), so we wrap unconditionally. - This mirrors Oracle's ``:"name"`` approach to the same grammar - constraint (see ``dialects/oracle/cx_oracle.py::OracleCompiler_cx_oracle``). - The backticks are SQL-side *quoting* only: the parameter's logical - name is still the text between them, so the params dict passed to - the driver keeps the original unquoted key. We leave - ``escaped_bind_names`` untouched, so ``construct_params`` passes - keys through unchanged. 
- - Two render paths need covering: - - * **Compile-time rendering** — statement compilation calls - ``bindparam_string`` via ``self.process(statement)``. Oracle - overrides this same method (``cx_oracle.py:781``) to quote-wrap - names, and we do the same here. - * **IN-clause expansion** — SQLAlchemy's - ``_literal_execute_expanding_parameter`` builds expanded markers - (``:col-name_1, :col-name_2, ...``) directly from - ``self.bindtemplate``, bypassing ``bindparam_string``. This method - is called from three sites: at execute time - (``default.py::_execute_context``), during compile time when the - user passes ``compile_kwargs={'render_postcompile': True}``, and - from ``construct_expanded_state``. We intercept by overriding the - method itself rather than swapping ``bindtemplate`` in - ``__init__``, because the ``render_postcompile=True`` path fires - inside super's own ``__init__`` — before a subclass ``__init__`` - post-super override would take effect. + """Compiler that wraps every bind parameter marker in backticks. + + Databricks named parameter markers only accept bare identifiers + (``[A-Za-z_][A-Za-z0-9_]*``) unless backtick-quoted. DataFrame-origin + column names frequently contain hyphens (``col-with-hyphen``), which + SQLAlchemy would otherwise render as an invalid marker + ``:col-with-hyphen`` — the parser splits on ``-`` and reports + UNBOUND_SQL_PARAMETER. + + Wrapping every marker in backticks (``:`col-with-hyphen```) is valid + for any identifier the Spark SQL grammar accepts, so we wrap + unconditionally. The backticks are SQL-side quoting only — the + parameter's logical name is the text between them, so the params + dict sent to the driver keeps the original unquoted key. + + Implementation: fix ``bindtemplate`` and ``compilation_bindtemplate`` + on the class. 
Every bind-render path in SQLAlchemy reads one of + these two attributes (``bindparam_string``, + ``_literal_execute_expanding_parameter``, and the insertmanyvalues + path which this dialect doesn't enable), so fixing them at the + attribute level covers all paths with no method overrides. We use + property descriptors with no-op setters because ``SQLCompiler.__init__`` + assigns the default templates from ``BIND_TEMPLATES[paramstyle]`` + during its own init — a plain class attribute would be shadowed by + that instance assignment. The no-op setter silently discards super's + assignment so our class-level value is always what gets read. """ - _BACKTICKED_BIND_TEMPLATE = ":`%(name)s`" - - def bindparam_string(self, name, **kw): - # Fall through to super for the specialized render paths it - # already handles (POSTCOMPILE placeholder; escape-map translation - # for chars like '.', '[', ']', etc. that super rewrites before - # rendering). For those cases super's own rendering is correct; - # we only intercept the primary path where the name is passed - # through unmodified into the standard bindtemplate. 
- if kw.get("post_compile", False) or kw.get("escaped_from"): - return super().bindparam_string(name, **kw) - - accumulate = kw.get("accumulate_bind_names") - if accumulate is not None: - accumulate.add(name) - visited = kw.get("visited_bindparam") - if visited is not None: - visited.append(name) - - ret = self._BACKTICKED_BIND_TEMPLATE % {"name": name} - - bindparam_type = kw.get("bindparam_type") - if bindparam_type is not None and self.dialect._bind_typing_render_casts: - type_impl = bindparam_type._unwrapped_dialect_impl(self.dialect) - if type_impl.render_bind_cast: - ret = self.render_bind_cast(bindparam_type, type_impl, ret) - return ret - - def _literal_execute_expanding_parameter(self, name, parameter, values): - # Super reads ``self.bindtemplate`` (or ``compilation_bindtemplate`` - # for numeric paramstyles) once into a local variable and uses it to - # render every expanded marker. Swap both to our backticked template - # for the duration of the call, then restore, so any later read sees - # the original values. This covers execute-time expansion, the - # ``render_postcompile=True`` compile-kwarg path that fires inside - # super's ``__init__``, and ``construct_expanded_state``. 
- saved_bt = getattr(self, "bindtemplate", None) - saved_cbt = getattr(self, "compilation_bindtemplate", None) - self.bindtemplate = self._BACKTICKED_BIND_TEMPLATE - self.compilation_bindtemplate = self._BACKTICKED_BIND_TEMPLATE - try: - return super()._literal_execute_expanding_parameter( - name, parameter, values - ) - finally: - if saved_bt is not None: - self.bindtemplate = saved_bt - if saved_cbt is not None: - self.compilation_bindtemplate = saved_cbt + _BIND_TEMPLATE = ":`%(name)s`" + + @property + def bindtemplate(self) -> str: + return self._BIND_TEMPLATE + + @bindtemplate.setter + def bindtemplate(self, _ignored: str) -> None: + pass + + @property + def compilation_bindtemplate(self) -> str: + return self._BIND_TEMPLATE + + @compilation_bindtemplate.setter + def compilation_bindtemplate(self, _ignored: str) -> None: + pass def limit_clause(self, select, **kw): """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1, diff --git a/tests/test_local/test_ddl.py b/tests/test_local/test_ddl.py index 537b85a..804e11d 100644 --- a/tests/test_local/test_ddl.py +++ b/tests/test_local/test_ddl.py @@ -247,13 +247,14 @@ def test_many_special_characters_in_column_names(self): assert params[n] == values[n] def test_chars_in_sqlalchemy_default_escape_map_still_work(self): - """Characters that SQLAlchemy's default ``bindname_escape_characters`` - would normally pre-translate (``.``, ``[``, ``]``, ``:``, ``%``) - render through our override verbatim inside the backtick-quoted - marker. Backticks make the pre-translation unnecessary — the - params dict key sent to the driver matches the column name - exactly, which is simpler than the escape-map indirection. - Verified end-to-end against a live warehouse. 
+ """Characters already in SQLAlchemy's default + ``bindname_escape_characters`` (``.``, ``[``, ``]``, ``:``, ``%``) + are pre-translated by super's ``bindparam_string`` before our + backtick template wraps the resulting name. The rendered bind + name is the translated one (``col_with_dot``), inside backticks. + ``construct_params`` uses ``escaped_bind_names`` to translate + the customer's incoming dict key to match. Verified end-to-end + against a live warehouse. """ metadata = MetaData() table = Table( @@ -274,16 +275,16 @@ def test_chars_in_sqlalchemy_default_escape_map_still_work(self): }, ) sql = str(compiled) - assert ":`col.with.dot`" in sql - assert ":`col[bracket]`" in sql - assert ":`col:colon`" in sql - assert ":`col%percent`" in sql + assert ":`col_with_dot`" in sql + assert ":`col_bracket_`" in sql + assert ":`colCcolon`" in sql + assert ":`colPpercent`" in sql params = compiled.construct_params() - assert params["col.with.dot"] == "d" - assert params["col:colon"] == "c" - assert params["col[bracket]"] == "b" - assert params["col%percent"] == "p" + assert params["col_with_dot"] == "d" + assert params["colCcolon"] == "c" + assert params["col_bracket_"] == "b" + assert params["colPpercent"] == "p" def test_unicode_column_names(self): """Databricks allows arbitrary Unicode inside backtick-quoted From bcb701574ae294afe80f359eb8937b44426d6538 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Wed, 22 Apr 2026 18:54:59 +0530 Subject: [PATCH 07/10] Collapse template properties to inline one-liners MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same semantics — property with getter returning the fixed template and a no-op setter so super's assignment is silently discarded. 13 lines of @property blocks reduced to 2 inline declarations, with a single comment explaining why the no-op setter exists. 
Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- src/databricks/sqlalchemy/_ddl.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index bc139bb..5737a42 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -114,21 +114,11 @@ class DatabricksStatementCompiler(compiler.SQLCompiler): _BIND_TEMPLATE = ":`%(name)s`" - @property - def bindtemplate(self) -> str: - return self._BIND_TEMPLATE - - @bindtemplate.setter - def bindtemplate(self, _ignored: str) -> None: - pass - - @property - def compilation_bindtemplate(self) -> str: - return self._BIND_TEMPLATE - - @compilation_bindtemplate.setter - def compilation_bindtemplate(self, _ignored: str) -> None: - pass + # The no-op setter makes ``SQLCompiler.__init__``'s assignment of the + # default template a silent no-op so our class-level value is what + # every render path reads. + bindtemplate = property(lambda self: self._BIND_TEMPLATE, lambda self, _: None) + compilation_bindtemplate = property(lambda self: self._BIND_TEMPLATE, lambda self, _: None) def limit_clause(self, select, **kw): """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1, From 05c180f9c1759ba48ed6700446632064e16cd262 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Thu, 23 Apr 2026 12:29:49 +0530 Subject: [PATCH 08/10] Add DEBUG log on compile and verify insertmanyvalues path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Adds a single DEBUG-level log in DatabricksStatementCompiler.__init__ recording the template and the compiled SQL. Fires once per compilation, only when sqlalchemy logging is at DEBUG. Quiet at default log levels — no per-bindparam noise. - Empirically verified the _deliver_insertmanyvalues_batches code path (compiler.py:5648) works with the backticked template. 
Flipping use_insertmanyvalues=True and bulk-inserting 20 rows into a table with a hyphenated column produced the expected SQL (:`id__0`, :`col-name__0`, ...) logged as "generated in X (insertmanyvalues)" by SQLAlchemy, and all 20 rows round-tripped. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- src/databricks/sqlalchemy/_ddl.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index 5737a42..f0f7e0c 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -120,6 +120,18 @@ class DatabricksStatementCompiler(compiler.SQLCompiler): bindtemplate = property(lambda self: self._BIND_TEMPLATE, lambda self, _: None) compilation_bindtemplate = property(lambda self: self._BIND_TEMPLATE, lambda self, _: None) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # One-line trace of what we rendered — helps debug reported SQL + # issues without adding per-bindparam noise. + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "DatabricksStatementCompiler compiled statement with " + "backtick-quoted bind markers (template=%s): %s", + self._BIND_TEMPLATE, + self.string, + ) + def limit_clause(self, select, **kw): """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1, since Databricks SQL doesn't support the latter. From 010803e0be65540ab36f665bd20f6f187eb30a26 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Thu, 23 Apr 2026 14:11:11 +0530 Subject: [PATCH 09/10] =?UTF-8?q?Drop=20the=20DEBUG=20log=20=E2=80=94=20re?= =?UTF-8?q?dundant=20with=20sqlalchemy.engine's=20own=20logging?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior commit added a DEBUG log in __init__ recording the compiled statement. 
SQLAlchemy's own sqlalchemy.engine logger already exposes the compiled SQL (via echo=True or the standard engine logger), so our log duplicates that surface for no added signal. Dropping it keeps the dialect consistent with the existing "warnings only" logging style — no other DEBUG-level log exists in the codebase. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- src/databricks/sqlalchemy/_ddl.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index f0f7e0c..5737a42 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -120,18 +120,6 @@ class DatabricksStatementCompiler(compiler.SQLCompiler): bindtemplate = property(lambda self: self._BIND_TEMPLATE, lambda self, _: None) compilation_bindtemplate = property(lambda self: self._BIND_TEMPLATE, lambda self, _: None) - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # One-line trace of what we rendered — helps debug reported SQL - # issues without adding per-bindparam noise. - if logger.isEnabledFor(logging.DEBUG): - logger.debug( - "DatabricksStatementCompiler compiled statement with " - "backtick-quoted bind markers (template=%s): %s", - self._BIND_TEMPLATE, - self.string, - ) - def limit_clause(self, select, **kw): """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1, since Databricks SQL doesn't support the latter. From 9414d89c2fc55565de8ea173fe316a2714f2162f Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Thu, 23 Apr 2026 14:18:51 +0530 Subject: [PATCH 10/10] Fix lint: black formatting and mypy assignment override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - black: long compilation_bindtemplate property line wrapped onto 3 lines. 
- mypy: parent SQLCompiler annotates bindtemplate and compilation_bindtemplate as str, and a property is a different type for static analysis (runtime behavior unchanged — descriptor returns str on access). Add # type: ignore[assignment], consistent with the # type: ignore pattern already used in base.py:52 for preparer override. Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore --- src/databricks/sqlalchemy/_ddl.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index 5737a42..08627a2 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -116,9 +116,16 @@ class DatabricksStatementCompiler(compiler.SQLCompiler): # The no-op setter makes ``SQLCompiler.__init__``'s assignment of the # default template a silent no-op so our class-level value is what - # every render path reads. - bindtemplate = property(lambda self: self._BIND_TEMPLATE, lambda self, _: None) - compilation_bindtemplate = property(lambda self: self._BIND_TEMPLATE, lambda self, _: None) + # every render path reads. ``# type: ignore[assignment]`` is required + # because super declares these as ``str``, and a ``property`` is a + # different type at the static-analysis level (runtime behavior is + # unchanged — the descriptor returns ``str`` on access). + bindtemplate = property( # type: ignore[assignment] + lambda self: self._BIND_TEMPLATE, lambda self, _: None + ) + compilation_bindtemplate = property( # type: ignore[assignment] + lambda self: self._BIND_TEMPLATE, lambda self, _: None + ) def limit_clause(self, select, **kw): """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1,