From 9902d0290fd6793c8e1ac2d91dd47f941fde21b7 Mon Sep 17 00:00:00 2001 From: Cristhian Lopez Vidal Date: Mon, 23 Mar 2026 16:23:23 -0700 Subject: [PATCH 1/3] feat(clickhouse): support PARTITION BY after ORDER BY in CREATE TABLE ClickHouse DDL allows PARTITION BY to appear after ORDER BY, which differs from standard SQL ordering. This change makes the parser accept both orderings when using the ClickHouseDialect or GenericDialect. Fixes a parse failure for production ClickHouse CREATE TABLE statements like: CREATE TABLE t (...) ENGINE = MergeTree() ORDER BY (...) PARTITION BY expr Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/parser/mod.rs | 13 ++++++++++++- tests/sqlparser_clickhouse.rs | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3d96a1d714..aa0f6733d3 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8564,6 +8564,17 @@ impl<'a> Parser<'a> { None }; + // ClickHouse allows PARTITION BY after ORDER BY + // https://clickhouse.com/docs/en/sql-reference/statements/create/table#partition-by + let partition_by = if create_table_config.partition_by.is_none() + && dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) + { + Some(Box::new(self.parse_expr()?)) + } else { + create_table_config.partition_by + }; + let on_commit = if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT]) { Some(self.parse_create_table_on_commit()?) 
} else { @@ -8634,7 +8645,7 @@ impl<'a> Parser<'a> { .on_commit(on_commit) .on_cluster(on_cluster) .clustered_by(clustered_by) - .partition_by(create_table_config.partition_by) + .partition_by(partition_by) .cluster_by(create_table_config.cluster_by) .inherits(create_table_config.inherits) .partition_of(partition_of) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 82f79577b9..5ba6e35ed5 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -233,6 +233,28 @@ fn parse_create_table() { ); } +#[test] +fn parse_create_table_partition_by_after_order_by() { + // ClickHouse DDL places PARTITION BY after ORDER BY. + // MergeTree() is canonicalized to MergeTree and type names are uppercased. + clickhouse().one_statement_parses_to( + concat!( + "CREATE TABLE IF NOT EXISTS \"MyTable\" (`col1` Int64, `col2` Int32) ", + "ENGINE = MergeTree() ", + "PRIMARY KEY (toDate(toDateTime(`col2`)), `col1`, `col2`) ", + "ORDER BY (toDate(toDateTime(`col2`)), `col1`, `col2`) ", + "PARTITION BY col1 % 64" + ), + concat!( + "CREATE TABLE IF NOT EXISTS \"MyTable\" (`col1` INT64, `col2` Int32) ", + "ENGINE = MergeTree ", + "PRIMARY KEY (toDate(toDateTime(`col2`)), `col1`, `col2`) ", + "ORDER BY (toDate(toDateTime(`col2`)), `col1`, `col2`) ", + "PARTITION BY col1 % 64" + ), + ); +} + #[test] fn parse_insert_into_function() { clickhouse().verified_stmt(r#"INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table) VALUES (100, 'inserted via remote()')"#); From 1679f378c0b2b799457152f05db78aa6e484d5de Mon Sep 17 00:00:00 2001 From: Cristhian Lopez Vidal Date: Mon, 23 Mar 2026 16:37:08 -0700 Subject: [PATCH 2/3] feat(clickhouse): add ARRAY JOIN, LEFT/INNER ARRAY JOIN support ClickHouse supports ARRAY JOIN clauses for unnesting arrays inline. This adds JoinOperator variants for ARRAY JOIN, LEFT ARRAY JOIN, and INNER ARRAY JOIN. 
These joins take a table expression (the array to unnest) rather than a standard table reference, and do not use ON/USING constraints. Also adds Spanned impls for the new variants in spans.rs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/query.rs | 15 ++++++ src/ast/spans.rs | 3 ++ src/keywords.rs | 2 + src/parser/mod.rs | 27 ++++++++++ tests/sqlparser_clickhouse.rs | 94 +++++++++++++++++++++++++++++++++++ 5 files changed, 141 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index 4d0774e165..bbdd7540af 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2785,6 +2785,13 @@ impl fmt::Display for Join { self.relation, suffix(constraint) )), + JoinOperator::ArrayJoin => f.write_fmt(format_args!("ARRAY JOIN {}", self.relation)), + JoinOperator::LeftArrayJoin => { + f.write_fmt(format_args!("LEFT ARRAY JOIN {}", self.relation)) + } + JoinOperator::InnerArrayJoin => { + f.write_fmt(format_args!("INNER ARRAY JOIN {}", self.relation)) + } } } } @@ -2839,6 +2846,14 @@ pub enum JoinOperator { /// /// See <https://dev.mysql.com/doc/refman/8.4/en/join.html>. StraightJoin(JoinConstraint), + /// ClickHouse: `ARRAY JOIN` for unnesting arrays inline. + /// + /// See <https://clickhouse.com/docs/en/sql-reference/statements/select/array-join>. + ArrayJoin, + /// ClickHouse: `LEFT ARRAY JOIN` for unnesting arrays inline (preserves rows with empty arrays). + LeftArrayJoin, + /// ClickHouse: `INNER ARRAY JOIN` for unnesting arrays inline (filters rows with empty arrays).
+ InnerArrayJoin, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/ast/spans.rs b/src/ast/spans.rs index dc8be4aec1..0dc834ba03 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -2252,6 +2252,9 @@ impl Spanned for JoinOperator { JoinOperator::Anti(join_constraint) => join_constraint.span(), JoinOperator::Semi(join_constraint) => join_constraint.span(), JoinOperator::StraightJoin(join_constraint) => join_constraint.span(), + JoinOperator::ArrayJoin => Span::empty(), + JoinOperator::LeftArrayJoin => Span::empty(), + JoinOperator::InnerArrayJoin => Span::empty(), } } } diff --git a/src/keywords.rs b/src/keywords.rs index 808e5f03d8..4fc8f72d1d 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -1236,6 +1236,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::FOR, // for MYSQL PARTITION SELECTION Keyword::PARTITION, + // for Clickhouse ARRAY JOIN (ARRAY must not be parsed as a table alias) + Keyword::ARRAY, // for Clickhouse PREWHERE Keyword::PREWHERE, Keyword::SETTINGS, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index aa0f6733d3..0299482875 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -15779,6 +15779,33 @@ impl<'a> Parser<'a> { constraint: self.parse_join_constraint(false)?, }, } + } else if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::INNER, Keyword::ARRAY, Keyword::JOIN]) + { + // ClickHouse: INNER ARRAY JOIN + Join { + relation: self.parse_table_factor()?, + global, + join_operator: JoinOperator::InnerArrayJoin, + } + } else if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::LEFT, Keyword::ARRAY, Keyword::JOIN]) + { + // ClickHouse: LEFT ARRAY JOIN + Join { + relation: self.parse_table_factor()?, + global, + join_operator: JoinOperator::LeftArrayJoin, + } + } else if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::ARRAY, Keyword::JOIN]) + { + // ClickHouse: 
ARRAY JOIN + Join { + relation: self.parse_table_factor()?, + global, + join_operator: JoinOperator::ArrayJoin, + } } else { let natural = self.parse_keyword(Keyword::NATURAL); let peek_keyword = if let Token::Word(w) = &self.peek_token_ref().token { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 5ba6e35ed5..f16a1f8e44 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -253,6 +253,43 @@ fn parse_create_table_partition_by_after_order_by() { "PARTITION BY col1 % 64" ), ); + + // PARTITION BY after ORDER BY works with both ClickHouseDialect and GenericDialect + clickhouse_and_generic() + .verified_stmt("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY a"); + + // Arithmetic expression in PARTITION BY (roundtrip) + clickhouse_and_generic() + .verified_stmt("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY a % 64"); + + // AST: partition_by is populated with the correct expression + match clickhouse_and_generic() + .verified_stmt("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY a % 64") + { + Statement::CreateTable(CreateTable { partition_by, .. 
}) => { + assert_eq!( + partition_by, + Some(Box::new(BinaryOp { + left: Box::new(Identifier(Ident::new("a"))), + op: BinaryOperator::Modulo, + right: Box::new(Expr::Value( + Value::Number("64".parse().unwrap(), false).with_empty_span(), + )), + })) + ); + } + _ => unreachable!(), + } + + // Function call expression in PARTITION BY (ClickHouse-specific function) + clickhouse().verified_stmt( + "CREATE TABLE t (d DATE) ENGINE = MergeTree ORDER BY d PARTITION BY toYYYYMM(d)", + ); + + // Negative: PARTITION BY with no expression should fail + clickhouse_and_generic() + .parse_sql_statements("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY") + .expect_err("PARTITION BY with no expression should fail"); } #[test] @@ -1751,6 +1788,63 @@ fn test_parse_not_null_in_column_options() { ); } +#[test] +fn parse_array_join() { + // ARRAY JOIN works with both ClickHouseDialect and GenericDialect (roundtrip) + clickhouse_and_generic().verified_stmt("SELECT x FROM t ARRAY JOIN arr AS x"); + + // AST: join_operator is the unit variant ArrayJoin (no constraint) + match clickhouse_and_generic().verified_stmt("SELECT x FROM t ARRAY JOIN arr AS x") { + Statement::Query(query) => { + let select = query.body.as_select().unwrap(); + let join = &select.from[0].joins[0]; + assert_eq!(join.join_operator, JoinOperator::ArrayJoin); + } + _ => unreachable!(), + } + + // Combined: regular JOIN followed by ARRAY JOIN + clickhouse_and_generic() + .verified_stmt("SELECT x FROM t JOIN u ON t.id = u.id ARRAY JOIN arr AS x"); + + // Negative: ARRAY JOIN with no table expression should fail + clickhouse_and_generic() + .parse_sql_statements("SELECT x FROM t ARRAY JOIN") + .expect_err("ARRAY JOIN requires a table expression"); +} + +#[test] +fn parse_left_array_join() { + // LEFT ARRAY JOIN preserves rows with empty/null arrays (roundtrip) + clickhouse_and_generic().verified_stmt("SELECT x FROM t LEFT ARRAY JOIN arr AS x"); + + // AST: join_operator is LeftArrayJoin + match 
clickhouse_and_generic().verified_stmt("SELECT x FROM t LEFT ARRAY JOIN arr AS x") { + Statement::Query(query) => { + let select = query.body.as_select().unwrap(); + let join = &select.from[0].joins[0]; + assert_eq!(join.join_operator, JoinOperator::LeftArrayJoin); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_inner_array_join() { + // INNER ARRAY JOIN filters rows with empty/null arrays (roundtrip) + clickhouse_and_generic().verified_stmt("SELECT x FROM t INNER ARRAY JOIN arr AS x"); + + // AST: join_operator is InnerArrayJoin + match clickhouse_and_generic().verified_stmt("SELECT x FROM t INNER ARRAY JOIN arr AS x") { + Statement::Query(query) => { + let select = query.body.as_select().unwrap(); + let join = &select.from[0].joins[0]; + assert_eq!(join.join_operator, JoinOperator::InnerArrayJoin); + } + _ => unreachable!(), + } +} + fn clickhouse() -> TestedDialects { TestedDialects::new(vec![Box::new(ClickHouseDialect {})]) } From e7e43d0cb0206006aee10738fe49c755c153d9b6 Mon Sep 17 00:00:00 2001 From: crilopez Date: Thu, 16 Apr 2026 15:45:46 -0700 Subject: [PATCH 3/3] refactor(clickhouse): move dialect_of! checks to Dialect trait methods Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/dialect/clickhouse.rs | 8 ++++++++ src/dialect/generic.rs | 8 ++++++++ src/dialect/mod.rs | 17 +++++++++++++++++ src/parser/mod.rs | 8 ++++---- 4 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 87c762f0bf..6ee60cc993 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -64,6 +64,14 @@ impl Dialect for ClickHouseDialect { true } + fn supports_partition_by_after_order_by(&self) -> bool { + true + } + + fn supports_array_join_syntax(&self) -> bool { + true + } + // ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting // with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected. 
// diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 674311a929..25f57e3d18 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -45,6 +45,14 @@ impl Dialect for GenericDialect { true } + fn supports_partition_by_after_order_by(&self) -> bool { + true + } + + fn supports_array_join_syntax(&self) -> bool { + true + } + fn supports_group_by_expr(&self) -> bool { true } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 93ca5cf6af..6ab6cb15e4 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -349,6 +349,23 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports `PARTITION BY` appearing after `ORDER BY` + /// in a `CREATE TABLE` statement (in addition to the standard placement before `ORDER BY`). + /// + /// ClickHouse DDL uses this ordering: + /// <https://clickhouse.com/docs/en/sql-reference/statements/create/table#partition-by> + fn supports_partition_by_after_order_by(&self) -> bool { + false + } + + /// Returns true if the dialect supports ClickHouse-style `ARRAY JOIN` / `LEFT ARRAY JOIN` / + /// `INNER ARRAY JOIN` syntax for unnesting arrays inline. + /// + /// <https://clickhouse.com/docs/en/sql-reference/statements/select/array-join> + fn supports_array_join_syntax(&self) -> bool { + false + } + /// Returns true if the dialects supports `group sets, roll up, or cube` expressions.
fn supports_group_by_expr(&self) -> bool { false diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0299482875..668c520e5e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8567,7 +8567,7 @@ impl<'a> Parser<'a> { // ClickHouse allows PARTITION BY after ORDER BY // https://clickhouse.com/docs/en/sql-reference/statements/create/table#partition-by let partition_by = if create_table_config.partition_by.is_none() - && dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.dialect.supports_partition_by_after_order_by() && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { Some(Box::new(self.parse_expr()?)) @@ -15779,7 +15779,7 @@ impl<'a> Parser<'a> { constraint: self.parse_join_constraint(false)?, }, } - } else if dialect_of!(self is ClickHouseDialect | GenericDialect) + } else if self.dialect.supports_array_join_syntax() && self.parse_keywords(&[Keyword::INNER, Keyword::ARRAY, Keyword::JOIN]) { // ClickHouse: INNER ARRAY JOIN @@ -15788,7 +15788,7 @@ impl<'a> Parser<'a> { global, join_operator: JoinOperator::InnerArrayJoin, } - } else if dialect_of!(self is ClickHouseDialect | GenericDialect) + } else if self.dialect.supports_array_join_syntax() && self.parse_keywords(&[Keyword::LEFT, Keyword::ARRAY, Keyword::JOIN]) { // ClickHouse: LEFT ARRAY JOIN @@ -15797,7 +15797,7 @@ impl<'a> Parser<'a> { global, join_operator: JoinOperator::LeftArrayJoin, } - } else if dialect_of!(self is ClickHouseDialect | GenericDialect) + } else if self.dialect.supports_array_join_syntax() && self.parse_keywords(&[Keyword::ARRAY, Keyword::JOIN]) { // ClickHouse: ARRAY JOIN