Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions runtime/drivers/clickhouse/dialect.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ func (d *dialect) RequiresArrayContainsForInOperator() bool { return true }

func (d *dialect) GetArrayContainsFunction() (string, error) { return "hasAny", nil }

// ArrayContainsSubqueryExpr uses arrayExists so the match is evaluated per row.
// hasAny only accepts a literal array, and unnesting via LEFT ARRAY JOIN would duplicate rows whose array contains multiple matching values.
func (d *dialect) ArrayContainsSubqueryExpr(arrExpr, subqueryExpr string) (string, error) {
return fmt.Sprintf("arrayExists(x -> x IN %s, (%s))", subqueryExpr, arrExpr), nil
}

func (d *dialect) CastToDataType(typ runtimev1.Type_Code) (string, error) {
switch typ {
case runtimev1.Type_CODE_TIMESTAMP:
Expand Down
5 changes: 5 additions & 0 deletions runtime/drivers/dialect.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ type Dialect interface {
GetRegexMatchFunction() (string, error)
RequiresArrayContainsForInOperator() bool
GetArrayContainsFunction() (string, error)
ArrayContainsSubqueryExpr(arrExpr, subqueryExpr string) (string, error)
DimensionSelect(escapeTable string, dim *runtimev1.MetricsViewSpec_Dimension) (dimSelect, unnestClause string, err error)
LateralUnnest(expr, tableAlias, colName string) (tbl string, tupleStyle, auto bool, err error)
UnnestSQLSuffix(tbl string) string
Expand Down Expand Up @@ -228,6 +229,10 @@ func (b *BaseDialect) GetArrayContainsFunction() (string, error) {
return "", fmt.Errorf("array contains not supported for %s dialect", b.String())
}

func (b *BaseDialect) ArrayContainsSubqueryExpr(_, _ string) (string, error) {
return "", fmt.Errorf("array contains subquery not supported for %s dialect", b.String())
}

func (b *BaseDialect) MetricsViewDimensionExpression(dimension *runtimev1.MetricsViewSpec_Dimension) (string, error) {
if dimension.LookupTable != "" {
return "", fmt.Errorf("lookup tables are not supported for %s dialect", b.String())
Expand Down
50 changes: 40 additions & 10 deletions runtime/metricsview/astexpr.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,18 @@ func (b *sqlExprBuilder) writeValue(val any) error {
}

func (b *sqlExprBuilder) writeSubquery(sub *Subquery) error {
expr, args, err := b.subqueryExpr(sub)
if err != nil {
return err
}
b.writeString(expr)
b.args = append(b.args, args...)
return nil
}

// subqueryExpr builds the SQL for a subquery and returns it as a parenthesized scalar subquery
// selecting the subquery's dimension, along with its bound args. It does not write to the builder.
func (b *sqlExprBuilder) subqueryExpr(sub *Subquery) (string, []any, error) {
// We construct a Query that combines the parent Query's contextual info with that of the Subquery.
outer := b.ast.Query
inner := &Query{
Expand Down Expand Up @@ -110,21 +122,15 @@ func (b *sqlExprBuilder) writeSubquery(sub *Subquery) error {
}
innerAST, err := NewAST(b.ast.MetricsView, innerSecurity, inner, b.ast.Dialect)
if err != nil {
return fmt.Errorf("failed to create AST for subquery: %w", err)
return "", nil, fmt.Errorf("failed to create AST for subquery: %w", err)
}
sql, args, err := innerAST.SQL()
if err != nil {
return fmt.Errorf("failed to generate SQL for subquery: %w", err)
return "", nil, fmt.Errorf("failed to generate SQL for subquery: %w", err)
}

// Output: (SELECT <dimension> FROM (<subquery>))
b.writeString("(SELECT ")
b.writeString(b.ast.Dialect.EscapeIdentifier(sub.Dimension.Name))
b.writeString(" FROM (")
b.writeString(sql)
b.writeString("))")
b.args = append(b.args, args...)
return nil
return fmt.Sprintf("(SELECT %s FROM (%s))", b.ast.Dialect.EscapeIdentifier(sub.Dimension.Name), sql), args, nil
}

func (b *sqlExprBuilder) writeCondition(cond *Condition) error {
Expand Down Expand Up @@ -279,7 +285,8 @@ func (b *sqlExprBuilder) writeBinaryCondition(exprs []*Expression, op Operator)

// For IN/NIN on unnest dimensions backed by DuckDB or ClickHouse, use native array-contains
// functions (list_has_any / hasAny). This avoids double-counting when a row's array contains multiple matching values.
if (op == OperatorIn || op == OperatorNin) && b.ast.Dialect.RequiresArrayContainsForInOperator() {
// These functions only accept a literal list, so a subquery right-hand side is handled separately below.
if (op == OperatorIn || op == OperatorNin) && right.Value != nil && b.ast.Dialect.RequiresArrayContainsForInOperator() {
return b.writeArrayContainsCondition(leftExpr, right, op == OperatorNin)
}

Expand All @@ -293,6 +300,29 @@ func (b *sqlExprBuilder) writeBinaryCondition(exprs []*Expression, op Operator)
// Means the DB automatically unnests, so we can treat it as a normal value
return b.writeBinaryConditionInner(nil, right, leftExpr, op)
}

// For IN/NIN with a subquery right-hand side on a dialect that unnests via an array join (tupleStyle == false, e.g. ClickHouse),
// adding the join would double-count rows whose array contains multiple matching values. Use a per-row array-contains expression instead.
// On dialects that unnest via a lateral EXISTS semi-join (tupleStyle == true, e.g. DuckDB), the fall-through below already avoids duplication.
if !tupleStyle && (op == OperatorIn || op == OperatorNin) && right.Subquery != nil && b.ast.Dialect.RequiresArrayContainsForInOperator() {
subExpr, subArgs, err := b.subqueryExpr(right.Subquery)
if err != nil {
return err
}
containsExpr, err := b.ast.Dialect.ArrayContainsSubqueryExpr(leftExpr, subExpr)
if err != nil {
return err
}
b.writeByte('(')
if op == OperatorNin {
b.writeString("NOT ")
}
b.writeString(containsExpr)
b.writeByte(')')
b.args = append(b.args, subArgs...)
return nil
}

var unnestColAlias string
if tupleStyle {
unnestColAlias = b.ast.Dialect.EscapeMember(unnestTableAlias, left.Name)
Expand Down
69 changes: 69 additions & 0 deletions runtime/metricsview/astexpr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,75 @@ func TestArrayContainsCondition(t *testing.T) {
wantSQL: `(hasAny(("tags"), [?,?]))`,
wantArgs: []any{nil, "a"},
},
{
name: "clickhouse: in on unnest dim with subquery uses arrayExists",
dialect: clickhouse.DialectClickhouse,
where: &Expression{Condition: &Condition{
Operator: OperatorIn,
Expressions: []*Expression{
{Name: "tags"},
{Subquery: &Subquery{
Dimension: Dimension{Name: "tags"},
Measures: []Measure{{Name: "count"}},
Having: &Expression{Condition: &Condition{
Operator: OperatorGt,
Expressions: []*Expression{
{Name: "count"},
{Value: 1},
},
}},
}},
},
}},
wantSQL: `(arrayExists(x -> x IN (SELECT "tags" FROM (SELECT ("t2"."tags") AS "tags", ("t2"."count") AS "count" FROM (SELECT ("tags") AS "tags", (count(*)) AS "count" FROM "test_table" LEFT ARRAY JOIN "tags" AS "tags" GROUP BY 1) t2 WHERE (("t2"."count") > ?))), ("tags")))`,
wantArgs: []any{1},
},
{
name: "clickhouse: nin on unnest dim with subquery uses NOT arrayExists",
dialect: clickhouse.DialectClickhouse,
where: &Expression{Condition: &Condition{
Operator: OperatorNin,
Expressions: []*Expression{
{Name: "tags"},
{Subquery: &Subquery{
Dimension: Dimension{Name: "tags"},
Measures: []Measure{{Name: "count"}},
Having: &Expression{Condition: &Condition{
Operator: OperatorGt,
Expressions: []*Expression{
{Name: "count"},
{Value: 1},
},
}},
}},
},
}},
wantSQL: `(NOT arrayExists(x -> x IN (SELECT "tags" FROM (SELECT ("t2"."tags") AS "tags", ("t2"."count") AS "count" FROM (SELECT ("tags") AS "tags", (count(*)) AS "count" FROM "test_table" LEFT ARRAY JOIN "tags" AS "tags" GROUP BY 1) t2 WHERE (("t2"."count") > ?))), ("tags")))`,
wantArgs: []any{1},
},
{
name: "duckdb: in on unnest dim with subquery uses lateral EXISTS",
dialect: duckdb.DialectDuckDB,
where: &Expression{Condition: &Condition{
Operator: OperatorIn,
Expressions: []*Expression{
{Name: "tags"},
{Subquery: &Subquery{
Dimension: Dimension{Name: "tags"},
Measures: []Measure{{Name: "count"}},
Having: &Expression{Condition: &Condition{
Operator: OperatorGt,
Expressions: []*Expression{
{Name: "count"},
{Value: 1},
},
}},
}},
},
}},
wantSQL: `EXISTS (SELECT 1 FROM LATERAL UNNEST("tags") t2("tags") WHERE (("t2"."tags") IN (SELECT "tags" FROM (SELECT ("t2"."tags") AS "tags", ("t2"."count") AS "count" FROM (SELECT ("t0"."tags") AS "tags", (count(*)) AS "count" FROM "test_table", LATERAL UNNEST("tags") t0("tags") GROUP BY 1) t2 WHERE (("t2"."count") > ?)))))`,
wantArgs: []any{1},
},
{
name: "duckdb: in on non-unnest dim uses normal IN",
dialect: duckdb.DialectDuckDB,
Expand Down
Loading