Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,58 @@ public void testAggregateWithMultipleGroupingLevels(String dataStoreName) throws
testCountApi(dataStoreName, query, "query/multi_level_grouping_response.json");
}

@ParameterizedTest
@ArgumentsSource(AllProvider.class)
public void testSortByListSizeWithMissingField(String dataStoreName) throws IOException {
Datastore datastore = datastoreMap.get(dataStoreName);
String collectionName = "list_size_sort_collection";
Comment on lines +922 to +926

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The integration test runs against nested collections (the standard document store pattern) which exercises the jsonb_array_length path. The flat collection path (ARRAY_LENGTH) is covered by the existing unit tests in PostgresQueryParserTest which validate SQL generation for LENGTH on aggregation aliases. The flat collection LENGTH on a direct column field follows the same ARRAY_LENGTH codepath and is validated by the existing tests that assert correct SQL output.

datastore.deleteCollection(collectionName);
datastore.createCollection(collectionName, null);
Collection collection = datastore.getCollection(collectionName);
Comment on lines +924 to +929

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Wrapped the test body in try-finally in be9514a so deleteCollection runs regardless of assertion failures.


try {
collection.upsert(
new SingleValueKey(TENANT_ID, "three"),
new JSONDocument("{\"item\":\"three\",\"tags\":[\"a\",\"b\",\"c\"]}"));
collection.upsert(
new SingleValueKey(TENANT_ID, "one"),
new JSONDocument("{\"item\":\"one\",\"tags\":[\"x\"]}"));
// Document intentionally missing the "tags" field; LENGTH must resolve to 0 instead of
// failing
collection.upsert(
new SingleValueKey(TENANT_ID, "none"), new JSONDocument("{\"item\":\"none\"}"));

Query query =
Query.builder()
.addSelection(IdentifierExpression.of("item"))
.addSelection(
FunctionExpression.builder()
.operator(LENGTH)
.operand(IdentifierExpression.of("tags"))
.build(),
"tag_count")
.addSort(IdentifierExpression.of("tag_count"), ASC)
.build();

Iterator<Document> resultDocs = collection.aggregate(query);
List<Map<String, Object>> results = new ArrayList<>();
while (resultDocs.hasNext()) {
results.add(Utils.convertDocumentToMap(resultDocs.next()));
}

assertEquals(3, results.size());
// Document without the "tags" field counts as 0 and sorts first in ascending order
assertEquals("none", results.get(0).get("item"));
assertEquals(0, ((Number) results.get(0).get("tag_count")).intValue());
assertEquals("one", results.get(1).get("item"));
assertEquals(1, ((Number) results.get(1).get("tag_count")).intValue());
assertEquals("three", results.get(2).get("item"));
assertEquals(3, ((Number) results.get(2).get("tag_count")).intValue());
} finally {
datastore.deleteCollection(collectionName);
}
}

@ParameterizedTest
@ArgumentsSource(AllProvider.class)
public void testAggregateWithFunctionalLeftHandSideFilter(final String dataStoreName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ Map<String, Object> parse(final FunctionExpression expression) {

if (numArgs == 1) {
Object value = expression.getOperands().get(0).accept(parser);
// $size fails when the operand resolves to a missing/absent (or null) field. Default such a
// value to an empty array so that LENGTH of an absent field is 0 instead of throwing an
// error.
if (operator == LENGTH) {
value = Map.of("$ifNull", List.of(value, List.of()));
}
return Map.of(key, value);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.stream.Collectors;
import lombok.NoArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.hypertrace.core.documentstore.DocumentType;
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
import org.hypertrace.core.documentstore.expression.operators.FunctionOperator;
import org.hypertrace.core.documentstore.expression.type.SelectTypeExpression;
Expand Down Expand Up @@ -49,10 +50,11 @@ public String visit(final FunctionExpression expression) {
}

if (numArgs == 1) {
if (expression.getOperator().equals(FunctionOperator.LENGTH)) {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did we have same problem here with length function?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests were failing for postgres without the changes.

@puneet-traceable puneet-traceable Jun 23, 2026

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are the tests failing when there is no change to postgres path?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new integration test testSortByListSizeWithMissingField runs on both Mongo and Postgres (via @ArgumentsSource(AllProvider.class)). The test applies LENGTH on a raw jsonb field (tags) where one document is missing that field entirely.

The original Postgres LENGTH implementation used ARRAY_LENGTH(parsedExpression, 1) — but parsedExpression here resolved via getParsedExpression() which uses PostgresDataAccessorIdentifierExpressionVisitor. That visitor casts the jsonb field to NUMERIC (via ->> + CAST), resulting in ARRAY_LENGTH(CAST(document->>'tags' AS NUMERIC), 1) — which fails with function array_length(numeric, integer) does not exist.

Even before this PR, calling LENGTH on a raw jsonb field in Postgres would have failed the same way — it just wasn't tested. The fix distinguishes between native PG arrays (from aggregations) and raw jsonb fields, using jsonb_array_length with a jsonb_typeof guard for the latter.

return buildLengthExpression(expression.getOperands().get(0));
}
String parsedExpression = getParsedExpression(expression.getOperands().get(0));
return expression.getOperator().equals(FunctionOperator.LENGTH)
? String.format("ARRAY_LENGTH( %s, %s )", parsedExpression, ARRAY_DIMENSION)
: String.format("%s( %s )", expression.getOperator(), parsedExpression);
return String.format("%s( %s )", expression.getOperator(), parsedExpression);
}

Collector<String, ?, String> collector =
Expand Down Expand Up @@ -83,6 +85,27 @@ private Collector getCollectorForFunctionOperator(FunctionOperator operator) {
String.format("Query operation:%s not supported", operator));
}

private String buildLengthExpression(final SelectTypeExpression operand) {
Optional<String> identifier = Optional.ofNullable(operand.accept(identifierExpressionVisitor));
Optional<String> resolvedSelection =
identifier.map(v -> getPostgresQueryParser().getPgSelections().get(v));
if (resolvedSelection.isPresent()) {
return String.format(
"COALESCE( ARRAY_LENGTH( %s, %s ), 0 )", resolvedSelection.get(), ARRAY_DIMENSION);
}
Comment on lines +89 to +95

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In practice, the pgSelections path is only reached when LENGTH operates on an alias defined in a prior selection — and those aliases always come from aggregation expressions (ARRAY_AGG, DISTINCT_ARRAY) that produce native PG arrays. There's no existing usage pattern where a raw jsonb field is aliased and then LENGTH is applied to that alias. The direct field access case (without an alias) is handled by the branch below which uses jsonb_typeof for nested collections.

PostgresFieldIdentifierExpressionVisitor fieldVisitor =
new PostgresFieldIdentifierExpressionVisitor(getPostgresQueryParser());
String parsedExpression = operand.accept(fieldVisitor);
if (getPostgresQueryParser().getPgColTransformer().getDocumentType() == DocumentType.FLAT) {
return String.format(
"COALESCE( ARRAY_LENGTH( %s, %s ), 0 )", parsedExpression, ARRAY_DIMENSION);
}
return String.format(

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sarthak77 Can you please attempt to test a case where the field to be use with length function is inside a column which is a jsonb column. Flat collections can have a column which is jsonb and custom attributes will be placed there. So, from my understanding the check above getPostgresQueryParser().getPgColTransformer().getDocumentType() == DocumentType.FLAT to determine whether to use jsonb_array_length is not right.

"jsonb_array_length( CASE WHEN jsonb_typeof( %s ) = 'array' THEN %s"
+ " ELSE '[]'::jsonb END )",
parsedExpression, parsedExpression);
}

private String getParsedExpression(final SelectTypeExpression expression) {
Optional<String> identifier =
Optional.ofNullable(expression.accept(identifierExpressionVisitor));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ void testAggregationExpressionDistinctCount() {

assertEquals(
"SELECT ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)) AS \"qty_distinct\", "
+ "ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ) AS \"qty_distinct_length\" "
+ "COALESCE( ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ), 0 ) AS \"qty_distinct_length\" "
+ "FROM \"testCollection\" "
+ "WHERE CAST (document->>'price' AS NUMERIC) = ? "
+ "GROUP BY document->'item'",
Expand Down Expand Up @@ -435,10 +435,10 @@ void testAggregateWithMultipleGroupingLevels() {
assertEquals(
"SELECT document->'item' AS \"item\", document->'price' AS \"price\", "
+ "ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)) AS \"quantities\", "
+ "ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ) AS \"num_quantities\" "
+ "COALESCE( ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ), 0 ) AS \"num_quantities\" "
+ "FROM \"testCollection\" "
+ "GROUP BY document->'item',document->'price' "
+ "HAVING ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ) = ? "
+ "HAVING COALESCE( ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ), 0 ) = ? "
+ "ORDER BY document->'item' DESC NULLS LAST",
sql);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@
{
"$project": {
"section_count": {
"$size": "$section_count"
"$size": {
"$ifNull": [
"$section_count",
[]
]
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@
{
"$project": {
"total": {
"$size": "$total"
"$size": {
"$ifNull": [
"$total",
[]
]
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@
{
"$project": {
"total": {
"$size": "$total"
"$size": {
"$ifNull": [
"$total",
[]
]
}
}
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@
{
"$project": {
"total": {
"$size": "$total"
"$size": {
"$ifNull": [
"$total",
[]
]
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@
"$project": {
"name": 1,
"total": {
"$size": "$total"
"$size": {
"$ifNull": [
"$total",
[]
]
}
}
}
}
Expand Down
Loading