diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/project/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/project/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_acceptance.py b/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_acceptance.py new file mode 100644 index 00000000..676a0bec --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_acceptance.py @@ -0,0 +1,233 @@ +"""Tests for $project accepted inputs.""" + +from __future__ import annotations + +from typing import Any + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [$meta in Projection]: $meta expressions are accepted in +# projection, with some meta types producing no visible field and $meta +# being the only expression form allowed in exclusion mode. 
+PROJECT_META_TESTS: list[StageTestCase] = [
+    # Meta types that are accepted next to an inclusion flag but whose alias
+    # is expected to be absent from the returned document.
+    *(
+        StageTestCase(
+            f"meta_{meta_type.lower()}_no_visible_field",
+            docs=[{"_id": 1, "a": 10}],
+            pipeline=[{"$project": {"a": 1, alias: {"$meta": meta_type}}}],
+            expected=[{"_id": 1, "a": 10}],
+            msg=f"$project $meta: '{meta_type}' should produce no visible field in output",
+        )
+        for meta_type, alias in (
+            ("randVal", "rv"),
+            ("indexKey", "ik"),
+            ("searchScore", "ss"),
+            ("searchHighlights", "sh"),
+        )
+    ),
+    # $meta next to an exclusion flag: the excluded field is dropped and the
+    # spec is not rejected as a mix of projection modes.
+    StageTestCase(
+        "meta_exclusion_mode_allowed",
+        docs=[{"_id": 1, "a": 10, "b": 20}],
+        pipeline=[{"$project": {"a": 0, "rv": {"$meta": "randVal"}}}],
+        expected=[{"_id": 1, "b": 20}],
+        msg=(
+            "$project $meta should be allowed in exclusion mode"
+            " without triggering mixing restriction"
+        ),
+    ),
+]
+
+# Property [Sub-Projection Behaviors]: sub-document notation in a projection
+# applies inclusion, computed, and expression rules within the nested document.
+PROJECT_SUB_PROJECTION_TESTS: list[StageTestCase] = [
+    # A nested spec may combine a plain inclusion with a computed sibling.
+    StageTestCase(
+        "sub_proj_inclusion_and_computed",
+        docs=[{"_id": 1, "a": {"x": 10, "y": 20, "z": 30}}],
+        pipeline=[{"$project": {"a": {"x": 1, "computed": {"$add": [1, 2]}}}}],
+        expected=[{"_id": 1, "a": {"x": 10, "computed": 3}}],
+        msg=(
+            "$project sub-projection should support a mix of inclusion"
+            " and computed fields within the nested document"
+        ),
+    ),
+    # A sub-document whose only key is an operator is evaluated, not descended into.
+    StageTestCase(
+        "sub_proj_literal_is_expression",
+        docs=[{"_id": 1, "a": {"x": 10, "y": 20}}],
+        pipeline=[{"$project": {"a": {"$literal": 1}}}],
+        expected=[{"_id": 1, "a": 1}],
+        msg=(
+            "$project $literal in a sub-document should be treated as an"
+            " expression, not as a sub-projection"
+        ),
+    ),
+    # Asking for a nested key of a scalar leaves nothing to return for that field.
+    StageTestCase(
+        "sub_proj_scalar_field_omitted",
+        docs=[{"_id": 1, "a": 42}],
+        pipeline=[{"$project": {"a": {"x": 1}}}],
+        expected=[{"_id": 1}],
+        msg="$project sub-projection on a scalar field should omit the field from output",
+    ),
+]
+
+# Property [Empty Collection]: projecting from a collection with no documents
+# returns an empty result set without error.
+PROJECT_EMPTY_COLLECTION_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        f"empty_collection_{mode}",
+        docs=[],
+        pipeline=[{"$project": spec}],
+        expected=[],
+        msg=f"$project {label} on an empty collection should return empty result",
+    )
+    # (id suffix, wording used in the failure message, $project spec)
+    for mode, label, spec in (
+        ("inclusion", "inclusion", {"a": 1}),
+        ("exclusion", "exclusion", {"a": 0}),
+        ("computed", "computed field", {"r": {"$add": [1, 2]}}),
+    )
+]
+
+# Property [Path Collision Non-Errors]: sibling paths and equivalent dotted
+# and nested paths do not produce path collision errors.
+PROJECT_PATH_COLLISION_NON_ERROR_TESTS: list[StageTestCase] = [
+    # Both cases project the same document and expect the same two nested
+    # keys back; only the way the paths are spelled differs.
+    StageTestCase(
+        case_id,
+        docs=[{"_id": 1, "a": {"b": 1, "c": 2}}],
+        pipeline=[{"$project": spec}],
+        expected=[{"_id": 1, "a": {"b": 1, "c": 2}}],
+        msg=message,
+    )
+    for case_id, spec, message in (
+        (
+            "collision_sibling_paths_no_error",
+            {"a.b": 1, "a.c": 1},
+            "$project should allow sibling paths without collision",
+        ),
+        (
+            "collision_nested_equivalent_no_error",
+            {"a": {"b": 1}, "a.c": 1},
+            "$project should merge dotted and nested equivalent paths without collision",
+        ),
+    )
+]
+
+# Property [Field Name Acceptance]: field names with non-leading dollar signs,
+# spaces, numeric names, and Unicode characters are accepted.
+PROJECT_FIELD_NAME_ACCEPTANCE_TESTS: list[StageTestCase] = [
+    # Each document also carries an unrelated "d" field so the expected output
+    # shows that only the unusually named field was kept.
+    StageTestCase(
+        f"field_name_{suffix}",
+        docs=[{"_id": 1, field: 10, "d": 20}],
+        pipeline=[{"$project": {field: 1}}],
+        expected=[{"_id": 1, field: 10}],
+        msg=f"$project should accept {accepted}",
+    )
+    for suffix, field, accepted in (
+        ("non_leading_dollar", "a$bc", "non-leading $ in field names"),
+        ("space", "field name", "spaces in field names"),
+        ("numeric", "123", "numeric field names"),
+        ("unicode", "caf\u00e9", "Unicode characters in field names"),
+    )
+]
+
+# Property [Large Projections]: projections with a large number of fields
+# succeed for both inclusion and exclusion modes.
+PROJECT_LARGE_PROJECTION_TESTS: list[StageTestCase] = [
+    # The stored document has exactly the 500 projected fields, so inclusion
+    # returns it unchanged and exclusion leaves only _id.
+    StageTestCase(
+        "large_inclusion_500_fields",
+        docs=[{"_id": 1, **{f"f{i}": i for i in range(500)}}],
+        pipeline=[{"$project": {f"f{i}": 1 for i in range(500)}}],
+        expected=[{"_id": 1, **{f"f{i}": i for i in range(500)}}],
+        msg="$project should succeed with 500 included fields",
+    ),
+    StageTestCase(
+        "large_exclusion_500_fields",
+        docs=[{"_id": 1, **{f"f{i}": i for i in range(500)}}],
+        pipeline=[{"$project": {f"f{i}": 0 for i in range(500)}}],
+        expected=[{"_id": 1}],
+        msg="$project should succeed with 500 excluded fields",
+    ),
+]
+
+# Property [Pipeline Semantics]: consecutive $project stages compose
+# correctly, narrowing fields progressively.
+PROJECT_PIPELINE_SEMANTICS_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "pipeline_consecutive_project",
+        docs=[{"_id": 1, "a": 10, "b": 20, "c": 30}],
+        pipeline=[
+            {"$project": {"a": 1, "b": 1}},
+            {"$project": {"a": 1}},
+        ],
+        expected=[{"_id": 1, "a": 10}],
+        msg="$project consecutive stages should narrow fields progressively",
+    ),
+]
+
+# Every acceptance property above, in declaration order, for one parametrized test.
+PROJECT_ACCEPTANCE_TESTS = (
+    PROJECT_META_TESTS
+    + PROJECT_SUB_PROJECTION_TESTS
+    + PROJECT_EMPTY_COLLECTION_TESTS
+    + PROJECT_PATH_COLLISION_NON_ERROR_TESTS
+    + PROJECT_FIELD_NAME_ACCEPTANCE_TESTS
+    + PROJECT_LARGE_PROJECTION_TESTS
+    + PROJECT_PIPELINE_SEMANTICS_TESTS
+)
+
+
+# Marked like the sibling BSON-type test so `-m aggregate` also selects this test.
+@pytest.mark.aggregate
+@pytest.mark.parametrize("test_case", pytest_params(PROJECT_ACCEPTANCE_TESTS))
+def test_project_acceptance(collection: Any, test_case: StageTestCase) -> None:
+    """Run one $project acceptance case and check the aggregate result.
+
+    Inserts the case's documents into ``collection`` when it has any, runs the
+    case's pipeline through the ``aggregate`` command, and passes the command
+    result to ``assertResult`` together with the case's expected documents,
+    error code and failure message.
+    """
+    # Cases declared with ``docs=[]`` skip the insert, so the pipeline runs
+    # without this test having written anything.
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(
+        result,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_bson_types.py
b/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_bson_types.py
new file mode 100644
index 00000000..09710315
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_bson_types.py
@@ -0,0 +1,435 @@
+"""Tests that $project preserves all BSON types through each projection mode."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Any
+
+import pytest
+from bson import Binary, Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp
+
+from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
+    StageTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+
+def _bson_type_samples() -> list[tuple[str, str, Any, Any]]:
+    """Return one ``(id_suffix, label, stored, expected)`` row per BSON type.
+
+    ``stored`` is the value written to the collection and ``expected`` is the
+    value the test expects to read back. The two are built separately and a
+    fresh list is returned on every call, so no test case shares a value
+    object with another.
+
+    ``expected`` differs from ``stored`` for two rows: the timezone-aware
+    datetime is expected back as a naive one, and ``Binary`` is expected back
+    as plain ``bytes``. NOTE(review): presumably this mirrors the driver's
+    default decoding; confirm against the client options used by the fixtures.
+    """
+    return [
+        ("null", "null", None, None),
+        ("string", "string", "hello", "hello"),
+        ("int32", "int32", 42, 42),
+        ("int64", "int64", Int64(123_456_789_012_345), Int64(123_456_789_012_345)),
+        ("double", "double", 3.14, 3.14),
+        ("decimal128", "Decimal128", Decimal128("1.23"), Decimal128("1.23")),
+        ("bool", "bool", True, True),
+        ("array", "array", [1, 2, 3], [1, 2, 3]),
+        ("object", "object", {"nested": "doc"}, {"nested": "doc"}),
+        (
+            "objectid",
+            "ObjectId",
+            ObjectId("507f1f77bcf86cd799439011"),
+            ObjectId("507f1f77bcf86cd799439011"),
+        ),
+        (
+            "datetime",
+            "datetime",
+            datetime(2024, 1, 1, tzinfo=timezone.utc),
+            datetime(2024, 1, 1),
+        ),
+        ("timestamp", "Timestamp", Timestamp(1_234_567_890, 1), Timestamp(1_234_567_890, 1)),
+        ("binary", "Binary", Binary(b"hello"), b"hello"),
+        ("regex", "Regex", Regex("abc", "i"), Regex("abc", "i")),
+        ("code", "Code", Code("function() {}"), Code("function() {}")),
+        (
+            "code_with_scope",
+            "CodeWithScope",
+            Code("function() { return x; }", {"x": 1}),
+            Code("function() { return x; }", {"x": 1}),
+        ),
+        ("minkey", "MinKey", MinKey(), MinKey()),
+        ("maxkey", "MaxKey", MaxKey(), MaxKey()),
+    ]
+
+
+# Property [BSON Type Inclusion]: each BSON type is preserved unchanged
+# when the field is explicitly included.
+PROJECT_BSON_INCLUSION_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        f"inclusion_{suffix}",
+        docs=[{"_id": 1, "v": stored}],
+        pipeline=[{"$project": {"v": 1}}],
+        expected=[{"_id": 1, "v": expected}],
+        msg=f"$project inclusion should preserve {label}",
+    )
+    for suffix, label, stored, expected in _bson_type_samples()
+]
+
+# Property [BSON Type Exclusion]: each BSON type is preserved unchanged
+# when a different field is excluded.
+PROJECT_BSON_EXCLUSION_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        f"exclusion_{suffix}",
+        # "x" exists only to be excluded; "v" carries the value under test.
+        docs=[{"_id": 1, "v": stored, "x": 0}],
+        pipeline=[{"$project": {"x": 0}}],
+        expected=[{"_id": 1, "v": expected}],
+        msg=f"$project exclusion should preserve {label}",
+    )
+    for suffix, label, stored, expected in _bson_type_samples()
+]
+
+# Property [BSON Type Field Path Reference]: each BSON type is preserved
+# unchanged when copied via a field path expression.
+PROJECT_BSON_FIELD_PATH_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        f"field_path_{suffix}",
+        docs=[{"_id": 1, "v": stored}],
+        pipeline=[{"$project": {"r": "$v"}}],
+        expected=[{"_id": 1, "r": expected}],
+        msg=f"$project field path should preserve {label}",
+    )
+    for suffix, label, stored, expected in _bson_type_samples()
+]
+
+PROJECT_BSON_TYPE_TESTS = (
+    PROJECT_BSON_INCLUSION_TESTS + PROJECT_BSON_EXCLUSION_TESTS + PROJECT_BSON_FIELD_PATH_TESTS
+)
+
+
+@pytest.mark.aggregate
+@pytest.mark.parametrize("test_case", pytest_params(PROJECT_BSON_TYPE_TESTS))
+def test_project_bson_type_cases(collection: Any, test_case: StageTestCase) -> None:
+    """Run one BSON-type case and check that $project returns the expected value.
+
+    Inserts the case's documents into ``collection`` when it has any, runs the
+    case's pipeline through the ``aggregate`` command, and passes the command
+    result to ``assertResult`` together with the case's expected documents,
+    error code and failure message.
+    """
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(
+        result,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_computed.py b/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_computed.py
new file mode 100644
index 00000000..5b3c3126
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_computed.py
@@ -0,0 +1,185 @@
+"""Tests for $project computed fields, $$REMOVE, and array literals."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from bson.son import SON
+
+from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
+    StageTestCase,
+)
+from
documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+from documentdb_tests.framework.test_constants import MISSING
+
+# Property [Computed / Expression Fields]: expressions, field path references,
+# $literal, and special values produce computed field values in the output.
+PROJECT_COMPUTED_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "computed_field_path_ref",
+        docs=[{"_id": 1, "a": 10, "b": 20}],
+        pipeline=[{"$project": {"result": "$a"}}],
+        expected=[{"_id": 1, "result": 10}],
+        msg="$project should copy a field via field path reference",
+    ),
+    StageTestCase(
+        "computed_literal_zero",
+        docs=[{"_id": 1, "a": 10}],
+        pipeline=[{"$project": {"flag": {"$literal": 0}}}],
+        expected=[{"_id": 1, "flag": 0}],
+        msg="$project $literal should prevent 0 from being interpreted as exclusion",
+    ),
+    StageTestCase(
+        "computed_literal_true",
+        docs=[{"_id": 1, "a": 10}],
+        pipeline=[{"$project": {"flag": {"$literal": True}}}],
+        expected=[{"_id": 1, "flag": True}],
+        msg="$project $literal should prevent true from being interpreted as inclusion",
+    ),
+    StageTestCase(
+        "computed_literal_false",
+        docs=[{"_id": 1, "a": 10}],
+        pipeline=[{"$project": {"flag": {"$literal": False}}}],
+        expected=[{"_id": 1, "flag": False}],
+        msg="$project $literal should prevent false from being interpreted as exclusion",
+    ),
+    StageTestCase(
+        "computed_null_value",
+        docs=[{"_id": 1, "a": 10, "b": 20}],
+        pipeline=[{"$project": {"a": 1, "x": None}}],
+        expected=[{"_id": 1, "a": 10, "x": None}],
+        msg="$project should treat null as a computed expression setting the field to null",
+    ),
+    StageTestCase(
+        "computed_missing_field_ref_omitted",
+        docs=[{"_id": 1, "a": 10}],
+        pipeline=[{"$project": {"result": MISSING}}],
+        expected=[{"_id": 1}],
+        msg="$project should omit a field when it references a missing field",
+    ),
+    StageTestCase(
+        "computed_self_reference",
+        docs=[{"_id": 1, "a": 10}],
+        pipeline=[{"$project": {"a": "$a"}}],
+        expected=[{"_id": 1, "a": 10}],
+        msg="$project self-reference should read from the original document",
+    ),
+    StageTestCase(
+        "computed_cross_reference",
+        docs=[{"_id": 1, "a": 10, "b": 20}],
+        # SON spells out the key order: "a" is assigned before "b" is read.
+        pipeline=[{"$project": SON([("a", "$b"), ("b", "$a")])}],
+        expected=[{"_id": 1, "a": 20, "b": 10}],
+        msg=(
+            "$project cross-reference should read from the original document,"
+            " not intermediate projection state"
+        ),
+    ),
+]
+
+# Property [$$REMOVE Behavior]: $$REMOVE removes a field from output and
+# interacts correctly with conditional expressions and inclusion mode.
+PROJECT_REMOVE_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "remove_basic",
+        docs=[{"_id": 1, "a": 10, "b": 20}],
+        pipeline=[{"$project": {"a": 1, "b": "$$REMOVE"}}],
+        expected=[{"_id": 1, "a": 10}],
+        msg="$project should remove a field when its value is $$REMOVE",
+    ),
+    # The next two cases run the same $cond pipeline; only "show" differs.
+    StageTestCase(
+        "remove_cond_true",
+        docs=[{"_id": 1, "a": 10, "show": True}],
+        pipeline=[
+            {"$project": {"result": {"$cond": [{"$eq": ["$show", True]}, "$a", "$$REMOVE"]}}}
+        ],
+        expected=[{"_id": 1, "result": 10}],
+        msg="$project $$REMOVE with $cond should keep field when condition is true",
+    ),
+    StageTestCase(
+        "remove_cond_false",
+        docs=[{"_id": 1, "a": 20, "show": False}],
+        pipeline=[
+            {"$project": {"result": {"$cond": [{"$eq": ["$show", True]}, "$a", "$$REMOVE"]}}}
+        ],
+        expected=[{"_id": 1}],
+        msg="$project $$REMOVE with $cond should remove field when condition is false",
+    ),
+    StageTestCase(
+        "remove_dotted_path",
+        docs=[{"_id": 1, "a": 10, "b": 20}],
+        pipeline=[{"$project": {"a": 1, "b": "$$REMOVE.x.y"}}],
+        expected=[{"_id": 1, "a": 10}],
+        msg="$project should remove a field when $$REMOVE has additional path components",
+    ),
+    StageTestCase(
+        "remove_inside_array_becomes_null",
+        docs=[{"_id": 1, "a": 10}],
+        pipeline=[{"$project": {"arr": ["$a", "$$REMOVE", "hello"]}}],
+        expected=[{"_id": 1, "arr": [10, None, "hello"]}],
+        msg="$project $$REMOVE inside an array literal should become null",
+    ),
+    StageTestCase(
+        "remove_compatible_with_inclusion",
+        docs=[{"_id": 1, "a": 10, "b": 20, "c": 30}],
+        pipeline=[{"$project": {"a": 1, "b": "$$REMOVE", "c": 1}}],
+        expected=[{"_id": 1, "a": 10, "c": 30}],
+        msg=(
+            "$project $$REMOVE should be compatible with inclusion mode"
+            " without triggering the mixing restriction"
+        ),
+    ),
+]
+
+# Property [Array Literal Fields]: square bracket syntax creates new array
+# fields from field references and literal values.
+PROJECT_ARRAY_LITERAL_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "array_literal_field_refs",
+        docs=[{"_id": 1, "a": 10, "b": 20}],
+        pipeline=[{"$project": {"arr": ["$a", "$b"]}}],
+        expected=[{"_id": 1, "arr": [10, 20]}],
+        msg="$project should create a new array field from field references",
+    ),
+    StageTestCase(
+        "array_literal_missing_ref_becomes_null",
+        docs=[{"_id": 1, "a": 10}],
+        pipeline=[{"$project": {"arr": ["$a", MISSING]}}],
+        expected=[{"_id": 1, "arr": [10, None]}],
+        msg="$project should substitute null for missing field references inside array literals",
+    ),
+    StageTestCase(
+        "array_literal_empty",
+        docs=[{"_id": 1, "a": 10, "b": 20}],
+        pipeline=[{"$project": {"arr": []}}],
+        expected=[{"_id": 1, "arr": []}],
+        msg="$project should treat an empty array as a computed field producing an empty array",
+    ),
+]
+
+# Every computed-field property above, in declaration order, for one parametrized test.
+PROJECT_COMPUTED_ALL_TESTS = (
+    PROJECT_COMPUTED_TESTS + PROJECT_REMOVE_TESTS + PROJECT_ARRAY_LITERAL_TESTS
+)
+
+
+# Marked like the sibling BSON-type test so `-m aggregate` also selects this test.
+@pytest.mark.aggregate
+@pytest.mark.parametrize("test_case", pytest_params(PROJECT_COMPUTED_ALL_TESTS))
+def test_project_computed(collection: Any, test_case: StageTestCase) -> None:
+    """Run one $project computed-field case and check the aggregate result.
+
+    Inserts the case's documents into ``collection`` when it has any, runs the
+    case's pipeline through the ``aggregate`` command, and passes the command
+    result to ``assertResult`` together with the case's expected documents,
+    error code and failure message.
+    """
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(
+        result,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_errors.py
new file mode 100644
index 00000000..8143c248
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_errors.py
@@ -0,0 +1,540 @@
+"""Tests for $project error cases."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Any
+
+import pytest
+from bson import (
+    Binary,
+    Code,
+    Decimal128,
+    Int64,
+    MaxKey,
+    MinKey,
+    ObjectId,
+    Regex,
+    Timestamp,
+)
+from bson.son import SON
+
+from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
+    StageTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.error_codes import (
+    EXPRESSION_ARITY_ERROR,
+    FIELD_PATH_DOLLAR_PREFIX_ERROR,
+    FIELD_PATH_EMPTY_COMPONENT_ERROR,
+    FIELD_PATH_EMPTY_ERROR,
+    FIELD_PATH_TRAILING_DOT_ERROR,
+    OVERFLOW_ERROR,
+    PROJECT_EMPTY_SPEC_ERROR,
+    PROJECT_EMPTY_SUB_PROJECTION_ERROR,
+    PROJECT_EXCLUSION_IN_INCLUSION_ERROR,
+    PROJECT_INCLUSION_IN_EXCLUSION_ERROR,
+    PROJECT_OPERATOR_IN_EXCLUSION_ERROR,
+    PROJECT_PATH_COLLISION_CHILD_AFTER_PARENT_ERROR,
+    PROJECT_PATH_COLLISION_PARENT_AFTER_CHILD_ERROR,
+    PROJECT_SPEC_NOT_OBJECT_ERROR,
+    PROJECT_UNKNOWN_EXPRESSION_ERROR,
+    PROJECT_VALUE_IN_EXCLUSION_ERROR,
+    UNRECOGNIZED_EXPRESSION_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Mixing Restriction Errors]: combining inclusion and exclusion
+# flags, or exclusion with expressions, in the same projection produces an
+# error.
# Note: SON is used instead of plain dicts to make key order explicit, since
# the specific error code depends on which field the server encounters first.
# Each row below is (case id, ordered projection items, expected error code,
# assertion message); the comprehensions wrap the items in SON.
PROJECT_MIXING_RESTRICTION_ERROR_TESTS: list[StageTestCase] = [
    # Mixing at the top level of the projection.
    StageTestCase(
        case_id,
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": SON(items)}],
        error_code=code,
        msg=message,
    )
    for case_id, items, code, message in (
        (
            "mixing_exclusion_after_inclusion",
            [("a", 1), ("b", 0)],
            PROJECT_EXCLUSION_IN_INCLUSION_ERROR,
            "$project should reject exclusion on a non-_id field in inclusion projection",
        ),
        (
            "mixing_inclusion_after_exclusion",
            [("a", 0), ("b", 1)],
            PROJECT_INCLUSION_IN_EXCLUSION_ERROR,
            "$project should reject inclusion on a field in exclusion projection",
        ),
        (
            "mixing_exclusion_with_expression",
            [("a", 0), ("b", {"$add": [1, 2]})],
            PROJECT_OPERATOR_IN_EXCLUSION_ERROR,
            "$project should reject expression other than $meta in exclusion projection",
        ),
        (
            "mixing_exclusion_with_field_path",
            [("a", 0), ("b", "$a")],
            PROJECT_VALUE_IN_EXCLUSION_ERROR,
            "$project should reject field path reference in exclusion projection",
        ),
        (
            "mixing_exclusion_with_null",
            [("a", 0), ("b", None)],
            PROJECT_VALUE_IN_EXCLUSION_ERROR,
            "$project should reject null computed value in exclusion projection",
        ),
        (
            "mixing_exclusion_with_array_literal",
            [("a", 0), ("b", ["$a"])],
            PROJECT_VALUE_IN_EXCLUSION_ERROR,
            "$project should reject array literal in exclusion projection",
        ),
        (
            "mixing_exclusion_with_remove",
            [("a", 0), ("b", "$$REMOVE")],
            PROJECT_VALUE_IN_EXCLUSION_ERROR,
            "$project should reject $$REMOVE in exclusion projection",
        ),
        (
            "mixing_id_inclusion_exclusion_expression",
            [("_id", 1), ("a", 0), ("b", {"$add": [1, 2]})],
            PROJECT_OPERATOR_IN_EXCLUSION_ERROR,
            "$project should reject expression in exclusion mode even with _id: 1",
        ),
        (
            "mixing_id_expression_with_exclusion",
            [("_id", {"$add": [1, 2]}), ("a", 0)],
            PROJECT_EXCLUSION_IN_INCLUSION_ERROR,
            "$project should reject exclusion when _id is a computed expression",
        ),
    )
] + [
    # Mixing inside the sub-projection given for field "a".
    StageTestCase(
        case_id,
        docs=[{"_id": 1, "a": {"x": 10, "y": 20}}],
        pipeline=[{"$project": {"a": SON(items)}}],
        error_code=code,
        msg=message,
    )
    for case_id, items, code, message in (
        (
            "mixing_sub_proj_exclusion_after_inclusion",
            [("x", 1), ("y", 0)],
            PROJECT_EXCLUSION_IN_INCLUSION_ERROR,
            "$project sub-projection should reject exclusion after inclusion",
        ),
        (
            "mixing_sub_proj_inclusion_after_exclusion",
            [("x", 0), ("y", 1)],
            PROJECT_INCLUSION_IN_EXCLUSION_ERROR,
            "$project sub-projection should reject inclusion after exclusion",
        ),
        (
            "mixing_sub_proj_expression_in_exclusion",
            [("x", 0), ("y", {"$add": [1, 2]})],
            PROJECT_OPERATOR_IN_EXCLUSION_ERROR,
            "$project sub-projection should reject operator expression in exclusion mode",
        ),
        (
            "mixing_sub_proj_field_path_in_exclusion",
            [("x", 0), ("y", "$a.x")],
            PROJECT_VALUE_IN_EXCLUSION_ERROR,
            "$project sub-projection should reject field path in exclusion mode",
        ),
    )
]

# Property [Path Collision Errors]: specifying a parent path and a child path
# in the same projection produces an error regardless of order or mode.
# Rows are (case id, input document, ordered projection items, expected error
# code, assertion message).
PROJECT_PATH_COLLISION_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        case_id,
        docs=[doc],
        pipeline=[{"$project": SON(items)}],
        error_code=code,
        msg=message,
    )
    for case_id, doc, items, code, message in (
        (
            "collision_parent_after_child_inclusion",
            {"_id": 1, "a": {"b": 1}},
            [("a.b", 1), ("a", 1)],
            PROJECT_PATH_COLLISION_PARENT_AFTER_CHILD_ERROR,
            "$project should reject parent path after child path in inclusion mode",
        ),
        (
            "collision_child_after_parent_inclusion",
            {"_id": 1, "a": {"b": 1}},
            [("a", 1), ("a.b", 1)],
            PROJECT_PATH_COLLISION_CHILD_AFTER_PARENT_ERROR,
            "$project should reject child path after parent path in inclusion mode",
        ),
        (
            "collision_parent_after_child_exclusion",
            {"_id": 1, "a": {"b": 1}},
            [("a.b", 0), ("a", 0)],
            PROJECT_PATH_COLLISION_PARENT_AFTER_CHILD_ERROR,
            "$project should reject parent path after child path in exclusion mode",
        ),
        (
            "collision_child_after_parent_exclusion",
            {"_id": 1, "a": {"b": 1}},
            [("a", 0), ("a.b", 0)],
            PROJECT_PATH_COLLISION_CHILD_AFTER_PARENT_ERROR,
            "$project should reject child path after parent path in exclusion mode",
        ),
        (
            "collision_deep_parent_after_child",
            {"_id": 1, "a": {"b": {"c": 1}}},
            [("a.b.c", 1), ("a", 1)],
            PROJECT_PATH_COLLISION_PARENT_AFTER_CHILD_ERROR,
            "$project should reject grandparent path after deeply nested child path",
        ),
        (
            "collision_deep_child_after_parent",
            {"_id": 1, "a": {"b": {"c": 1}}},
            [("a", 1), ("a.b.c", 1)],
            PROJECT_PATH_COLLISION_CHILD_AFTER_PARENT_ERROR,
            "$project should reject deeply nested child path after grandparent path",
        ),
        (
            "collision_mid_level_parent_after_child",
            {"_id": 1, "a": {"b": {"c": 1}}},
            [("a.b.c", 1), ("a.b", 1)],
            PROJECT_PATH_COLLISION_PARENT_AFTER_CHILD_ERROR,
            "$project should reject mid-level parent path after child path",
        ),
        (
            "collision_mid_level_child_after_parent",
            {"_id": 1, "a": {"b": {"c": 1}}},
            [("a.b", 1), ("a.b.c", 1)],
            PROJECT_PATH_COLLISION_CHILD_AFTER_PARENT_ERROR,
            "$project should reject child path after mid-level parent path",
        ),
    )
]

# Property [Argument Validation Errors]: non-document specifications and
# empty document specifications produce errors.
# Rows are (case id, non-document $project argument, assertion message); they
# all share one input document and one expected error code. The empty-document
# case has its own error code and is appended explicitly.
PROJECT_ARGUMENT_VALIDATION_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        case_id,
        docs=[{"_id": 1, "a": 10}],
        pipeline=[{"$project": spec}],
        error_code=PROJECT_SPEC_NOT_OBJECT_ERROR,
        msg=message,
    )
    for case_id, spec, message in (
        ("arg_validation_string", "hello", "$project should reject a string specification"),
        ("arg_validation_int", 42, "$project should reject an integer specification"),
        ("arg_validation_float", 3.14, "$project should reject a float specification"),
        ("arg_validation_bool", True, "$project should reject a boolean specification"),
        ("arg_validation_null", None, "$project should reject a null specification"),
        ("arg_validation_array", [1, 2], "$project should reject an array specification"),
        ("arg_validation_int64", Int64(42), "$project should reject an Int64 specification"),
        (
            "arg_validation_decimal128",
            Decimal128("3.14"),
            "$project should reject a Decimal128 specification",
        ),
        (
            "arg_validation_objectid",
            ObjectId(),
            "$project should reject an ObjectId specification",
        ),
        (
            "arg_validation_datetime",
            datetime(2024, 1, 1, tzinfo=timezone.utc),
            "$project should reject a datetime specification",
        ),
        (
            "arg_validation_binary",
            Binary(b"\x01"),
            "$project should reject a Binary specification",
        ),
        (
            "arg_validation_regex",
            Regex("^abc"),
            "$project should reject a Regex specification",
        ),
        (
            "arg_validation_timestamp",
            Timestamp(1, 1),
            "$project should reject a Timestamp specification",
        ),
        ("arg_validation_minkey", MinKey(), "$project should reject a MinKey specification"),
        ("arg_validation_maxkey", MaxKey(), "$project should reject a MaxKey specification"),
        (
            "arg_validation_code",
            Code("function(){}"),
            "$project should reject a Code specification",
        ),
        (
            "arg_validation_codewithscope",
            Code("function(){}", {"x": 1}),
            "$project should reject a CodeWithScope specification",
        ),
    )
] + [
    StageTestCase(
        "arg_validation_empty_doc",
        docs=[{"_id": 1, "a": 10}],
        pipeline=[{"$project": {}}],
        error_code=PROJECT_EMPTY_SPEC_ERROR,
        msg="$project should reject an empty document specification",
    ),
]

# Property [Field Name Validation Errors]: invalid field names produce errors.
# Rows are (case id, input document, $project specification, expected error
# code, assertion message).
PROJECT_FIELD_NAME_VALIDATION_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        case_id,
        docs=[doc],
        pipeline=[{"$project": spec}],
        error_code=code,
        msg=message,
    )
    for case_id, doc, spec, code, message in (
        (
            "field_name_empty_string",
            {"_id": 1, "a": 10},
            {"": 1},
            FIELD_PATH_EMPTY_ERROR,
            "$project should reject an empty string field name",
        ),
        (
            "field_name_dollar_prefix",
            {"_id": 1, "a": 10},
            {"$bad": 1},
            FIELD_PATH_DOLLAR_PREFIX_ERROR,
            "$project should reject a $-prefixed field name",
        ),
        (
            "field_name_leading_dot",
            {"_id": 1, "a": 10},
            {".a": 1},
            FIELD_PATH_EMPTY_COMPONENT_ERROR,
            "$project should reject a field path with a leading dot",
        ),
        (
            "field_name_trailing_dot",
            {"_id": 1, "a": 10},
            {"a.": 1},
            FIELD_PATH_TRAILING_DOT_ERROR,
            "$project should reject a field path with a trailing dot",
        ),
        (
            "field_name_double_dot",
            {"_id": 1, "a": 10},
            {"a..b": 1},
            FIELD_PATH_EMPTY_COMPONENT_ERROR,
            "$project should reject a field path with a double dot",
        ),
        (
            "field_name_empty_sub_projection",
            {"_id": 1, "a": {"x": 10}},
            {"a": {}},
            PROJECT_EMPTY_SUB_PROJECTION_ERROR,
            "$project should reject an empty sub-projection",
        ),
        (
            "field_name_dollar_natural",
            {"_id": 1, "a": 10},
            {"$natural": 1},
            FIELD_PATH_DOLLAR_PREFIX_ERROR,
            "$project should reject $natural as a field path",
        ),
        (
            "field_name_bare_dollar",
            {"_id": 1, "a": 10},
            {"$": 1},
            FIELD_PATH_DOLLAR_PREFIX_ERROR,
            "$project should reject a bare $ as a field path",
        ),
        (
            "field_name_bare_double_dollar",
            {"_id": 1, "a": 10},
            {"$$": 1},
            FIELD_PATH_DOLLAR_PREFIX_ERROR,
            "$project should reject a bare $$ as a field path",
        ),
        (
            "field_name_nested_leading_dollar",
            {"_id": 1, "a": 10},
            {"a.$b": 1},
            FIELD_PATH_DOLLAR_PREFIX_ERROR,
            "$project should reject a leading $ in a nested path component",
        ),
        (
            "field_name_depth_exceeds_200",
            {"_id": 1, "a": 10},
            # 201 dot-separated "a" components.
            {".".join(["a"] * 201): 1},
            OVERFLOW_ERROR,
            "$project should reject a field path exceeding 200 components",
        ),
    )
]

# Property [Expression Validation Errors]: unrecognized or misused expression
# operators in a sub-document produce errors.
# Rows are (case id, input document, value projected onto field "a", expected
# error code, assertion message). SON is used where the order of the keys in
# the sub-document is spelled out explicitly.
PROJECT_EXPRESSION_VALIDATION_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        case_id,
        docs=[doc],
        pipeline=[{"$project": {"a": projected}}],
        error_code=code,
        msg=message,
    )
    for case_id, doc, projected, code, message in (
        (
            "expr_unrecognized_operator",
            {"_id": 1, "a": 10},
            {"$bogus": 1},
            PROJECT_UNKNOWN_EXPRESSION_ERROR,
            "$project should reject an unrecognized $-prefixed operator",
        ),
        (
            "expr_multiple_dollar_keys",
            {"_id": 1, "a": 10},
            SON([("$add", [1, 2]), ("$subtract", [3, 1])]),
            FIELD_PATH_DOLLAR_PREFIX_ERROR,
            "$project should reject multiple $-prefixed keys in a sub-document",
        ),
        (
            "expr_mixed_dollar_and_non_dollar_keys",
            {"_id": 1, "a": 10},
            SON([("$add", [1, 2]), ("x", 1)]),
            FIELD_PATH_DOLLAR_PREFIX_ERROR,
            "$project should reject mixed $ and non-$ keys in a sub-document",
        ),
        (
            "expr_elemmatch_in_aggregation",
            {"_id": 1, "a": [{"x": 1}, {"x": 2}]},
            {"$elemMatch": {"x": 1}},
            UNRECOGNIZED_EXPRESSION_ERROR,
            "$project should reject $elemMatch in aggregation context",
        ),
        (
            "expr_slice_find_syntax",
            {"_id": 1, "a": [1, 2, 3]},
            {"$slice": 2},
            EXPRESSION_ARITY_ERROR,
            "$project should reject $slice with find-projection syntax",
        ),
    )
]

# Property [Error Precedence]: when multiple errors exist, the first invalid
# field in document order determines the error, and a field name validation
# error is reported even when the same field would also violate a mixing
# restriction.
+PROJECT_ERROR_PRECEDENCE_TESTS: list[StageTestCase] = [ + StageTestCase( + "precedence_first_field_determines_error_dollar_first", + docs=[{"_id": 1, "a": 10}], + pipeline=[{"$project": SON([("$bad", 1), (".a", 1)])}], + error_code=FIELD_PATH_DOLLAR_PREFIX_ERROR, + msg="$project should report the error from the first invalid field in document order", + ), + StageTestCase( + "precedence_first_field_determines_error_dot_first", + docs=[{"_id": 1, "a": 10}], + pipeline=[{"$project": SON([(".a", 1), ("$bad", 1)])}], + error_code=FIELD_PATH_EMPTY_COMPONENT_ERROR, + msg=( + "$project should report the leading-dot error when it appears" + " before the $-prefix error in document order" + ), + ), + StageTestCase( + "precedence_empty_before_dollar", + docs=[{"_id": 1, "a": 10}], + pipeline=[{"$project": SON([("", 1), ("$bad", 1)])}], + error_code=FIELD_PATH_EMPTY_ERROR, + msg=( + "$project should report empty string error when it appears" + " before a $-prefix error in document order" + ), + ), + StageTestCase( + "precedence_dollar_before_empty", + docs=[{"_id": 1, "a": 10}], + pipeline=[{"$project": SON([("$bad", 1), ("", 1)])}], + error_code=FIELD_PATH_DOLLAR_PREFIX_ERROR, + msg=( + "$project should report $-prefix error when it appears" + " before an empty string error in document order" + ), + ), + StageTestCase( + "precedence_field_name_over_mixing", + docs=[{"_id": 1, "a": 10, "b": 20}], + pipeline=[{"$project": SON([("a", 1), ("", 0)])}], + error_code=FIELD_PATH_EMPTY_ERROR, + msg=( + "$project should report field name validation error even" + " when the same field would also cause a mixing restriction error" + ), + ), +] + +PROJECT_ERROR_TESTS = ( + PROJECT_MIXING_RESTRICTION_ERROR_TESTS + + PROJECT_PATH_COLLISION_ERROR_TESTS + + PROJECT_ARGUMENT_VALIDATION_ERROR_TESTS + + PROJECT_FIELD_NAME_VALIDATION_ERROR_TESTS + + PROJECT_EXPRESSION_VALIDATION_ERROR_TESTS + + PROJECT_ERROR_PRECEDENCE_TESTS +) + + +@pytest.mark.parametrize("test_case", 
pytest_params(PROJECT_ERROR_TESTS)) +def test_project_errors(collection: Any, test_case: StageTestCase) -> None: + """Test $project error cases.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_expressions.py b/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_expressions.py new file mode 100644 index 00000000..1a225cd2 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/project/test_project_expressions.py @@ -0,0 +1,1182 @@ +"""Tests that each expression operator works within $project.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any +from uuid import UUID + +import pytest +from bson import Binary, Decimal128, Int64, ObjectId, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Expression Support]: each expression operator produces the correct +# result when used as a computed field in $project. +PROJECT_EXPRESSION_TESTS: list[StageTestCase] = [ + # Arithmetic. 
+ StageTestCase( + "expr_abs", + docs=[{"_id": 1, "a": -5}], + pipeline=[{"$project": {"r": {"$abs": "$a"}}}], + expected=[{"_id": 1, "r": 5}], + msg="$abs should work in $project", + ), + StageTestCase( + "expr_add", + docs=[{"_id": 1, "a": 3, "b": 4}], + pipeline=[{"$project": {"r": {"$add": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 7}], + msg="$add should work in $project", + ), + StageTestCase( + "expr_ceil", + docs=[{"_id": 1, "a": 2.3}], + pipeline=[{"$project": {"r": {"$ceil": "$a"}}}], + expected=[{"_id": 1, "r": 3.0}], + msg="$ceil should work in $project", + ), + StageTestCase( + "expr_divide", + docs=[{"_id": 1, "a": 10, "b": 4}], + pipeline=[{"$project": {"r": {"$divide": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 2.5}], + msg="$divide should work in $project", + ), + StageTestCase( + "expr_exp", + docs=[{"_id": 1, "a": 0}], + pipeline=[{"$project": {"r": {"$exp": "$a"}}}], + expected=[{"_id": 1, "r": 1.0}], + msg="$exp should work in $project", + ), + StageTestCase( + "expr_floor", + docs=[{"_id": 1, "a": 2.7}], + pipeline=[{"$project": {"r": {"$floor": "$a"}}}], + expected=[{"_id": 1, "r": 2.0}], + msg="$floor should work in $project", + ), + StageTestCase( + "expr_ln", + docs=[{"_id": 1, "a": 10}], + pipeline=[{"$project": {"r": {"$ln": "$a"}}}], + expected=[{"_id": 1, "r": 2.302585092994046}], + msg="$ln should work in $project", + ), + StageTestCase( + "expr_log", + docs=[{"_id": 1, "a": 100, "b": 10}], + pipeline=[{"$project": {"r": {"$log": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 2.0}], + msg="$log should work in $project", + ), + StageTestCase( + "expr_log10", + docs=[{"_id": 1, "a": 1000}], + pipeline=[{"$project": {"r": {"$log10": "$a"}}}], + expected=[{"_id": 1, "r": 3.0}], + msg="$log10 should work in $project", + ), + StageTestCase( + "expr_mod", + docs=[{"_id": 1, "a": 10, "b": 3}], + pipeline=[{"$project": {"r": {"$mod": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 1}], + msg="$mod should work in $project", + ), + 
StageTestCase( + "expr_multiply", + docs=[{"_id": 1, "a": 3, "b": 4}], + pipeline=[{"$project": {"r": {"$multiply": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 12}], + msg="$multiply should work in $project", + ), + StageTestCase( + "expr_pow", + docs=[{"_id": 1, "a": 2, "b": 3}], + pipeline=[{"$project": {"r": {"$pow": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 8}], + msg="$pow should work in $project", + ), + StageTestCase( + "expr_round", + docs=[{"_id": 1, "a": 2.567}], + pipeline=[{"$project": {"r": {"$round": ["$a", 1]}}}], + expected=[{"_id": 1, "r": 2.6}], + msg="$round should work in $project", + ), + StageTestCase( + "expr_sqrt", + docs=[{"_id": 1, "a": 9}], + pipeline=[{"$project": {"r": {"$sqrt": "$a"}}}], + expected=[{"_id": 1, "r": 3.0}], + msg="$sqrt should work in $project", + ), + StageTestCase( + "expr_subtract", + docs=[{"_id": 1, "a": 10, "b": 3}], + pipeline=[{"$project": {"r": {"$subtract": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 7}], + msg="$subtract should work in $project", + ), + StageTestCase( + "expr_trunc", + docs=[{"_id": 1, "a": 2.9}], + pipeline=[{"$project": {"r": {"$trunc": "$a"}}}], + expected=[{"_id": 1, "r": 2.0}], + msg="$trunc should work in $project", + ), + StageTestCase( + "expr_sigmoid", + docs=[{"_id": 1, "a": 0}], + pipeline=[{"$project": {"r": {"$sigmoid": "$a"}}}], + expected=[{"_id": 1, "r": 0.5}], + msg="$sigmoid should work in $project", + ), + # Array. 
+ StageTestCase( + "expr_arrayElemAt", + docs=[{"_id": 1, "a": [10, 20, 30]}], + pipeline=[{"$project": {"r": {"$arrayElemAt": ["$a", 1]}}}], + expected=[{"_id": 1, "r": 20}], + msg="$arrayElemAt should work in $project", + ), + StageTestCase( + "expr_arrayToObject", + docs=[{"_id": 1, "a": [["k", "v"]]}], + pipeline=[{"$project": {"r": {"$arrayToObject": "$a"}}}], + expected=[{"_id": 1, "r": {"k": "v"}}], + msg="$arrayToObject should work in $project", + ), + StageTestCase( + "expr_concatArrays", + docs=[{"_id": 1, "a": [1], "b": [2]}], + pipeline=[{"$project": {"r": {"$concatArrays": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": [1, 2]}], + msg="$concatArrays should work in $project", + ), + StageTestCase( + "expr_filter", + docs=[{"_id": 1, "a": [1, 2, 3, 4]}], + pipeline=[ + {"$project": {"r": {"$filter": {"input": "$a", "cond": {"$gt": ["$$this", 2]}}}}} + ], + expected=[{"_id": 1, "r": [3, 4]}], + msg="$filter should work in $project", + ), + StageTestCase( + "expr_firstN", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[{"$project": {"r": {"$firstN": {"input": "$a", "n": 2}}}}], + expected=[{"_id": 1, "r": [1, 2]}], + msg="$firstN should work in $project", + ), + StageTestCase( + "expr_in", + docs=[{"_id": 1, "a": 2}], + pipeline=[{"$project": {"r": {"$in": ["$a", [1, 2, 3]]}}}], + expected=[{"_id": 1, "r": True}], + msg="$in should work in $project", + ), + StageTestCase( + "expr_indexOfArray", + docs=[{"_id": 1, "a": [10, 20, 30]}], + pipeline=[{"$project": {"r": {"$indexOfArray": ["$a", 20]}}}], + expected=[{"_id": 1, "r": 1}], + msg="$indexOfArray should work in $project", + ), + StageTestCase( + "expr_isArray", + docs=[{"_id": 1, "a": [1, 2]}], + pipeline=[{"$project": {"r": {"$isArray": "$a"}}}], + expected=[{"_id": 1, "r": True}], + msg="$isArray should work in $project", + ), + StageTestCase( + "expr_lastN", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[{"$project": {"r": {"$lastN": {"input": "$a", "n": 2}}}}], + expected=[{"_id": 1, "r": [2, 
3]}], + msg="$lastN should work in $project", + ), + StageTestCase( + "expr_map", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[ + {"$project": {"r": {"$map": {"input": "$a", "in": {"$multiply": ["$$this", 2]}}}}} + ], + expected=[{"_id": 1, "r": [2, 4, 6]}], + msg="$map should work in $project", + ), + StageTestCase( + "expr_maxN_array", + docs=[{"_id": 1, "a": [3, 1, 2]}], + pipeline=[{"$project": {"r": {"$maxN": {"input": "$a", "n": 2}}}}], + expected=[{"_id": 1, "r": [3, 2]}], + msg="$maxN should work in $project", + ), + StageTestCase( + "expr_minN_array", + docs=[{"_id": 1, "a": [3, 1, 2]}], + pipeline=[{"$project": {"r": {"$minN": {"input": "$a", "n": 2}}}}], + expected=[{"_id": 1, "r": [1, 2]}], + msg="$minN should work in $project", + ), + StageTestCase( + "expr_objectToArray", + docs=[{"_id": 1, "a": {"x": 1}}], + pipeline=[{"$project": {"r": {"$objectToArray": "$a"}}}], + expected=[{"_id": 1, "r": [{"k": "x", "v": 1}]}], + msg="$objectToArray should work in $project", + ), + StageTestCase( + "expr_range", + docs=[{"_id": 1}], + pipeline=[{"$project": {"r": {"$range": [0, 3]}}}], + expected=[{"_id": 1, "r": [0, 1, 2]}], + msg="$range should work in $project", + ), + StageTestCase( + "expr_reduce", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[ + { + "$project": { + "r": { + "$reduce": { + "input": "$a", + "initialValue": 0, + "in": {"$add": ["$$value", "$$this"]}, + } + } + } + } + ], + expected=[{"_id": 1, "r": 6}], + msg="$reduce should work in $project", + ), + StageTestCase( + "expr_reverseArray", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[{"$project": {"r": {"$reverseArray": "$a"}}}], + expected=[{"_id": 1, "r": [3, 2, 1]}], + msg="$reverseArray should work in $project", + ), + StageTestCase( + "expr_size", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[{"$project": {"r": {"$size": "$a"}}}], + expected=[{"_id": 1, "r": 3}], + msg="$size should work in $project", + ), + StageTestCase( + "expr_slice", + docs=[{"_id": 1, "a": [1, 2, 3, 
4]}], + pipeline=[{"$project": {"r": {"$slice": ["$a", 2]}}}], + expected=[{"_id": 1, "r": [1, 2]}], + msg="$slice should work in $project", + ), + StageTestCase( + "expr_sortArray", + docs=[{"_id": 1, "a": [3, 1, 2]}], + pipeline=[{"$project": {"r": {"$sortArray": {"input": "$a", "sortBy": 1}}}}], + expected=[{"_id": 1, "r": [1, 2, 3]}], + msg="$sortArray should work in $project", + ), + StageTestCase( + "expr_zip", + docs=[{"_id": 1, "a": [1, 2], "b": [3, 4]}], + pipeline=[{"$project": {"r": {"$zip": {"inputs": ["$a", "$b"]}}}}], + expected=[{"_id": 1, "r": [[1, 3], [2, 4]]}], + msg="$zip should work in $project", + ), + # Bitwise. + StageTestCase( + "expr_bitAnd", + docs=[{"_id": 1, "a": 7, "b": 3}], + pipeline=[{"$project": {"r": {"$bitAnd": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 3}], + msg="$bitAnd should work in $project", + ), + StageTestCase( + "expr_bitNot", + docs=[{"_id": 1, "a": Int64(5)}], + pipeline=[{"$project": {"r": {"$bitNot": "$a"}}}], + expected=[{"_id": 1, "r": Int64(-6)}], + msg="$bitNot should work in $project", + ), + StageTestCase( + "expr_bitOr", + docs=[{"_id": 1, "a": 5, "b": 3}], + pipeline=[{"$project": {"r": {"$bitOr": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 7}], + msg="$bitOr should work in $project", + ), + StageTestCase( + "expr_bitXor", + docs=[{"_id": 1, "a": 5, "b": 3}], + pipeline=[{"$project": {"r": {"$bitXor": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 6}], + msg="$bitXor should work in $project", + ), + # Boolean. 
+ StageTestCase( + "expr_and", + docs=[{"_id": 1, "a": True, "b": False}], + pipeline=[{"$project": {"r": {"$and": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": False}], + msg="$and should work in $project", + ), + StageTestCase( + "expr_not", + docs=[{"_id": 1, "a": False}], + pipeline=[{"$project": {"r": {"$not": ["$a"]}}}], + expected=[{"_id": 1, "r": True}], + msg="$not should work in $project", + ), + StageTestCase( + "expr_or", + docs=[{"_id": 1, "a": False, "b": True}], + pipeline=[{"$project": {"r": {"$or": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": True}], + msg="$or should work in $project", + ), + # Comparisons. + StageTestCase( + "expr_cmp", + docs=[{"_id": 1, "a": 5, "b": 3}], + pipeline=[{"$project": {"r": {"$cmp": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 1}], + msg="$cmp should work in $project", + ), + StageTestCase( + "expr_eq", + docs=[{"_id": 1, "a": 5, "b": 5}], + pipeline=[{"$project": {"r": {"$eq": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": True}], + msg="$eq should work in $project", + ), + StageTestCase( + "expr_gt", + docs=[{"_id": 1, "a": 5, "b": 3}], + pipeline=[{"$project": {"r": {"$gt": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": True}], + msg="$gt should work in $project", + ), + StageTestCase( + "expr_gte", + docs=[{"_id": 1, "a": 5, "b": 5}], + pipeline=[{"$project": {"r": {"$gte": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": True}], + msg="$gte should work in $project", + ), + StageTestCase( + "expr_lt", + docs=[{"_id": 1, "a": 3, "b": 5}], + pipeline=[{"$project": {"r": {"$lt": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": True}], + msg="$lt should work in $project", + ), + StageTestCase( + "expr_lte", + docs=[{"_id": 1, "a": 5, "b": 5}], + pipeline=[{"$project": {"r": {"$lte": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": True}], + msg="$lte should work in $project", + ), + StageTestCase( + "expr_ne", + docs=[{"_id": 1, "a": 5, "b": 3}], + pipeline=[{"$project": {"r": {"$ne": ["$a", "$b"]}}}], + expected=[{"_id": 1, 
"r": True}], + msg="$ne should work in $project", + ), + # Conditional. + StageTestCase( + "expr_cond", + docs=[{"_id": 1, "a": 10}], + pipeline=[{"$project": {"r": {"$cond": [{"$gt": ["$a", 5]}, "big", "small"]}}}], + expected=[{"_id": 1, "r": "big"}], + msg="$cond should work in $project", + ), + StageTestCase( + "expr_ifNull", + docs=[{"_id": 1, "a": None}], + pipeline=[{"$project": {"r": {"$ifNull": ["$a", "default"]}}}], + expected=[{"_id": 1, "r": "default"}], + msg="$ifNull should work in $project", + ), + StageTestCase( + "expr_switch", + docs=[{"_id": 1, "a": 2}], + pipeline=[ + { + "$project": { + "r": { + "$switch": { + "branches": [{"case": {"$eq": ["$a", 2]}, "then": "two"}], + "default": "other", + } + } + } + } + ], + expected=[{"_id": 1, "r": "two"}], + msg="$switch should work in $project", + ), + # Date. + StageTestCase( + "expr_dateAdd", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + pipeline=[ + {"$project": {"r": {"$dateAdd": {"startDate": "$d", "unit": "day", "amount": 1}}}} + ], + expected=[{"_id": 1, "r": datetime(2024, 1, 2)}], + msg="$dateAdd should work in $project", + ), + StageTestCase( + "expr_dateDiff", + docs=[ + { + "_id": 1, + "a": datetime(2024, 1, 1, tzinfo=timezone.utc), + "b": datetime(2024, 1, 4, tzinfo=timezone.utc), + } + ], + pipeline=[ + {"$project": {"r": {"$dateDiff": {"startDate": "$a", "endDate": "$b", "unit": "day"}}}} + ], + expected=[{"_id": 1, "r": Int64(3)}], + msg="$dateDiff should work in $project", + ), + StageTestCase( + "expr_dateFromParts", + docs=[{"_id": 1}], + pipeline=[{"$project": {"r": {"$dateFromParts": {"year": 2024, "month": 6, "day": 15}}}}], + expected=[{"_id": 1, "r": datetime(2024, 6, 15)}], + msg="$dateFromParts should work in $project", + ), + StageTestCase( + "expr_dateFromString", + docs=[{"_id": 1}], + pipeline=[{"$project": {"r": {"$dateFromString": {"dateString": "2024-01-01"}}}}], + expected=[{"_id": 1, "r": datetime(2024, 1, 1)}], + msg="$dateFromString should 
work in $project", + ), + StageTestCase( + "expr_dateSubtract", + docs=[{"_id": 1, "d": datetime(2024, 1, 3, tzinfo=timezone.utc)}], + pipeline=[ + {"$project": {"r": {"$dateSubtract": {"startDate": "$d", "unit": "day", "amount": 1}}}} + ], + expected=[{"_id": 1, "r": datetime(2024, 1, 2)}], + msg="$dateSubtract should work in $project", + ), + StageTestCase( + "expr_dateToParts", + docs=[{"_id": 1, "d": datetime(2024, 3, 15, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$dateToParts": {"date": "$d"}}}}], + expected=[ + { + "_id": 1, + "r": { + "year": 2024, + "month": 3, + "day": 15, + "hour": 0, + "minute": 0, + "second": 0, + "millisecond": 0, + }, + } + ], + msg="$dateToParts should work in $project", + ), + StageTestCase( + "expr_dateToString", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$dateToString": {"date": "$d", "format": "%Y-%m-%d"}}}}], + expected=[{"_id": 1, "r": "2024-01-01"}], + msg="$dateToString should work in $project", + ), + StageTestCase( + "expr_dateTrunc", + docs=[{"_id": 1, "d": datetime(2024, 3, 15, 10, 30, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$dateTrunc": {"date": "$d", "unit": "month"}}}}], + expected=[{"_id": 1, "r": datetime(2024, 3, 1)}], + msg="$dateTrunc should work in $project", + ), + StageTestCase( + "expr_dayOfMonth", + docs=[{"_id": 1, "d": datetime(2024, 3, 15, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$dayOfMonth": "$d"}}}], + expected=[{"_id": 1, "r": 15}], + msg="$dayOfMonth should work in $project", + ), + StageTestCase( + "expr_dayOfWeek", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$dayOfWeek": "$d"}}}], + expected=[{"_id": 1, "r": 2}], + msg="$dayOfWeek should work in $project", + ), + StageTestCase( + "expr_dayOfYear", + docs=[{"_id": 1, "d": datetime(2024, 2, 1, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$dayOfYear": "$d"}}}], + 
expected=[{"_id": 1, "r": 32}], + msg="$dayOfYear should work in $project", + ), + StageTestCase( + "expr_hour", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, 14, 0, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$hour": "$d"}}}], + expected=[{"_id": 1, "r": 14}], + msg="$hour should work in $project", + ), + StageTestCase( + "expr_isoDayOfWeek", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$isoDayOfWeek": "$d"}}}], + expected=[{"_id": 1, "r": 1}], + msg="$isoDayOfWeek should work in $project", + ), + StageTestCase( + "expr_isoWeek", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$isoWeek": "$d"}}}], + expected=[{"_id": 1, "r": 1}], + msg="$isoWeek should work in $project", + ), + StageTestCase( + "expr_isoWeekYear", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$isoWeekYear": "$d"}}}], + expected=[{"_id": 1, "r": Int64(2024)}], + msg="$isoWeekYear should work in $project", + ), + StageTestCase( + "expr_millisecond", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, 0, 0, 0, 123000, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$millisecond": "$d"}}}], + expected=[{"_id": 1, "r": 123}], + msg="$millisecond should work in $project", + ), + StageTestCase( + "expr_minute", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, 10, 45, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$minute": "$d"}}}], + expected=[{"_id": 1, "r": 45}], + msg="$minute should work in $project", + ), + StageTestCase( + "expr_month", + docs=[{"_id": 1, "d": datetime(2024, 7, 1, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$month": "$d"}}}], + expected=[{"_id": 1, "r": 7}], + msg="$month should work in $project", + ), + StageTestCase( + "expr_second", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, 0, 0, 30, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$second": "$d"}}}], + expected=[{"_id": 
1, "r": 30}], + msg="$second should work in $project", + ), + StageTestCase( + "expr_toDate", + docs=[{"_id": 1, "a": Int64(1704067200000)}], + pipeline=[{"$project": {"r": {"$toDate": "$a"}}}], + expected=[{"_id": 1, "r": datetime(2024, 1, 1)}], + msg="$toDate should work in $project", + ), + StageTestCase( + "expr_week", + docs=[{"_id": 1, "d": datetime(2024, 1, 15, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$week": "$d"}}}], + expected=[{"_id": 1, "r": 2}], + msg="$week should work in $project", + ), + StageTestCase( + "expr_year", + docs=[{"_id": 1, "d": datetime(2024, 6, 1, tzinfo=timezone.utc)}], + pipeline=[{"$project": {"r": {"$year": "$d"}}}], + expected=[{"_id": 1, "r": 2024}], + msg="$year should work in $project", + ), + # Misc. + StageTestCase( + "expr_binarySize", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$project": {"r": {"$binarySize": "$a"}}}], + expected=[{"_id": 1, "r": 5}], + msg="$binarySize should work in $project", + ), + StageTestCase( + "expr_bsonSize", + docs=[{"_id": 1, "a": {"x": 1}}], + pipeline=[{"$project": {"r": {"$bsonSize": "$a"}}}], + expected=[{"_id": 1, "r": 12}], + msg="$bsonSize should work in $project", + ), + StageTestCase( + "expr_getField", + docs=[{"_id": 1, "a": {"x": 42}}], + pipeline=[{"$project": {"r": {"$getField": {"field": "x", "input": "$a"}}}}], + expected=[{"_id": 1, "r": 42}], + msg="$getField should work in $project", + ), + StageTestCase( + "expr_let", + docs=[{"_id": 1, "a": 5}], + pipeline=[ + {"$project": {"r": {"$let": {"vars": {"x": "$a"}, "in": {"$multiply": ["$$x", 2]}}}}} + ], + expected=[{"_id": 1, "r": 10}], + msg="$let should work in $project", + ), + StageTestCase( + "expr_literal", + docs=[{"_id": 1}], + pipeline=[{"$project": {"r": {"$literal": "$notAFieldPath"}}}], + expected=[{"_id": 1, "r": "$notAFieldPath"}], + msg="$literal should work in $project", + ), + StageTestCase( + "expr_toHashedIndexKey", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$project": {"r": 
{"$toHashedIndexKey": "$a"}}}], + expected=[{"_id": 1, "r": Int64(5347277839332858538)}], + msg="$toHashedIndexKey should work in $project", + ), + # Object. + StageTestCase( + "expr_mergeObjects", + docs=[{"_id": 1, "a": {"x": 1}, "b": {"y": 2}}], + pipeline=[{"$project": {"r": {"$mergeObjects": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": {"x": 1, "y": 2}}], + msg="$mergeObjects should work in $project", + ), + StageTestCase( + "expr_setField", + docs=[{"_id": 1, "a": {"x": 1}}], + pipeline=[{"$project": {"r": {"$setField": {"field": "y", "input": "$a", "value": 2}}}}], + expected=[{"_id": 1, "r": {"x": 1, "y": 2}}], + msg="$setField should work in $project", + ), + StageTestCase( + "expr_unsetField", + docs=[{"_id": 1, "a": {"x": 1, "y": 2}}], + pipeline=[{"$project": {"r": {"$unsetField": {"field": "x", "input": "$a"}}}}], + expected=[{"_id": 1, "r": {"y": 2}}], + msg="$unsetField should work in $project", + ), + # Set. + StageTestCase( + "expr_allElementsTrue", + docs=[{"_id": 1, "a": [True, True]}], + pipeline=[{"$project": {"r": {"$allElementsTrue": ["$a"]}}}], + expected=[{"_id": 1, "r": True}], + msg="$allElementsTrue should work in $project", + ), + StageTestCase( + "expr_anyElementTrue", + docs=[{"_id": 1, "a": [False, True]}], + pipeline=[{"$project": {"r": {"$anyElementTrue": ["$a"]}}}], + expected=[{"_id": 1, "r": True}], + msg="$anyElementTrue should work in $project", + ), + StageTestCase( + "expr_setDifference", + docs=[{"_id": 1, "a": [1, 2, 3], "b": [2]}], + pipeline=[{"$project": {"r": {"$setDifference": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": [1, 3]}], + msg="$setDifference should work in $project", + ), + StageTestCase( + "expr_setEquals", + docs=[{"_id": 1, "a": [1, 2], "b": [2, 1]}], + pipeline=[{"$project": {"r": {"$setEquals": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": True}], + msg="$setEquals should work in $project", + ), + StageTestCase( + "expr_setIntersection", + docs=[{"_id": 1, "a": [1, 2, 3], "b": [2, 3, 4]}], + 
pipeline=[{"$project": {"r": {"$setIntersection": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": [2, 3]}], + msg="$setIntersection should work in $project", + ), + StageTestCase( + "expr_setIsSubset", + docs=[{"_id": 1, "a": [1, 2], "b": [1, 2, 3]}], + pipeline=[{"$project": {"r": {"$setIsSubset": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": True}], + msg="$setIsSubset should work in $project", + ), + # String. + StageTestCase( + "expr_concat", + docs=[{"_id": 1, "a": "hello", "b": " world"}], + pipeline=[{"$project": {"r": {"$concat": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": "hello world"}], + msg="$concat should work in $project", + ), + StageTestCase( + "expr_indexOfBytes", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$project": {"r": {"$indexOfBytes": ["$a", "ll"]}}}], + expected=[{"_id": 1, "r": 2}], + msg="$indexOfBytes should work in $project", + ), + StageTestCase( + "expr_indexOfCP", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$project": {"r": {"$indexOfCP": ["$a", "ll"]}}}], + expected=[{"_id": 1, "r": 2}], + msg="$indexOfCP should work in $project", + ), + StageTestCase( + "expr_ltrim", + docs=[{"_id": 1, "a": " hi"}], + pipeline=[{"$project": {"r": {"$ltrim": {"input": "$a"}}}}], + expected=[{"_id": 1, "r": "hi"}], + msg="$ltrim should work in $project", + ), + StageTestCase( + "expr_regexFind", + docs=[{"_id": 1, "a": "hello 123"}], + pipeline=[{"$project": {"r": {"$regexFind": {"input": "$a", "regex": "[0-9]+"}}}}], + expected=[{"_id": 1, "r": {"match": "123", "idx": 6, "captures": []}}], + msg="$regexFind should work in $project", + ), + StageTestCase( + "expr_regexFindAll", + docs=[{"_id": 1, "a": "a1b2"}], + pipeline=[{"$project": {"r": {"$regexFindAll": {"input": "$a", "regex": "[0-9]"}}}}], + expected=[ + { + "_id": 1, + "r": [ + {"match": "1", "idx": 1, "captures": []}, + {"match": "2", "idx": 3, "captures": []}, + ], + } + ], + msg="$regexFindAll should work in $project", + ), + StageTestCase( + "expr_regexMatch", + docs=[{"_id": 
1, "a": "hello123"}], + pipeline=[{"$project": {"r": {"$regexMatch": {"input": "$a", "regex": "[0-9]+"}}}}], + expected=[{"_id": 1, "r": True}], + msg="$regexMatch should work in $project", + ), + StageTestCase( + "expr_replaceAll", + docs=[{"_id": 1, "a": "aabbcc"}], + pipeline=[ + {"$project": {"r": {"$replaceAll": {"input": "$a", "find": "b", "replacement": "x"}}}} + ], + expected=[{"_id": 1, "r": "aaxxcc"}], + msg="$replaceAll should work in $project", + ), + StageTestCase( + "expr_replaceOne", + docs=[{"_id": 1, "a": "aabbcc"}], + pipeline=[ + {"$project": {"r": {"$replaceOne": {"input": "$a", "find": "b", "replacement": "x"}}}} + ], + expected=[{"_id": 1, "r": "aaxbcc"}], + msg="$replaceOne should work in $project", + ), + StageTestCase( + "expr_rtrim", + docs=[{"_id": 1, "a": "hi "}], + pipeline=[{"$project": {"r": {"$rtrim": {"input": "$a"}}}}], + expected=[{"_id": 1, "r": "hi"}], + msg="$rtrim should work in $project", + ), + StageTestCase( + "expr_split", + docs=[{"_id": 1, "a": "a,b,c"}], + pipeline=[{"$project": {"r": {"$split": ["$a", ","]}}}], + expected=[{"_id": 1, "r": ["a", "b", "c"]}], + msg="$split should work in $project", + ), + StageTestCase( + "expr_strcasecmp", + docs=[{"_id": 1, "a": "abc", "b": "ABC"}], + pipeline=[{"$project": {"r": {"$strcasecmp": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 0}], + msg="$strcasecmp should work in $project", + ), + StageTestCase( + "expr_strLenBytes", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$project": {"r": {"$strLenBytes": "$a"}}}], + expected=[{"_id": 1, "r": 5}], + msg="$strLenBytes should work in $project", + ), + StageTestCase( + "expr_strLenCP", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$project": {"r": {"$strLenCP": "$a"}}}], + expected=[{"_id": 1, "r": 5}], + msg="$strLenCP should work in $project", + ), + StageTestCase( + "expr_substr", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$project": {"r": {"$substr": ["$a", 1, 3]}}}], + expected=[{"_id": 1, "r": "ell"}], + 
msg="$substr should work in $project", + ), + StageTestCase( + "expr_substrBytes", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$project": {"r": {"$substrBytes": ["$a", 1, 3]}}}], + expected=[{"_id": 1, "r": "ell"}], + msg="$substrBytes should work in $project", + ), + StageTestCase( + "expr_substrCP", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$project": {"r": {"$substrCP": ["$a", 1, 3]}}}], + expected=[{"_id": 1, "r": "ell"}], + msg="$substrCP should work in $project", + ), + StageTestCase( + "expr_toLower", + docs=[{"_id": 1, "a": "HELLO"}], + pipeline=[{"$project": {"r": {"$toLower": "$a"}}}], + expected=[{"_id": 1, "r": "hello"}], + msg="$toLower should work in $project", + ), + StageTestCase( + "expr_toString", + docs=[{"_id": 1, "a": 123}], + pipeline=[{"$project": {"r": {"$toString": "$a"}}}], + expected=[{"_id": 1, "r": "123"}], + msg="$toString should work in $project", + ), + StageTestCase( + "expr_toUpper", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$project": {"r": {"$toUpper": "$a"}}}], + expected=[{"_id": 1, "r": "HELLO"}], + msg="$toUpper should work in $project", + ), + StageTestCase( + "expr_trim", + docs=[{"_id": 1, "a": " hi "}], + pipeline=[{"$project": {"r": {"$trim": {"input": "$a"}}}}], + expected=[{"_id": 1, "r": "hi"}], + msg="$trim should work in $project", + ), + # Timestamp. + StageTestCase( + "expr_tsIncrement", + docs=[{"_id": 1, "t": Timestamp(100, 5)}], + pipeline=[{"$project": {"r": {"$tsIncrement": "$t"}}}], + expected=[{"_id": 1, "r": Int64(5)}], + msg="$tsIncrement should work in $project", + ), + StageTestCase( + "expr_tsSecond", + docs=[{"_id": 1, "t": Timestamp(100, 5)}], + pipeline=[{"$project": {"r": {"$tsSecond": "$t"}}}], + expected=[{"_id": 1, "r": Int64(100)}], + msg="$tsSecond should work in $project", + ), + # Trigonometry. 
+ StageTestCase( + "expr_acos", + docs=[{"_id": 1, "a": 0.5}], + pipeline=[{"$project": {"r": {"$acos": "$a"}}}], + expected=[{"_id": 1, "r": 1.0471975511965979}], + msg="$acos should work in $project", + ), + StageTestCase( + "expr_acosh", + docs=[{"_id": 1, "a": 2}], + pipeline=[{"$project": {"r": {"$acosh": "$a"}}}], + expected=[{"_id": 1, "r": 1.3169578969248166}], + msg="$acosh should work in $project", + ), + StageTestCase( + "expr_asin", + docs=[{"_id": 1, "a": 0.5}], + pipeline=[{"$project": {"r": {"$asin": "$a"}}}], + expected=[{"_id": 1, "r": 0.5235987755982989}], + msg="$asin should work in $project", + ), + StageTestCase( + "expr_asinh", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$project": {"r": {"$asinh": "$a"}}}], + expected=[{"_id": 1, "r": 0.881373587019543}], + msg="$asinh should work in $project", + ), + StageTestCase( + "expr_atan", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$project": {"r": {"$atan": "$a"}}}], + expected=[{"_id": 1, "r": 0.7853981633974483}], + msg="$atan should work in $project", + ), + StageTestCase( + "expr_atan2", + docs=[{"_id": 1, "a": 1, "b": 1}], + pipeline=[{"$project": {"r": {"$atan2": ["$a", "$b"]}}}], + expected=[{"_id": 1, "r": 0.7853981633974483}], + msg="$atan2 should work in $project", + ), + StageTestCase( + "expr_atanh", + docs=[{"_id": 1, "a": 0.5}], + pipeline=[{"$project": {"r": {"$atanh": "$a"}}}], + expected=[{"_id": 1, "r": 0.5493061443340548}], + msg="$atanh should work in $project", + ), + StageTestCase( + "expr_cos", + docs=[{"_id": 1, "a": 0}], + pipeline=[{"$project": {"r": {"$cos": "$a"}}}], + expected=[{"_id": 1, "r": 1.0}], + msg="$cos should work in $project", + ), + StageTestCase( + "expr_cosh", + docs=[{"_id": 1, "a": 0}], + pipeline=[{"$project": {"r": {"$cosh": "$a"}}}], + expected=[{"_id": 1, "r": 1.0}], + msg="$cosh should work in $project", + ), + StageTestCase( + "expr_degreesToRadians", + docs=[{"_id": 1, "a": 90}], + pipeline=[{"$project": {"r": {"$degreesToRadians": "$a"}}}], + 
expected=[{"_id": 1, "r": 1.5707963267948966}], + msg="$degreesToRadians should work in $project", + ), + StageTestCase( + "expr_radiansToDegrees", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$project": {"r": {"$radiansToDegrees": "$a"}}}], + expected=[{"_id": 1, "r": 57.29577951308232}], + msg="$radiansToDegrees should work in $project", + ), + StageTestCase( + "expr_sin", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$project": {"r": {"$sin": "$a"}}}], + expected=[{"_id": 1, "r": 0.8414709848078965}], + msg="$sin should work in $project", + ), + StageTestCase( + "expr_sinh", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$project": {"r": {"$sinh": "$a"}}}], + expected=[{"_id": 1, "r": 1.1752011936438014}], + msg="$sinh should work in $project", + ), + StageTestCase( + "expr_tan", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$project": {"r": {"$tan": "$a"}}}], + expected=[{"_id": 1, "r": 1.5574077246549023}], + msg="$tan should work in $project", + ), + StageTestCase( + "expr_tanh", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$project": {"r": {"$tanh": "$a"}}}], + expected=[{"_id": 1, "r": 0.7615941559557649}], + msg="$tanh should work in $project", + ), + # Type. 
+ StageTestCase( + "expr_convert", + docs=[{"_id": 1, "a": "123"}], + pipeline=[{"$project": {"r": {"$convert": {"input": "$a", "to": "int"}}}}], + expected=[{"_id": 1, "r": 123}], + msg="$convert should work in $project", + ), + StageTestCase( + "expr_isNumber", + docs=[{"_id": 1, "a": 42}], + pipeline=[{"$project": {"r": {"$isNumber": "$a"}}}], + expected=[{"_id": 1, "r": True}], + msg="$isNumber should work in $project", + ), + StageTestCase( + "expr_toBool", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$project": {"r": {"$toBool": "$a"}}}], + expected=[{"_id": 1, "r": True}], + msg="$toBool should work in $project", + ), + StageTestCase( + "expr_toDecimal", + docs=[{"_id": 1, "a": 42}], + pipeline=[{"$project": {"r": {"$toDecimal": "$a"}}}], + expected=[{"_id": 1, "r": Decimal128("42")}], + msg="$toDecimal should work in $project", + ), + StageTestCase( + "expr_toDouble", + docs=[{"_id": 1, "a": "3.14"}], + pipeline=[{"$project": {"r": {"$toDouble": "$a"}}}], + expected=[{"_id": 1, "r": 3.14}], + msg="$toDouble should work in $project", + ), + StageTestCase( + "expr_toInt", + docs=[{"_id": 1, "a": 3.9}], + pipeline=[{"$project": {"r": {"$toInt": "$a"}}}], + expected=[{"_id": 1, "r": 3}], + msg="$toInt should work in $project", + ), + StageTestCase( + "expr_toLong", + docs=[{"_id": 1, "a": 42}], + pipeline=[{"$project": {"r": {"$toLong": "$a"}}}], + expected=[{"_id": 1, "r": Int64(42)}], + msg="$toLong should work in $project", + ), + StageTestCase( + "expr_toObjectId", + docs=[{"_id": 1, "a": "507f1f77bcf86cd799439011"}], + pipeline=[{"$project": {"r": {"$toObjectId": "$a"}}}], + expected=[{"_id": 1, "r": ObjectId("507f1f77bcf86cd799439011")}], + msg="$toObjectId should work in $project", + ), + StageTestCase( + "expr_type", + docs=[{"_id": 1, "a": 42}], + pipeline=[{"$project": {"r": {"$type": "$a"}}}], + expected=[{"_id": 1, "r": "int"}], + msg="$type should work in $project", + ), + StageTestCase( + "expr_toUUID", + docs=[{"_id": 1, "a": 
"12345678-1234-1234-1234-123456789abc"}], + pipeline=[{"$project": {"r": {"$toUUID": "$a"}}}], + expected=[{"_id": 1, "r": Binary.from_uuid(UUID("12345678-1234-1234-1234-123456789abc"))}], + msg="$toUUID should work in $project", + ), + # Accumulator (as expressions in $project). + StageTestCase( + "expr_sum", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[{"$project": {"r": {"$sum": "$a"}}}], + expected=[{"_id": 1, "r": 6}], + msg="$sum should work in $project", + ), + StageTestCase( + "expr_avg", + docs=[{"_id": 1, "a": [2, 4, 6]}], + pipeline=[{"$project": {"r": {"$avg": "$a"}}}], + expected=[{"_id": 1, "r": 4.0}], + msg="$avg should work in $project", + ), + StageTestCase( + "expr_min", + docs=[{"_id": 1, "a": [3, 1, 2]}], + pipeline=[{"$project": {"r": {"$min": "$a"}}}], + expected=[{"_id": 1, "r": 1}], + msg="$min should work in $project", + ), + StageTestCase( + "expr_max", + docs=[{"_id": 1, "a": [3, 1, 2]}], + pipeline=[{"$project": {"r": {"$max": "$a"}}}], + expected=[{"_id": 1, "r": 3}], + msg="$max should work in $project", + ), + StageTestCase( + "expr_first", + docs=[{"_id": 1, "a": [10, 20, 30]}], + pipeline=[{"$project": {"r": {"$first": "$a"}}}], + expected=[{"_id": 1, "r": 10}], + msg="$first should work in $project", + ), + StageTestCase( + "expr_last", + docs=[{"_id": 1, "a": [10, 20, 30]}], + pipeline=[{"$project": {"r": {"$last": "$a"}}}], + expected=[{"_id": 1, "r": 30}], + msg="$last should work in $project", + ), + StageTestCase( + "expr_stdDevPop", + docs=[{"_id": 1, "a": [2, 4, 4, 4, 5, 5, 7, 9]}], + pipeline=[{"$project": {"r": {"$stdDevPop": "$a"}}}], + expected=[{"_id": 1, "r": 2.0}], + msg="$stdDevPop should work in $project", + ), + StageTestCase( + "expr_stdDevSamp", + docs=[{"_id": 1, "a": [1, 3]}], + pipeline=[{"$project": {"r": {"$stdDevSamp": "$a"}}}], + expected=[{"_id": 1, "r": 1.4142135623730951}], + msg="$stdDevSamp should work in $project", + ), + StageTestCase( + "expr_median", + docs=[{"_id": 1, "a": [1, 2, 3, 4, 5]}], 
@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(PROJECT_EXPRESSION_TESTS))
def test_project_expression_cases(collection: Any, test_case: StageTestCase) -> None:
    """Test that expression operators work within $project.

    For each parametrized case the collection is seeded with
    ``test_case.docs`` (when the case supplies any), ``test_case.pipeline``
    is run through the ``aggregate`` command, and the command result is
    handed to ``assertResult`` together with the case's expected documents,
    expected error code, and failure message.
    """
    # Only seed when the case actually supplies documents.
    if test_case.docs:
        collection.insert_many(test_case.docs)
    result = execute_command(
        collection,
        {
            "aggregate": collection.name,
            "pipeline": test_case.pipeline,
            # Empty cursor document: no explicit batchSize is requested.
            "cursor": {},
        },
    )
    # assertResult receives both `expected` and `error_code`; each case
    # decides which of the two is populated.
    assertResult(
        result,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )
# Property [_id Field Behavior]: _id has special projection rules for
# suppression, mode determination, and sub-field projection.
PROJECT_ID_BEHAVIOR_TESTS: list[StageTestCase] = [
    # _id combined with another field: the _id flag may differ from the
    # flag used for the other field.
    StageTestCase(
        "id_suppress_in_inclusion_mode",
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": {"_id": 0, "a": 1}}],
        expected=[{"a": 10}],
        msg="$project should suppress _id from output when _id: 0 in inclusion mode",
    ),
    StageTestCase(
        "id_explicit_in_exclusion_mode",
        docs=[{"_id": 42, "a": 10, "b": 20}],
        pipeline=[{"$project": {"_id": 1, "a": 0}}],
        expected=[{"_id": 42, "b": 20}],
        msg="$project should allow explicit _id: 1 in exclusion mode",
    ),
    # Non-flag values assigned to _id.
    StageTestCase(
        "id_null_computed",
        docs=[{"_id": 1, "a": 10}],
        pipeline=[{"$project": {"_id": None, "a": 1}}],
        expected=[{"a": 10, "_id": None}],
        msg="$project should treat _id: null as a computed expression setting _id to null",
    ),
    StageTestCase(
        "id_remove_alone_produces_empty",
        docs=[{"_id": 1, "a": 10}],
        pipeline=[{"$project": {"_id": "$$REMOVE"}}],
        expected=[{}],
        msg="$project with only _id: $$REMOVE should produce an empty document",
    ),
    # Dotted paths into an embedded-document _id.
    StageTestCase(
        "id_subfield_inclusion",
        docs=[{"_id": {"x": 1, "y": 2}, "a": 10}],
        pipeline=[{"$project": {"_id.x": 1, "a": 1}}],
        expected=[{"_id": {"x": 1}, "a": 10}],
        msg="$project should project _id sub-fields via dotted path inclusion",
    ),
    StageTestCase(
        "id_subfield_exclusion",
        docs=[{"_id": {"x": 1, "y": 2}, "a": 10}],
        pipeline=[{"$project": {"_id.x": 0}}],
        expected=[{"_id": {"y": 2}, "a": 10}],
        msg="$project should exclude _id sub-fields via dotted path exclusion",
    ),
    # _id as the only key: its flag alone determines the projection mode.
    StageTestCase(
        "id_zero_alone_exclusion_mode",
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": {"_id": 0}}],
        expected=[{"a": 10, "b": 20}],
        msg="$project with only _id: 0 should be exclusion mode returning all other fields",
    ),
    StageTestCase(
        "id_one_alone_inclusion_mode",
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": {"_id": 1}}],
        expected=[{"_id": 1}],
        msg="$project with only _id: 1 should be inclusion mode returning only _id",
    ),
]


# Carries the `aggregate` marker because the test drives the aggregate
# command; this keeps it selectable with `-m aggregate`.
@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(PROJECT_ID_BEHAVIOR_TESTS))
def test_project_id(collection: Any, test_case: StageTestCase) -> None:
    """Test $project _id field behavior.

    Seeds the collection with ``test_case.docs`` (when any are given), runs
    ``test_case.pipeline`` through the ``aggregate`` command, and passes the
    command result to ``assertResult`` with the case's expected documents,
    expected error code, and failure message.
    """
    # Only seed when the case actually supplies documents.
    if test_case.docs:
        collection.insert_many(test_case.docs)
    result = execute_command(
        collection,
        {
            "aggregate": collection.name,
            "pipeline": test_case.pipeline,
            # Empty cursor document: no explicit batchSize is requested.
            "cursor": {},
        },
    )
    assertResult(
        result,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )
# Property [Inclusion Semantics]: only explicitly included fields (plus _id)
# appear in the output.
PROJECT_INCLUSION_TESTS: list[StageTestCase] = [
    StageTestCase(
        "inclusion_id_default_and_only_specified",
        docs=[{"_id": 1, "a": 10, "b": 20, "c": 30}],
        pipeline=[{"$project": {"a": 1}}],
        expected=[{"_id": 1, "a": 10}],
        msg="$project should include _id by default and only output explicitly included fields",
    ),
    StageTestCase(
        "inclusion_nonexistent_field_omitted",
        docs=[{"_id": 1, "a": 10}],
        pipeline=[{"$project": {"a": 1, "z": 1}}],
        expected=[{"_id": 1, "a": 10}],
        msg="$project should silently omit a non-existent included field",
    ),
    StageTestCase(
        "inclusion_multiple_fields",
        docs=[{"_id": 1, "a": 10, "b": 20, "c": 30}],
        pipeline=[{"$project": {"a": 1, "c": 1}}],
        expected=[{"_id": 1, "a": 10, "c": 30}],
        msg="$project should include multiple fields simultaneously",
    ),
]

# Property [Truthy Inclusion Flags]: various non-zero numeric types and true
# are all treated as inclusion flags equivalent to 1.
#
# Every case projects field "a" of the same two-field document with a
# different flag value and expects only _id and "a" back, so the cases are
# generated from (case id, flag value, failure message) rows. The docs,
# pipeline and expected containers are built fresh for each case.
PROJECT_INCLUSION_FLAG_TESTS: list[StageTestCase] = [
    StageTestCase(
        case_id,
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": {"a": flag}}],
        expected=[{"_id": 1, "a": 10}],
        msg=failure_msg,
    )
    for case_id, flag, failure_msg in (
        (
            "inclusion_flag_int_positive",
            2,
            "$project should treat non-zero positive integer as truthy inclusion flag",
        ),
        (
            "inclusion_flag_int_negative",
            -1,
            "$project should treat negative integer as truthy inclusion flag",
        ),
        (
            "inclusion_flag_float_fraction",
            0.5,
            "$project should treat non-zero float as truthy inclusion flag",
        ),
        (
            "inclusion_flag_float_nan",
            FLOAT_NAN,
            "$project should treat NaN as truthy inclusion flag",
        ),
        (
            "inclusion_flag_float_infinity",
            FLOAT_INFINITY,
            "$project should treat Infinity as truthy inclusion flag",
        ),
        (
            "inclusion_flag_float_neg_infinity",
            FLOAT_NEGATIVE_INFINITY,
            "$project should treat -Infinity as truthy inclusion flag",
        ),
        (
            "inclusion_flag_int64",
            Int64(99),
            "$project should treat non-zero Int64 as truthy inclusion flag",
        ),
        (
            "inclusion_flag_decimal128",
            Decimal128("42"),
            "$project should treat non-zero Decimal128 as truthy inclusion flag",
        ),
        (
            "inclusion_flag_decimal128_nan",
            DECIMAL128_NAN,
            "$project should treat Decimal128('NaN') as truthy inclusion flag",
        ),
        (
            "inclusion_flag_decimal128_infinity",
            DECIMAL128_INFINITY,
            "$project should treat Decimal128('Infinity') as truthy inclusion flag",
        ),
        (
            "inclusion_flag_decimal128_neg_infinity",
            DECIMAL128_NEGATIVE_INFINITY,
            "$project should treat Decimal128('-Infinity') as truthy inclusion flag",
        ),
        (
            "inclusion_flag_bool_true",
            True,
            "$project should treat true as inclusion flag",
        ),
    )
]

# Property [Exclusion Semantics]: excluding a field removes it from output
# while all other fields are returned.
PROJECT_EXCLUSION_TESTS: list[StageTestCase] = [
    StageTestCase(
        "exclusion_removes_field",
        docs=[{"_id": 1, "a": 10, "b": 20, "c": 30}],
        pipeline=[{"$project": {"b": 0}}],
        expected=[{"_id": 1, "a": 10, "c": 30}],
        msg="$project should remove excluded field and return all others",
    ),
    StageTestCase(
        "exclusion_nonexistent_field",
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": {"z": 0}}],
        expected=[{"_id": 1, "a": 10, "b": 20}],
        msg="$project should have no effect when excluding a non-existent field",
    ),
    StageTestCase(
        "exclusion_multiple_fields",
        docs=[{"_id": 1, "a": 10, "b": 20, "c": 30}],
        pipeline=[{"$project": {"a": 0, "c": 0}}],
        expected=[{"_id": 1, "b": 20}],
        msg="$project should exclude multiple fields simultaneously",
    ),
]
# Property [Falsy Exclusion Flags]: various zero-valued numeric types and false
# are all treated as exclusion flags equivalent to 0.
PROJECT_EXCLUSION_FLAG_TESTS: list[StageTestCase] = [
    StageTestCase(
        "exclusion_flag_int_zero",
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": {"a": 0}}],
        expected=[{"_id": 1, "b": 20}],
        msg="$project should treat 0 as falsy exclusion flag",
    ),
    StageTestCase(
        "exclusion_flag_float_zero",
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": {"a": DOUBLE_ZERO}}],
        expected=[{"_id": 1, "b": 20}],
        msg="$project should treat 0.0 as falsy exclusion flag",
    ),
    StageTestCase(
        "exclusion_flag_float_neg_zero",
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": {"a": DOUBLE_NEGATIVE_ZERO}}],
        expected=[{"_id": 1, "b": 20}],
        msg="$project should treat -0.0 as falsy exclusion flag",
    ),
    StageTestCase(
        "exclusion_flag_int64_zero",
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": {"a": INT64_ZERO}}],
        expected=[{"_id": 1, "b": 20}],
        msg="$project should treat Int64(0) as falsy exclusion flag",
    ),
    StageTestCase(
        "exclusion_flag_decimal128_zero",
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": {"a": DECIMAL128_ZERO}}],
        expected=[{"_id": 1, "b": 20}],
        msg="$project should treat Decimal128('0') as falsy exclusion flag",
    ),
    StageTestCase(
        "exclusion_flag_decimal128_neg_zero",
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": {"a": DECIMAL128_NEGATIVE_ZERO}}],
        expected=[{"_id": 1, "b": 20}],
        msg="$project should treat Decimal128('-0') as falsy exclusion flag",
    ),
    StageTestCase(
        "exclusion_flag_bool_false",
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$project": {"a": False}}],
        expected=[{"_id": 1, "b": 20}],
        msg="$project should treat false as exclusion flag",
    ),
]

# All inclusion/exclusion tables, concatenated so one parametrized test
# runs every case.
PROJECT_MODE_TESTS = (
    PROJECT_INCLUSION_TESTS
    + PROJECT_INCLUSION_FLAG_TESTS
    + PROJECT_EXCLUSION_TESTS
    + PROJECT_EXCLUSION_FLAG_TESTS
)


# Carries the `aggregate` marker because the test drives the aggregate
# command; this keeps it selectable with `-m aggregate`.
@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(PROJECT_MODE_TESTS))
def test_project_modes(collection: Any, test_case: StageTestCase) -> None:
    """Test $project inclusion and exclusion modes.

    Seeds the collection with ``test_case.docs`` (when any are given), runs
    ``test_case.pipeline`` through the ``aggregate`` command, and passes the
    command result to ``assertResult`` with the case's expected documents,
    expected error code, and failure message.
    """
    # Only seed when the case actually supplies documents.
    if test_case.docs:
        collection.insert_many(test_case.docs)
    result = execute_command(
        collection,
        {
            "aggregate": collection.name,
            "pipeline": test_case.pipeline,
            # Empty cursor document: no explicit batchSize is requested.
            "cursor": {},
        },
    )
    assertResult(
        result,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )
# Test-case table for dotted-path and sub-document projection. Each entry
# gives the seed documents, the $project pipeline, the expected output
# documents, and the message reported on failure.
PROJECT_DOTTED_PATH_TESTS: list[StageTestCase] = [
    # Dotted path into a plain embedded document.
    StageTestCase(
        "dotted_inclusion_nested",
        docs=[{"_id": 1, "a": {"b": 10, "c": 20}, "d": 30}],
        pipeline=[{"$project": {"a.b": 1}}],
        expected=[{"_id": 1, "a": {"b": 10}}],
        msg="$project should include a nested field via dotted path",
    ),
    StageTestCase(
        "dotted_exclusion_nested",
        docs=[{"_id": 1, "a": {"b": 10, "c": 20}, "d": 30}],
        pipeline=[{"$project": {"a.b": 0}}],
        expected=[{"_id": 1, "a": {"c": 20}, "d": 30}],
        msg="$project should exclude a nested field via dotted path",
    ),
    # Same documents and expectations as the two cases above, but the
    # projection is written in sub-document notation.
    StageTestCase(
        "dotted_subdoc_inclusion_equivalent",
        docs=[{"_id": 1, "a": {"b": 10, "c": 20}, "d": 30}],
        pipeline=[{"$project": {"a": {"b": 1}}}],
        expected=[{"_id": 1, "a": {"b": 10}}],
        msg="$project sub-document notation should be equivalent to dotted path inclusion",
    ),
    StageTestCase(
        "dotted_subdoc_exclusion_equivalent",
        docs=[{"_id": 1, "a": {"b": 10, "c": 20}, "d": 30}],
        pipeline=[{"$project": {"a": {"b": 0}}}],
        expected=[{"_id": 1, "a": {"c": 20}, "d": 30}],
        msg="$project sub-document notation should be equivalent to dotted path exclusion",
    ),
    # Parent field is an array of documents.
    StageTestCase(
        "dotted_through_array_inclusion",
        docs=[{"_id": 1, "a": [{"b": 1, "c": 10}, {"b": 2, "c": 20}, {"b": 3, "c": 30}]}],
        pipeline=[{"$project": {"a.b": 1}}],
        expected=[{"_id": 1, "a": [{"b": 1}, {"b": 2}, {"b": 3}]}],
        msg="$project dotted path should traverse into array elements for inclusion",
    ),
    StageTestCase(
        "dotted_through_array_exclusion",
        docs=[{"_id": 1, "a": [{"b": 1, "c": 10}, {"b": 2, "c": 20}]}],
        pipeline=[{"$project": {"a.b": 0}}],
        expected=[{"_id": 1, "a": [{"c": 10}, {"c": 20}]}],
        msg="$project dotted path should traverse into array elements for exclusion",
    ),
    # Four-component path (a.b.c.d).
    StageTestCase(
        "dotted_deep_nested_inclusion",
        docs=[{"_id": 1, "a": {"b": {"c": {"d": 42}}}}],
        pipeline=[{"$project": {"a.b.c.d": 1}}],
        expected=[{"_id": 1, "a": {"b": {"c": {"d": 42}}}}],
        msg="$project should handle deeply nested dotted paths (3+ levels) for inclusion",
    ),
    StageTestCase(
        "dotted_deep_nested_exclusion",
        docs=[{"_id": 1, "a": {"b": {"c": {"d": 42, "e": 99}}}}],
        pipeline=[{"$project": {"a.b.c.d": 0}}],
        expected=[{"_id": 1, "a": {"b": {"c": {"e": 99}}}}],
        msg="$project should handle deeply nested dotted paths (3+ levels) for exclusion",
    ),
    # Mixed notation: a dotted key used inside a sub-document spec.
    StageTestCase(
        "dotted_key_in_subdoc",
        docs=[{"_id": 1, "a": {"b": {"c": 10}, "d": 20}}],
        pipeline=[{"$project": {"a": {"b.c": 1}}}],
        expected=[{"_id": 1, "a": {"b": {"c": 10}}}],
        msg="$project should support dotted key names inside sub-document notation",
    ),
    # Parent field holds a scalar (42), so there is no "b" to descend into;
    # the three cases cover inclusion, exclusion, and a computed value.
    StageTestCase(
        "dotted_scalar_parent_inclusion_omits",
        docs=[{"_id": 1, "a": 42}],
        pipeline=[{"$project": {"a.b": 1}}],
        expected=[{"_id": 1}],
        msg=(
            "$project inclusion via dotted path should omit the field"
            " when the parent is a scalar"
        ),
    ),
    StageTestCase(
        "dotted_scalar_parent_exclusion_preserves",
        docs=[{"_id": 1, "a": 42}],
        pipeline=[{"$project": {"a.b": 0}}],
        expected=[{"_id": 1, "a": 42}],
        msg=(
            "$project exclusion via dotted path should preserve the scalar"
            " when the parent is a scalar"
        ),
    ),
    StageTestCase(
        "dotted_scalar_parent_computed_replaces",
        docs=[{"_id": 1, "a": 42}],
        pipeline=[{"$project": {"a.b": {"$literal": 99}}}],
        expected=[{"_id": 1, "a": {"b": 99}}],
        msg=(
            "$project computed via dotted path should replace the scalar"
            " with an object containing the computed field"
        ),
    ),
]
PROJECT_ARRAY_TRAVERSAL_TESTS: list[StageTestCase] = [
    # Array under "a" mixing sub-documents, a string, and None.
    StageTestCase(
        "array_traversal_inclusion_removes_scalars_and_nulls",
        docs=[
            {"_id": 1, "a": [{"b": 1, "c": 10}, "scalar", None, {"b": 2, "c": 20}]},
        ],
        pipeline=[
            {"$project": {"a.b": 1}},
        ],
        expected=[
            {"_id": 1, "a": [{"b": 1}, {"b": 2}]},
        ],
        msg=(
            "$project inclusion through arrays should remove scalars and nulls"
            " and extract matching fields from objects"
        ),
    ),
    StageTestCase(
        "array_traversal_exclusion_preserves_scalars_and_nulls",
        docs=[
            {"_id": 1, "a": [{"b": 1, "c": 10}, "scalar", None, {"b": 2, "c": 20}]},
        ],
        pipeline=[
            {"$project": {"a.b": 0}},
        ],
        expected=[
            {"_id": 1, "a": [{"c": 10}, "scalar", None, {"c": 20}]},
        ],
        msg=(
            "$project exclusion through arrays should preserve scalars and nulls"
            " and remove matching fields from objects"
        ),
    ),
    StageTestCase(
        "array_traversal_computed_overwrites",
        docs=[
            {"_id": 1, "a": [{"b": 1, "c": 10}, "scalar", None, {"b": 2, "c": 20}]},
        ],
        pipeline=[
            {"$project": {"a.b": {"$literal": 99}}},
        ],
        expected=[
            {"_id": 1, "a": [{"b": 99}, {"b": 99}, {"b": 99}, {"b": 99}]},
        ],
        msg=(
            "$project computed through arrays should overwrite the field in objects,"
            " turn scalars into objects, and turn nulls into objects with the"
            " computed field"
        ),
    ),
    # Arrays nested directly inside the array under "a".
    StageTestCase(
        "array_traversal_nested_arrays_inclusion",
        docs=[
            {"_id": 1, "a": [[{"b": 1, "c": 10}, {"b": 2, "c": 20}], [{"b": 3, "c": 30}]]},
        ],
        pipeline=[
            {"$project": {"a.b": 1}},
        ],
        expected=[
            {"_id": 1, "a": [[{"b": 1}, {"b": 2}], [{"b": 3}]]},
        ],
        msg="$project inclusion should traverse nested arrays",
    ),
    StageTestCase(
        "array_traversal_nested_arrays_exclusion",
        docs=[
            {"_id": 1, "a": [[{"b": 1, "c": 10}, {"b": 2, "c": 20}], [{"b": 3, "c": 30}]]},
        ],
        pipeline=[
            {"$project": {"a.b": 0}},
        ],
        expected=[
            {"_id": 1, "a": [[{"c": 10}, {"c": 20}], [{"c": 30}]]},
        ],
        msg="$project exclusion should traverse nested arrays",
    ),
    StageTestCase(
        "array_traversal_nested_arrays_computed",
        docs=[
            {"_id": 1, "a": [[{"b": 1}, {"b": 2}], [{"b": 3}]]},
        ],
        pipeline=[
            {"$project": {"a.b": {"$literal": 99}}},
        ],
        expected=[
            {"_id": 1, "a": [[{"b": 99}, {"b": 99}], [{"b": 99}]]},
        ],
        msg="$project computed should traverse nested arrays",
    ),
    # "a" is an empty array.
    StageTestCase(
        "array_traversal_empty_array_inclusion",
        docs=[
            {"_id": 1, "a": []},
        ],
        pipeline=[
            {"$project": {"a.b": 1}},
        ],
        expected=[
            {"_id": 1, "a": []},
        ],
        msg="$project inclusion should preserve an empty array as-is",
    ),
    StageTestCase(
        "array_traversal_empty_array_exclusion",
        docs=[
            {"_id": 1, "a": []},
        ],
        pipeline=[
            {"$project": {"a.b": 0}},
        ],
        expected=[
            {"_id": 1, "a": []},
        ],
        msg="$project exclusion should preserve an empty array as-is",
    ),
    StageTestCase(
        "array_traversal_empty_array_computed",
        docs=[
            {"_id": 1, "a": []},
        ],
        pipeline=[
            {"$project": {"a.b": {"$literal": 99}}},
        ],
        expected=[
            {"_id": 1, "a": []},
        ],
        msg="$project computed should preserve an empty array as-is",
    ),
    # "a" is an array whose only element is an empty array.
    StageTestCase(
        "array_traversal_nested_empty_array_inclusion",
        docs=[
            {"_id": 1, "a": [[]]},
        ],
        pipeline=[
            {"$project": {"a.b": 1}},
        ],
        expected=[
            {"_id": 1, "a": [[]]},
        ],
        msg="$project inclusion should preserve a nested empty array structure",
    ),
    StageTestCase(
        "array_traversal_nested_empty_array_exclusion",
        docs=[
            {"_id": 1, "a": [[]]},
        ],
        pipeline=[
            {"$project": {"a.b": 0}},
        ],
        expected=[
            {"_id": 1, "a": [[]]},
        ],
        msg="$project exclusion should preserve a nested empty array structure",
    ),
    StageTestCase(
        "array_traversal_nested_empty_array_computed",
        docs=[
            {"_id": 1, "a": [[]]},
        ],
        pipeline=[
            {"$project": {"a.b": {"$literal": 99}}},
        ],
        expected=[
            {"_id": 1, "a": [[]]},
        ],
        msg="$project computed should preserve a nested empty array structure",
    ),
]

# Property [Numeric Path Components]: numeric path components are treated
# as object key names, not array indices.
+PROJECT_NUMERIC_PATH_TESTS: list[StageTestCase] = [ + StageTestCase( + "numeric_path_inclusion_array_returns_empty", + docs=[{"_id": 1, "a": [10, 20, 30]}], + pipeline=[{"$project": {"a.0": 1}}], + expected=[{"_id": 1, "a": []}], + msg=( + "$project inclusion with numeric path component on an array" + " should return an empty array, not the element at that index" + ), + ), + StageTestCase( + "numeric_path_exclusion_array_preserves", + docs=[{"_id": 1, "a": [10, 20, 30]}], + pipeline=[{"$project": {"a.0": 0}}], + expected=[{"_id": 1, "a": [10, 20, 30]}], + msg=( + "$project exclusion with numeric path component on an array" + " should preserve the array as-is" + ), + ), + StageTestCase( + "numeric_path_computed_array_creates_objects", + docs=[{"_id": 1, "a": [10, 20, 30]}], + pipeline=[{"$project": {"a.0": {"$literal": 99}}}], + expected=[{"_id": 1, "a": [{"0": 99}, {"0": 99}, {"0": 99}]}], + msg=( + "$project computed with numeric path component on an array" + " should create objects with the numeric key in each element" + ), + ), + StageTestCase( + "numeric_path_field_ref_array_returns_empty", + docs=[{"_id": 1, "a": [10, 20, 30]}], + pipeline=[{"$project": {"result": "$a.0"}}], + expected=[{"_id": 1, "result": []}], + msg=( + "$project field path reference with numeric component on an" + " array should return an empty array, not the indexed element" + ), + ), + StageTestCase( + "numeric_path_inclusion_object_with_numeric_key", + docs=[{"_id": 1, "a": {"0": "zero", "1": "one", "x": "other"}}], + pipeline=[{"$project": {"a.0": 1}}], + expected=[{"_id": 1, "a": {"0": "zero"}}], + msg=( + "$project inclusion with numeric path component on an object" + " should match the literal key name" + ), + ), + StageTestCase( + "numeric_path_field_ref_object_with_numeric_key", + docs=[{"_id": 1, "a": {"0": "zero", "1": "one"}}], + pipeline=[{"$project": {"result": "$a.0"}}], + expected=[{"_id": 1, "result": "zero"}], + msg=( + "$project field path reference with numeric 
component on an" + " object should return the value at the literal key name" + ), + ), +] + +PROJECT_PATH_TESTS = ( + PROJECT_DOTTED_PATH_TESTS + PROJECT_ARRAY_TRAVERSAL_TESTS + PROJECT_NUMERIC_PATH_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(PROJECT_PATH_TESTS)) +def test_project_paths(collection: Any, test_case: StageTestCase) -> None: + """Test $project path resolution.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_project.py b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_project.py new file mode 100644 index 00000000..9241dc50 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_project.py @@ -0,0 +1,147 @@ +"""Tests for $project composing with other stages at different pipeline positions.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Pipeline Position]: $project reshapes documents correctly when +# composed with other stage types at different pipeline positions. 
+PROJECT_PIPELINE_POSITION_TESTS: list[StageTestCase] = [ + StageTestCase( + "pipeline_after_match", + docs=[ + {"_id": 1, "a": 10, "b": 20}, + {"_id": 2, "a": 30, "b": 40}, + ], + pipeline=[ + {"$match": {"a": {"$gt": 15}}}, + {"$project": {"b": 1}}, + ], + expected=[{"_id": 2, "b": 40}], + msg="$project should work after a $match stage", + ), + StageTestCase( + "pipeline_before_match", + docs=[ + {"_id": 1, "a": 10, "b": 20}, + {"_id": 2, "a": 30, "b": 40}, + ], + pipeline=[ + {"$project": {"a": 1}}, + {"$match": {"a": {"$gt": 15}}}, + ], + expected=[{"_id": 2, "a": 30}], + msg="$project should work before a $match stage", + ), + StageTestCase( + "pipeline_project_then_sort", + docs=[ + {"_id": 1, "a": 30, "b": 1}, + {"_id": 2, "a": 10, "b": 2}, + {"_id": 3, "a": 20, "b": 3}, + ], + pipeline=[ + {"$project": {"a": 1}}, + {"$sort": {"a": 1}}, + ], + expected=[ + {"_id": 2, "a": 10}, + {"_id": 3, "a": 20}, + {"_id": 1, "a": 30}, + ], + msg="$project should compose with a subsequent $sort stage", + ), + StageTestCase( + "pipeline_after_group", + docs=[ + {"_id": 1, "cat": "a", "val": 5}, + {"_id": 2, "cat": "b", "val": 3}, + {"_id": 3, "cat": "a", "val": 7}, + ], + pipeline=[ + {"$group": {"_id": "$cat", "total": {"$sum": "$val"}}}, + {"$project": {"total": 1}}, + {"$sort": {"_id": 1}}, + ], + expected=[ + {"_id": "a", "total": 12}, + {"_id": "b", "total": 3}, + ], + msg="$project should reshape documents produced by $group", + ), + StageTestCase( + "pipeline_before_group", + docs=[ + {"_id": 1, "cat": "a", "val": 5, "extra": "x"}, + {"_id": 2, "cat": "a", "val": 7, "extra": "y"}, + ], + pipeline=[ + {"$project": {"cat": 1, "val": 1}}, + {"$group": {"_id": "$cat", "total": {"$sum": "$val"}}}, + ], + expected=[{"_id": "a", "total": 12}], + msg="$project should narrow fields before $group", + ), + StageTestCase( + "pipeline_after_unwind", + docs=[{"_id": 1, "a": [10, 20], "b": "keep"}], + pipeline=[ + {"$unwind": "$a"}, + {"$project": {"a": 1}}, + ], + expected=[ + 
{"_id": 1, "a": 10}, + {"_id": 1, "a": 20}, + ], + msg="$project should reshape unwound documents", + ), + StageTestCase( + "pipeline_after_addFields", + docs=[{"_id": 1, "a": 5}], + pipeline=[ + {"$addFields": {"b": {"$multiply": ["$a", 2]}}}, + {"$project": {"b": 1}}, + ], + expected=[{"_id": 1, "b": 10}], + msg="$project should include fields added by $addFields", + ), + StageTestCase( + "pipeline_after_replaceRoot", + docs=[{"_id": 1, "inner": {"x": 10, "y": 20}}], + pipeline=[ + {"$replaceRoot": {"newRoot": "$inner"}}, + {"$project": {"x": 1}}, + ], + expected=[{"x": 10}], + msg="$project should reshape documents after $replaceRoot", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(PROJECT_PIPELINE_POSITION_TESTS)) +def test_stages_position_project_cases(collection, test_case: StageTestCase): + """Test $project composing with other stages at different pipeline positions.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index 67a764f6..b23d269b 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -103,10 +103,10 @@ SORT_ILLEGAL_META_ERROR = 31138 PROJECT_PATH_COLLISION_CHILD_AFTER_PARENT_ERROR = 31249 PROJECT_PATH_COLLISION_PARENT_AFTER_CHILD_ERROR = 31250 -PROJECT_EXCLUSION_IN_INCLUSION_ERROR = 31252 +PROJECT_OPERATOR_IN_EXCLUSION_ERROR = 31252 PROJECT_INCLUSION_IN_EXCLUSION_ERROR = 31253 -PROJECT_EXCLUSION_AFTER_INCLUSION_ERROR = 31254 -PROJECT_EXPRESSION_IN_EXCLUSION_ERROR = 31310 +PROJECT_EXCLUSION_IN_INCLUSION_ERROR = 31254 +PROJECT_VALUE_IN_EXCLUSION_ERROR = 31310 PROJECT_UNKNOWN_EXPRESSION_ERROR = 
31325 REVERSE_ARRAY_NOT_ARRAY_ERROR = 34435 RANGE_START_NOT_INT32_ERROR = 34443