From 779626f767209e1d647bc4b039e79c5c243e492d Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 8 Apr 2026 14:12:40 -0700 Subject: [PATCH] Add $set/$addFields stage tests Signed-off-by: Daniel Frankcom --- .../addFields/utils/operator_addFields.py | 7 + .../core/operator/stages/set/__init__.py | 0 .../stages/set/test_set_bson_types.py | 179 +++ .../operator/stages/set/test_set_errors.py | 343 +++++ .../stages/set/test_set_expressions.py | 1230 +++++++++++++++++ .../stages/set/test_set_field_names.py | 143 ++ .../stages/set/test_set_field_values.py | 264 ++++ .../operator/stages/set/test_set_paths.py | 225 +++ .../operator/stages/set/test_set_pipeline.py | 84 ++ .../operator/stages/set/utils/__init__.py | 0 .../operator/stages/set/utils/set_common.py | 31 + .../stages/test_stages_position_set.py | 147 ++ 12 files changed, 2653 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/addFields/utils/operator_addFields.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/set/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_bson_types.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_expressions.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_field_names.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_field_values.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_paths.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_pipeline.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/set/utils/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/set/utils/set_common.py create 
mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_set.py diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/addFields/utils/operator_addFields.py b/documentdb_tests/compatibility/tests/core/operator/stages/addFields/utils/operator_addFields.py new file mode 100644 index 00000000..6e1b392d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/addFields/utils/operator_addFields.py @@ -0,0 +1,7 @@ +# $addFields is an alias for $set. All tests in the $set test file are +# parametrized over both stage names. +import pytest + +ADD_FIELDS_OPERATOR = pytest.param("$addFields", id="addFields") + +__all__ = ["ADD_FIELDS_OPERATOR"] diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/set/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/set/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_bson_types.py b/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_bson_types.py new file mode 100644 index 00000000..fa8dda9c --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_bson_types.py @@ -0,0 +1,179 @@ +from __future__ import annotations + +from datetime import datetime + +import pytest +from bson import Code, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.stages.set.utils.set_common import ( + STAGE_NAMES, + _replace_stage_name, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [BSON Type Pass-Through]: all BSON types are accepted as field +# values and pass through 
unchanged, including empty arrays, empty objects, +# nested arrays, and arrays containing null. +SET_BSON_TYPE_TESTS: list[StageTestCase] = [ + StageTestCase( + "bson_string", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": "hello"}}], + expected=[{"_id": 1, "v": "hello"}], + msg="$set should pass through a string value unchanged", + ), + StageTestCase( + "bson_int32", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": 42}}], + expected=[{"_id": 1, "v": 42}], + msg="$set should pass through an int32 value unchanged", + ), + StageTestCase( + "bson_int64", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": Int64(9_000_000_000)}}], + expected=[{"_id": 1, "v": Int64(9_000_000_000)}], + msg="$set should pass through an Int64 value unchanged", + ), + StageTestCase( + "bson_float", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": 3.14}}], + expected=[{"_id": 1, "v": 3.14}], + msg="$set should pass through a float value unchanged", + ), + StageTestCase( + "bson_datetime", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": datetime(2023, 1, 15)}}], + expected=[{"_id": 1, "v": datetime(2023, 1, 15)}], + msg="$set should pass through a datetime value unchanged", + ), + StageTestCase( + "bson_objectid", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": ObjectId("507f1f77bcf86cd799439011")}}], + expected=[{"_id": 1, "v": ObjectId("507f1f77bcf86cd799439011")}], + msg="$set should pass through an ObjectId value unchanged", + ), + StageTestCase( + "bson_decimal128", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": Decimal128("123.456")}}], + expected=[{"_id": 1, "v": Decimal128("123.456")}], + msg="$set should pass through a Decimal128 value unchanged", + ), + StageTestCase( + "bson_binary", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": b"\x01\x02\x03"}}], + expected=[{"_id": 1, "v": b"\x01\x02\x03"}], + msg="$set should pass through a binary value unchanged", + ), + StageTestCase( + "bson_regex", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": Regex("abc", "i")}}], + expected=[{"_id": 1, 
"v": Regex("abc", "i")}], + msg="$set should pass through a Regex value unchanged", + ), + StageTestCase( + "bson_timestamp", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": Timestamp(1234567890, 1)}}], + expected=[{"_id": 1, "v": Timestamp(1234567890, 1)}], + msg="$set should pass through a Timestamp value unchanged", + ), + StageTestCase( + "bson_maxkey", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": MaxKey()}}], + expected=[{"_id": 1, "v": MaxKey()}], + msg="$set should pass through a MaxKey value unchanged", + ), + StageTestCase( + "bson_minkey", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": MinKey()}}], + expected=[{"_id": 1, "v": MinKey()}], + msg="$set should pass through a MinKey value unchanged", + ), + StageTestCase( + "bson_code", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": Code("function() {}")}}], + expected=[{"_id": 1, "v": Code("function() {}")}], + msg="$set should pass through a JavaScript code value unchanged", + ), + StageTestCase( + "bson_code_with_scope", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": Code("function() {}", {"x": 1})}}], + expected=[{"_id": 1, "v": Code("function() {}", {"x": 1})}], + msg="$set should pass through a JavaScript code with scope value unchanged", + ), + StageTestCase( + "bson_array", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": {"$literal": [1, "two", 3.0]}}}], + expected=[{"_id": 1, "v": [1, "two", 3.0]}], + msg="$set should pass through an array value unchanged", + ), + StageTestCase( + "bson_empty_array", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": {"$literal": []}}}], + expected=[{"_id": 1, "v": []}], + msg="$set should pass through an empty array unchanged", + ), + StageTestCase( + "bson_nested_array", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": {"$literal": [[1, 2], [3, 4]]}}}], + expected=[{"_id": 1, "v": [[1, 2], [3, 4]]}], + msg="$set should pass through a nested array unchanged", + ), + StageTestCase( + "bson_array_with_null", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": 
{"$literal": [1, None, 3]}}}], + expected=[{"_id": 1, "v": [1, None, 3]}], + msg="$set should pass through an array containing null unchanged", + ), + StageTestCase( + "bson_empty_object", + docs=[{"_id": 1}], + pipeline=[{"$set": {"v": {"$literal": {}}}}], + expected=[{"_id": 1, "v": {}}], + msg="$set should pass through an empty object unchanged", + ), +] + + +@pytest.mark.parametrize("stage_name", STAGE_NAMES) +@pytest.mark.parametrize("test_case", pytest_params(SET_BSON_TYPE_TESTS)) +def test_set_bson_types(collection, stage_name: str, test_case: StageTestCase): + """Test $set / $addFields BSON type pass-through cases.""" + if test_case.docs: + collection.insert_many(test_case.docs) + pipeline = _replace_stage_name(test_case.pipeline, stage_name) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=f"{stage_name!r}: {test_case.msg!r}", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_errors.py new file mode 100644 index 00000000..fe349ba5 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_errors.py @@ -0,0 +1,343 @@ +from __future__ import annotations + +from datetime import datetime + +import pytest +from bson import Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.stages.set.utils.set_common import ( + STAGE_NAMES, + _replace_stage_name, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + CONFLICTING_PATH_ERROR, + DOTTED_FIELD_IN_SUB_OBJECT_ERROR, + FAILED_TO_PARSE_ERROR, + 
FIELD_PATH_DOLLAR_PREFIX_ERROR, + FIELD_PATH_EMPTY_COMPONENT_ERROR, + FIELD_PATH_EMPTY_ERROR, + FIELD_PATH_TRAILING_DOT_ERROR, + INVALID_DOLLAR_FIELD_PATH, + MULTIPLE_EXPRESSIONS_ERROR, + SET_SPECIFICATION_NOT_OBJECT_ERROR, + UNRECOGNIZED_EXPRESSION_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF + +# Property [Specification Type Errors]: a non-document argument to $set +# produces an error. +SET_SPEC_TYPE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "spec_type_error_string", + docs=[{"_id": 1}], + pipeline=[{"$set": "not_a_doc"}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject a string specification", + ), + StageTestCase( + "spec_type_error_int", + docs=[{"_id": 1}], + pipeline=[{"$set": 42}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject an integer specification", + ), + StageTestCase( + "spec_type_error_int64", + docs=[{"_id": 1}], + pipeline=[{"$set": Int64(42)}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject an Int64 specification", + ), + StageTestCase( + "spec_type_error_float", + docs=[{"_id": 1}], + pipeline=[{"$set": 3.14}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject a float specification", + ), + StageTestCase( + "spec_type_error_decimal128", + docs=[{"_id": 1}], + pipeline=[{"$set": DECIMAL128_ONE_AND_HALF}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject a Decimal128 specification", + ), + StageTestCase( + "spec_type_error_bool", + docs=[{"_id": 1}], + pipeline=[{"$set": True}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject a boolean specification", + ), + StageTestCase( + "spec_type_error_null", + docs=[{"_id": 1}], + pipeline=[{"$set": None}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + 
msg="$set should reject a null specification", + ), + StageTestCase( + "spec_type_error_array", + docs=[{"_id": 1}], + pipeline=[{"$set": [1, 2, 3]}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject an array specification", + ), + StageTestCase( + "spec_type_error_objectid", + docs=[{"_id": 1}], + pipeline=[{"$set": ObjectId("507f1f77bcf86cd799439011")}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject an ObjectId specification", + ), + StageTestCase( + "spec_type_error_datetime", + docs=[{"_id": 1}], + pipeline=[{"$set": datetime(2023, 1, 1)}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject a datetime specification", + ), + StageTestCase( + "spec_type_error_timestamp", + docs=[{"_id": 1}], + pipeline=[{"$set": Timestamp(1, 1)}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject a Timestamp specification", + ), + StageTestCase( + "spec_type_error_regex", + docs=[{"_id": 1}], + pipeline=[{"$set": Regex("abc", "i")}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject a Regex specification", + ), + StageTestCase( + "spec_type_error_binary", + docs=[{"_id": 1}], + pipeline=[{"$set": b"\x01\x02"}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject a binary specification", + ), + StageTestCase( + "spec_type_error_code", + docs=[{"_id": 1}], + pipeline=[{"$set": Code("function() {}")}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject a Code specification", + ), + StageTestCase( + "spec_type_error_code_with_scope", + docs=[{"_id": 1}], + pipeline=[{"$set": Code("function() {}", {"x": 1})}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject a Code with scope specification", + ), + StageTestCase( + "spec_type_error_minkey", + docs=[{"_id": 1}], + pipeline=[{"$set": MinKey()}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject a MinKey 
specification", + ), + StageTestCase( + "spec_type_error_maxkey", + docs=[{"_id": 1}], + pipeline=[{"$set": MaxKey()}], + error_code=SET_SPECIFICATION_NOT_OBJECT_ERROR, + msg="$set should reject a MaxKey specification", + ), +] + +# Property [Field Name Validation Errors]: field names in the $set +# specification must be non-empty, must not start with $, and must not contain +# empty path components (leading, trailing, or consecutive dots). +SET_FIELD_NAME_VALIDATION_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "field_name_empty_string", + docs=[{"_id": 1}], + pipeline=[{"$set": {"": 1}}], + error_code=FIELD_PATH_EMPTY_ERROR, + msg="$set should reject an empty string as a field name", + ), + StageTestCase( + "field_name_dollar_a", + docs=[{"_id": 1}], + pipeline=[{"$set": {"$a": 1}}], + error_code=FIELD_PATH_DOLLAR_PREFIX_ERROR, + msg="$set should reject a $-prefixed field name", + ), + StageTestCase( + "field_name_nested_dollar_b", + docs=[{"_id": 1}], + pipeline=[{"$set": {"a.$b": 1}}], + error_code=FIELD_PATH_DOLLAR_PREFIX_ERROR, + msg="$set should reject a $-prefixed component in a dotted field name", + ), + StageTestCase( + "field_name_dollar_only", + docs=[{"_id": 1}], + pipeline=[{"$set": {"$": 1}}], + error_code=FIELD_PATH_DOLLAR_PREFIX_ERROR, + msg="$set should reject a bare '$' as a field name", + ), + StageTestCase( + "field_name_nested_dollar_only", + docs=[{"_id": 1}], + pipeline=[{"$set": {"a.$": 1}}], + error_code=FIELD_PATH_DOLLAR_PREFIX_ERROR, + msg="$set should reject a trailing '$' component in a dotted field name", + ), + StageTestCase( + "field_name_leading_dot", + docs=[{"_id": 1}], + pipeline=[{"$set": {".a": 1}}], + error_code=FIELD_PATH_EMPTY_COMPONENT_ERROR, + msg="$set should reject a field name with a leading dot", + ), + StageTestCase( + "field_name_trailing_dot", + docs=[{"_id": 1}], + pipeline=[{"$set": {"a.": 1}}], + error_code=FIELD_PATH_TRAILING_DOT_ERROR, + msg="$set should reject a field name with a trailing dot", + ), 
+ StageTestCase( + "field_name_double_dot", + docs=[{"_id": 1}], + pipeline=[{"$set": {"a..b": 1}}], + error_code=FIELD_PATH_EMPTY_COMPONENT_ERROR, + msg="$set should reject a field name with a double dot", + ), + StageTestCase( + "field_name_single_dot", + docs=[{"_id": 1}], + pipeline=[{"$set": {".": 1}}], + error_code=FIELD_PATH_TRAILING_DOT_ERROR, + msg="$set should reject a single dot as a field name", + ), + StageTestCase( + "field_name_double_dot_only", + docs=[{"_id": 1}], + pipeline=[{"$set": {"..": 1}}], + error_code=FIELD_PATH_TRAILING_DOT_ERROR, + msg="$set should reject a double dot as a field name", + ), + # Trailing dot validation takes precedence over $-prefix validation. + StageTestCase( + "field_name_dollar_dot", + docs=[{"_id": 1}], + pipeline=[{"$set": {"$.": 1}}], + error_code=FIELD_PATH_TRAILING_DOT_ERROR, + msg="$set should reject '$.' with a trailing dot error", + ), + StageTestCase( + "field_name_dollar_dot_a", + docs=[{"_id": 1}], + pipeline=[{"$set": {"$.a": 1}}], + error_code=FIELD_PATH_DOLLAR_PREFIX_ERROR, + msg="$set should reject '$.a' with a $-prefix error", + ), +] + +# Property [Conflicting Path Errors]: parent-child path conflicts in the same +# specification produce an error regardless of field order. +SET_CONFLICTING_PATH_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "conflicting_parent_child", + docs=[{"_id": 1}], + pipeline=[{"$set": {"a": 1, "a.b": 2}}], + error_code=CONFLICTING_PATH_ERROR, + msg="$set should reject parent-child path conflict (parent first)", + ), + StageTestCase( + "conflicting_child_parent", + docs=[{"_id": 1}], + pipeline=[{"$set": {"a.b": 2, "a": 1}}], + error_code=CONFLICTING_PATH_ERROR, + msg="$set should reject parent-child path conflict (child first)", + ), +] + +# Property [Dollar-Sign String Value Errors]: "$" and "$$" as field values +# produce errors because they are invalid field paths and variable names +# respectively. 
+SET_DOLLAR_SIGN_VALUE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "dollar_sign_value", + docs=[{"_id": 1}], + pipeline=[{"$set": {"a": "$"}}], + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$set should reject '$' as a field value because it is not a valid field path", + ), + StageTestCase( + "double_dollar_sign_value", + docs=[{"_id": 1}], + pipeline=[{"$set": {"a": "$$"}}], + error_code=FAILED_TO_PARSE_ERROR, + msg="$set should reject '$$' as a field value because it is an empty variable name", + ), +] + +# Property [Embedded Object Value Errors]: $-prefixed keys inside embedded +# objects are treated as expression operators and must be valid. Dotted field +# names and multiple operators in a single sub-document are rejected. +SET_EMBEDDED_OBJECT_VALUE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "embedded_unrecognized_expression", + docs=[{"_id": 1, "a": {"x": 1}}], + pipeline=[{"$set": {"a": {"$bogus": 1}}}], + error_code=UNRECOGNIZED_EXPRESSION_ERROR, + msg="$set should reject an unrecognized $-prefixed key inside an embedded object", + ), + StageTestCase( + "embedded_dotted_field_name", + docs=[{"_id": 1, "a": {"x": 1}}], + pipeline=[{"$set": {"a": {"x.y": 1}}}], + error_code=DOTTED_FIELD_IN_SUB_OBJECT_ERROR, + msg="$set should reject a dotted field name inside an embedded object value", + ), + StageTestCase( + "embedded_multiple_expressions", + docs=[{"_id": 1, "a": {"x": 1}}], + pipeline=[{"$set": {"a": {"$add": [1, 2], "$multiply": [3, 4]}}}], + error_code=MULTIPLE_EXPRESSIONS_ERROR, + msg="$set should reject multiple $-prefixed operators in a single sub-document value", + ), +] + +SET_ERROR_TESTS = ( + SET_SPEC_TYPE_ERROR_TESTS + + SET_FIELD_NAME_VALIDATION_ERROR_TESTS + + SET_CONFLICTING_PATH_ERROR_TESTS + + SET_DOLLAR_SIGN_VALUE_ERROR_TESTS + + SET_EMBEDDED_OBJECT_VALUE_ERROR_TESTS +) + + +@pytest.mark.parametrize("stage_name", STAGE_NAMES) +@pytest.mark.parametrize("test_case", pytest_params(SET_ERROR_TESTS)) +def 
test_set_errors(collection, stage_name: str, test_case: StageTestCase): + """Test $set / $addFields error cases.""" + if test_case.docs: + collection.insert_many(test_case.docs) + pipeline = _replace_stage_name(test_case.pipeline, stage_name) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=f"{stage_name!r}: {test_case.msg!r}", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_expressions.py b/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_expressions.py new file mode 100644 index 00000000..847d0a19 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_expressions.py @@ -0,0 +1,1230 @@ +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any +from uuid import UUID + +import pytest +from bson import Binary, Decimal128, Int64, ObjectId, Timestamp + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Expression Support]: each expression operator produces the correct +# result when used as a computed field in $set. +SET_EXPRESSION_TESTS: list[StageTestCase] = [ + # Arithmetic. 
+ StageTestCase( + "expr_abs", + docs=[{"_id": 1, "a": -5}], + pipeline=[{"$set": {"r": {"$abs": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 5}], + msg="$abs should work in $set", + ), + StageTestCase( + "expr_add", + docs=[{"_id": 1, "a": 3, "b": 4}], + pipeline=[{"$set": {"r": {"$add": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 7}], + msg="$add should work in $set", + ), + StageTestCase( + "expr_ceil", + docs=[{"_id": 1, "a": 2.3}], + pipeline=[{"$set": {"r": {"$ceil": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 3.0}], + msg="$ceil should work in $set", + ), + StageTestCase( + "expr_divide", + docs=[{"_id": 1, "a": 10, "b": 4}], + pipeline=[{"$set": {"r": {"$divide": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 2.5}], + msg="$divide should work in $set", + ), + StageTestCase( + "expr_exp", + docs=[{"_id": 1, "a": 0}], + pipeline=[{"$set": {"r": {"$exp": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1.0}], + msg="$exp should work in $set", + ), + StageTestCase( + "expr_floor", + docs=[{"_id": 1, "a": 2.7}], + pipeline=[{"$set": {"r": {"$floor": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 2.0}], + msg="$floor should work in $set", + ), + StageTestCase( + "expr_ln", + docs=[{"_id": 1, "a": 10}], + pipeline=[{"$set": {"r": {"$ln": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 2.302585092994046}], + msg="$ln should work in $set", + ), + StageTestCase( + "expr_log", + docs=[{"_id": 1, "a": 100, "b": 10}], + pipeline=[{"$set": {"r": {"$log": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 2.0}], + msg="$log should work in $set", + ), + StageTestCase( + "expr_log10", + docs=[{"_id": 1, "a": 1000}], + pipeline=[{"$set": {"r": {"$log10": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 3.0}], + msg="$log10 should work in $set", + ), + StageTestCase( + "expr_mod", + docs=[{"_id": 1, "a": 10, "b": 3}], + 
pipeline=[{"$set": {"r": {"$mod": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1}], + msg="$mod should work in $set", + ), + StageTestCase( + "expr_multiply", + docs=[{"_id": 1, "a": 3, "b": 4}], + pipeline=[{"$set": {"r": {"$multiply": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 12}], + msg="$multiply should work in $set", + ), + StageTestCase( + "expr_pow", + docs=[{"_id": 1, "a": 2, "b": 3}], + pipeline=[{"$set": {"r": {"$pow": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 8}], + msg="$pow should work in $set", + ), + StageTestCase( + "expr_round", + docs=[{"_id": 1, "a": 2.567}], + pipeline=[{"$set": {"r": {"$round": ["$a", 1]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 2.6}], + msg="$round should work in $set", + ), + StageTestCase( + "expr_sqrt", + docs=[{"_id": 1, "a": 9}], + pipeline=[{"$set": {"r": {"$sqrt": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 3.0}], + msg="$sqrt should work in $set", + ), + StageTestCase( + "expr_subtract", + docs=[{"_id": 1, "a": 10, "b": 3}], + pipeline=[{"$set": {"r": {"$subtract": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 7}], + msg="$subtract should work in $set", + ), + StageTestCase( + "expr_trunc", + docs=[{"_id": 1, "a": 2.9}], + pipeline=[{"$set": {"r": {"$trunc": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 2.0}], + msg="$trunc should work in $set", + ), + StageTestCase( + "expr_sigmoid", + docs=[{"_id": 1, "a": 0}], + pipeline=[{"$set": {"r": {"$sigmoid": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 0.5}], + msg="$sigmoid should work in $set", + ), + # Array. 
+ StageTestCase( + "expr_arrayElemAt", + docs=[{"_id": 1, "a": [10, 20, 30]}], + pipeline=[{"$set": {"r": {"$arrayElemAt": ["$a", 1]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 20}], + msg="$arrayElemAt should work in $set", + ), + StageTestCase( + "expr_arrayToObject", + docs=[{"_id": 1, "a": [["k", "v"]]}], + pipeline=[{"$set": {"r": {"$arrayToObject": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": {"k": "v"}}], + msg="$arrayToObject should work in $set", + ), + StageTestCase( + "expr_concatArrays", + docs=[{"_id": 1, "a": [1], "b": [2]}], + pipeline=[{"$set": {"r": {"$concatArrays": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [1, 2]}], + msg="$concatArrays should work in $set", + ), + StageTestCase( + "expr_filter", + docs=[{"_id": 1, "a": [1, 2, 3, 4]}], + pipeline=[ + {"$set": {"r": {"$filter": {"input": "$a", "cond": {"$gt": ["$$this", 2]}}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": [3, 4]}], + msg="$filter should work in $set", + ), + StageTestCase( + "expr_firstN", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[{"$set": {"r": {"$firstN": {"input": "$a", "n": 2}}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [1, 2]}], + msg="$firstN should work in $set", + ), + StageTestCase( + "expr_in", + docs=[{"_id": 1, "a": 2}], + pipeline=[{"$set": {"r": {"$in": ["$a", [1, 2, 3]]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$in should work in $set", + ), + StageTestCase( + "expr_indexOfArray", + docs=[{"_id": 1, "a": [10, 20, 30]}], + pipeline=[{"$set": {"r": {"$indexOfArray": ["$a", 20]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1}], + msg="$indexOfArray should work in $set", + ), + StageTestCase( + "expr_isArray", + docs=[{"_id": 1, "a": [1, 2]}], + pipeline=[{"$set": {"r": {"$isArray": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$isArray should work in $set", + ), + StageTestCase( + "expr_lastN", + 
docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[{"$set": {"r": {"$lastN": {"input": "$a", "n": 2}}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [2, 3]}], + msg="$lastN should work in $set", + ), + StageTestCase( + "expr_map", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[ + {"$set": {"r": {"$map": {"input": "$a", "in": {"$multiply": ["$$this", 2]}}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": [2, 4, 6]}], + msg="$map should work in $set", + ), + StageTestCase( + "expr_maxN_array", + docs=[{"_id": 1, "a": [3, 1, 2]}], + pipeline=[{"$set": {"r": {"$maxN": {"input": "$a", "n": 2}}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [3, 2]}], + msg="$maxN should work in $set", + ), + StageTestCase( + "expr_minN_array", + docs=[{"_id": 1, "a": [3, 1, 2]}], + pipeline=[{"$set": {"r": {"$minN": {"input": "$a", "n": 2}}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [1, 2]}], + msg="$minN should work in $set", + ), + StageTestCase( + "expr_objectToArray", + docs=[{"_id": 1, "a": {"x": 1}}], + pipeline=[{"$set": {"r": {"$objectToArray": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [{"k": "x", "v": 1}]}], + msg="$objectToArray should work in $set", + ), + StageTestCase( + "expr_range", + docs=[{"_id": 1}], + pipeline=[{"$set": {"r": {"$range": [0, 3]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [0, 1, 2]}], + msg="$range should work in $set", + ), + StageTestCase( + "expr_reduce", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[ + { + "$set": { + "r": { + "$reduce": { + "input": "$a", + "initialValue": 0, + "in": {"$add": ["$$value", "$$this"]}, + } + } + } + } + ], + expected=[{"_id": 1, "a": [1, 2, 3], "r": 6}], + msg="$reduce should work in $set", + ), + StageTestCase( + "expr_reverseArray", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[{"$set": {"r": {"$reverseArray": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [3, 2, 1]}], + msg="$reverseArray should work in $set", + ), + StageTestCase( +
"expr_size", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[{"$set": {"r": {"$size": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 3}], + msg="$size should work in $set", + ), + StageTestCase( + "expr_slice", + docs=[{"_id": 1, "a": [1, 2, 3, 4]}], + pipeline=[{"$set": {"r": {"$slice": ["$a", 2]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [1, 2]}], + msg="$slice should work in $set", + ), + StageTestCase( + "expr_sortArray", + docs=[{"_id": 1, "a": [3, 1, 2]}], + pipeline=[ + {"$set": {"r": {"$sortArray": {"input": "$a", "sortBy": 1}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": [1, 2, 3]}], + msg="$sortArray should work in $set", + ), + StageTestCase( + "expr_zip", + docs=[{"_id": 1, "a": [1, 2], "b": [3, 4]}], + pipeline=[{"$set": {"r": {"$zip": {"inputs": ["$a", "$b"]}}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [[1, 3], [2, 4]]}], + msg="$zip should work in $set", + ), + # Bitwise. + StageTestCase( + "expr_bitAnd", + docs=[{"_id": 1, "a": 7, "b": 3}], + pipeline=[{"$set": {"r": {"$bitAnd": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 3}], + msg="$bitAnd should work in $set", + ), + StageTestCase( + "expr_bitNot", + docs=[{"_id": 1, "a": Int64(5)}], + pipeline=[{"$set": {"r": {"$bitNot": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": Int64(-6)}], + msg="$bitNot should work in $set", + ), + StageTestCase( + "expr_bitOr", + docs=[{"_id": 1, "a": 5, "b": 3}], + pipeline=[{"$set": {"r": {"$bitOr": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 7}], + msg="$bitOr should work in $set", + ), + StageTestCase( + "expr_bitXor", + docs=[{"_id": 1, "a": 5, "b": 3}], + pipeline=[{"$set": {"r": {"$bitXor": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 6}], + msg="$bitXor should work in $set", + ), + # Boolean. 
+ StageTestCase( + "expr_and", + docs=[{"_id": 1, "a": True, "b": False}], + pipeline=[{"$set": {"r": {"$and": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": False}], + msg="$and should work in $set", + ), + StageTestCase( + "expr_not", + docs=[{"_id": 1, "a": False}], + pipeline=[{"$set": {"r": {"$not": ["$a"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$not should work in $set", + ), + StageTestCase( + "expr_or", + docs=[{"_id": 1, "a": False, "b": True}], + pipeline=[{"$set": {"r": {"$or": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$or should work in $set", + ), + # Comparisons. + StageTestCase( + "expr_cmp", + docs=[{"_id": 1, "a": 5, "b": 3}], + pipeline=[{"$set": {"r": {"$cmp": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1}], + msg="$cmp should work in $set", + ), + StageTestCase( + "expr_eq", + docs=[{"_id": 1, "a": 5, "b": 5}], + pipeline=[{"$set": {"r": {"$eq": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$eq should work in $set", + ), + StageTestCase( + "expr_gt", + docs=[{"_id": 1, "a": 5, "b": 3}], + pipeline=[{"$set": {"r": {"$gt": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$gt should work in $set", + ), + StageTestCase( + "expr_gte", + docs=[{"_id": 1, "a": 5, "b": 5}], + pipeline=[{"$set": {"r": {"$gte": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$gte should work in $set", + ), + StageTestCase( + "expr_lt", + docs=[{"_id": 1, "a": 3, "b": 5}], + pipeline=[{"$set": {"r": {"$lt": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$lt should work in $set", + ), + StageTestCase( + "expr_lte", + docs=[{"_id": 1, "a": 5, "b": 5}], + pipeline=[{"$set": {"r": {"$lte": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$lte should work in $set", + ), + 
StageTestCase( + "expr_ne", + docs=[{"_id": 1, "a": 5, "b": 3}], + pipeline=[{"$set": {"r": {"$ne": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$ne should work in $set", + ), + # Conditional. + StageTestCase( + "expr_cond", + docs=[{"_id": 1, "a": 10}], + pipeline=[ + {"$set": {"r": {"$cond": [{"$gt": ["$a", 5]}, "big", "small"]}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": "big"}], + msg="$cond should work in $set", + ), + StageTestCase( + "expr_ifNull", + docs=[{"_id": 1, "a": None}], + pipeline=[{"$set": {"r": {"$ifNull": ["$a", "default"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": "default"}], + msg="$ifNull should work in $set", + ), + StageTestCase( + "expr_switch", + docs=[{"_id": 1, "a": 2}], + pipeline=[ + { + "$set": { + "r": { + "$switch": { + "branches": [{"case": {"$eq": ["$a", 2]}, "then": "two"}], + "default": "other", + } + } + } + } + ], + expected=[{"_id": 1, "a": 2, "r": "two"}], # $set keeps "a" + msg="$switch should work in $set", + ), + # Date.
+ StageTestCase( + "expr_dateAdd", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + pipeline=[ + {"$set": {"r": {"$dateAdd": {"startDate": "$d", "unit": "day", "amount": 1}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": datetime(2024, 1, 2)}], + msg="$dateAdd should work in $set", + ), + StageTestCase( + "expr_dateDiff", + docs=[ + { + "_id": 1, + "a": datetime(2024, 1, 1, tzinfo=timezone.utc), + "b": datetime(2024, 1, 4, tzinfo=timezone.utc), + } + ], + pipeline=[ + {"$set": {"r": {"$dateDiff": {"startDate": "$a", "endDate": "$b", "unit": "day"}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": Int64(3)}], + msg="$dateDiff should work in $set", + ), + StageTestCase( + "expr_dateFromParts", + docs=[{"_id": 1}], + pipeline=[ + {"$set": {"r": {"$dateFromParts": {"year": 2024, "month": 6, "day": 15}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": datetime(2024, 6, 15)}], + msg="$dateFromParts should work in $set", + ), + StageTestCase( + "expr_dateFromString", + docs=[{"_id": 1}], + pipeline=[ + {"$set": {"r": {"$dateFromString": {"dateString": "2024-01-01"}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": datetime(2024, 1, 1)}], + msg="$dateFromString should work in $set", + ), + StageTestCase( + "expr_dateSubtract", + docs=[{"_id": 1, "d": datetime(2024, 1, 3, tzinfo=timezone.utc)}], + pipeline=[ + {"$set": {"r": {"$dateSubtract": {"startDate": "$d", "unit": "day", "amount": 1}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": datetime(2024, 1, 2)}], + msg="$dateSubtract should work in $set", + ), + StageTestCase( + "expr_dateToParts", + docs=[{"_id": 1, "d": datetime(2024, 3, 15, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$dateToParts": {"date": "$d"}}}}, {"$project": {"r": 1}}], + expected=[ + { + "_id": 1, + "r": { + "year": 2024, + "month": 3, + "day": 15, + "hour": 0, + "minute": 0, + "second": 0, + "millisecond": 0, + }, + } + ], + msg="$dateToParts should 
work in $set", + ), + StageTestCase( + "expr_dateToString", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + pipeline=[ + {"$set": {"r": {"$dateToString": {"date": "$d", "format": "%Y-%m-%d"}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": "2024-01-01"}], + msg="$dateToString should work in $set", + ), + StageTestCase( + "expr_dateTrunc", + docs=[{"_id": 1, "d": datetime(2024, 3, 15, 10, 30, tzinfo=timezone.utc)}], + pipeline=[ + {"$set": {"r": {"$dateTrunc": {"date": "$d", "unit": "month"}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": datetime(2024, 3, 1)}], + msg="$dateTrunc should work in $set", + ), + StageTestCase( + "expr_dayOfMonth", + docs=[{"_id": 1, "d": datetime(2024, 3, 15, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$dayOfMonth": "$d"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 15}], + msg="$dayOfMonth should work in $set", + ), + StageTestCase( + "expr_dayOfWeek", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$dayOfWeek": "$d"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 2}], + msg="$dayOfWeek should work in $set", + ), + StageTestCase( + "expr_dayOfYear", + docs=[{"_id": 1, "d": datetime(2024, 2, 1, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$dayOfYear": "$d"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 32}], + msg="$dayOfYear should work in $set", + ), + StageTestCase( + "expr_hour", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, 14, 0, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$hour": "$d"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 14}], + msg="$hour should work in $set", + ), + StageTestCase( + "expr_isoDayOfWeek", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$isoDayOfWeek": "$d"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1}], + msg="$isoDayOfWeek should work in $set", + ), + StageTestCase( + 
"expr_isoWeek", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$isoWeek": "$d"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1}], + msg="$isoWeek should work in $set", + ), + StageTestCase( + "expr_isoWeekYear", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$isoWeekYear": "$d"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": Int64(2024)}], + msg="$isoWeekYear should work in $set", + ), + StageTestCase( + "expr_millisecond", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, 0, 0, 0, 123000, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$millisecond": "$d"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 123}], + msg="$millisecond should work in $set", + ), + StageTestCase( + "expr_minute", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, 10, 45, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$minute": "$d"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 45}], + msg="$minute should work in $set", + ), + StageTestCase( + "expr_month", + docs=[{"_id": 1, "d": datetime(2024, 7, 1, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$month": "$d"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 7}], + msg="$month should work in $set", + ), + StageTestCase( + "expr_second", + docs=[{"_id": 1, "d": datetime(2024, 1, 1, 0, 0, 30, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$second": "$d"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 30}], + msg="$second should work in $set", + ), + StageTestCase( + "expr_toDate", + docs=[{"_id": 1, "a": Int64(1704067200000)}], + pipeline=[{"$set": {"r": {"$toDate": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": datetime(2024, 1, 1)}], + msg="$toDate should work in $set", + ), + StageTestCase( + "expr_week", + docs=[{"_id": 1, "d": datetime(2024, 1, 15, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$week": "$d"}}}, {"$project": {"r": 1}}], + 
expected=[{"_id": 1, "r": 2}], + msg="$week should work in $set", + ), + StageTestCase( + "expr_year", + docs=[{"_id": 1, "d": datetime(2024, 6, 1, tzinfo=timezone.utc)}], + pipeline=[{"$set": {"r": {"$year": "$d"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 2024}], + msg="$year should work in $set", + ), + # Misc. + StageTestCase( + "expr_binarySize", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$set": {"r": {"$binarySize": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 5}], + msg="$binarySize should work in $set", + ), + StageTestCase( + "expr_bsonSize", + docs=[{"_id": 1, "a": {"x": 1}}], + pipeline=[{"$set": {"r": {"$bsonSize": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 12}], + msg="$bsonSize should work in $set", + ), + StageTestCase( + "expr_getField", + docs=[{"_id": 1, "a": {"x": 42}}], + pipeline=[ + {"$set": {"r": {"$getField": {"field": "x", "input": "$a"}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": 42}], + msg="$getField should work in $set", + ), + StageTestCase( + "expr_let", + docs=[{"_id": 1, "a": 5}], + pipeline=[ + {"$set": {"r": {"$let": {"vars": {"x": "$a"}, "in": {"$multiply": ["$$x", 2]}}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": 10}], + msg="$let should work in $set", + ), + StageTestCase( + "expr_literal", + docs=[{"_id": 1}], + pipeline=[{"$set": {"r": {"$literal": "$notAFieldPath"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": "$notAFieldPath"}], + msg="$literal should work in $set", + ), + StageTestCase( + "expr_toHashedIndexKey", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$set": {"r": {"$toHashedIndexKey": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": Int64(5347277839332858538)}], + msg="$toHashedIndexKey should work in $set", + ), + # Object. 
+ StageTestCase( + "expr_mergeObjects", + docs=[{"_id": 1, "a": {"x": 1}, "b": {"y": 2}}], + pipeline=[{"$set": {"r": {"$mergeObjects": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": {"x": 1, "y": 2}}], + msg="$mergeObjects should work in $set", + ), + StageTestCase( + "expr_setField", + docs=[{"_id": 1, "a": {"x": 1}}], + pipeline=[ + {"$set": {"r": {"$setField": {"field": "y", "input": "$a", "value": 2}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": {"x": 1, "y": 2}}], + msg="$setField should work in $set", + ), + StageTestCase( + "expr_unsetField", + docs=[{"_id": 1, "a": {"x": 1, "y": 2}}], + pipeline=[ + {"$set": {"r": {"$unsetField": {"field": "x", "input": "$a"}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": {"y": 2}}], + msg="$unsetField should work in $set", + ), + # Set. + StageTestCase( + "expr_allElementsTrue", + docs=[{"_id": 1, "a": [True, True]}], + pipeline=[{"$set": {"r": {"$allElementsTrue": ["$a"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$allElementsTrue should work in $set", + ), + StageTestCase( + "expr_anyElementTrue", + docs=[{"_id": 1, "a": [False, True]}], + pipeline=[{"$set": {"r": {"$anyElementTrue": ["$a"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$anyElementTrue should work in $set", + ), + StageTestCase( + "expr_setDifference", + docs=[{"_id": 1, "a": [1, 2, 3], "b": [2]}], + pipeline=[{"$set": {"r": {"$setDifference": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [1, 3]}], + msg="$setDifference should work in $set", + ), + StageTestCase( + "expr_setEquals", + docs=[{"_id": 1, "a": [1, 2], "b": [2, 1]}], + pipeline=[{"$set": {"r": {"$setEquals": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$setEquals should work in $set", + ), + StageTestCase( + "expr_setIntersection", + docs=[{"_id": 1, "a": [1, 2, 3], "b": [2, 3, 4]}], + pipeline=[{"$set": {"r": 
{"$setIntersection": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [2, 3]}], + msg="$setIntersection should work in $set", + ), + StageTestCase( + "expr_setIsSubset", + docs=[{"_id": 1, "a": [1, 2], "b": [1, 2, 3]}], + pipeline=[{"$set": {"r": {"$setIsSubset": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$setIsSubset should work in $set", + ), + # String. + StageTestCase( + "expr_concat", + docs=[{"_id": 1, "a": "hello", "b": " world"}], + pipeline=[{"$set": {"r": {"$concat": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": "hello world"}], + msg="$concat should work in $set", + ), + StageTestCase( + "expr_indexOfBytes", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$set": {"r": {"$indexOfBytes": ["$a", "ll"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 2}], + msg="$indexOfBytes should work in $set", + ), + StageTestCase( + "expr_indexOfCP", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$set": {"r": {"$indexOfCP": ["$a", "ll"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 2}], + msg="$indexOfCP should work in $set", + ), + StageTestCase( + "expr_ltrim", + docs=[{"_id": 1, "a": " hi"}], + pipeline=[{"$set": {"r": {"$ltrim": {"input": "$a"}}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": "hi"}], + msg="$ltrim should work in $set", + ), + StageTestCase( + "expr_regexFind", + docs=[{"_id": 1, "a": "hello 123"}], + pipeline=[ + {"$set": {"r": {"$regexFind": {"input": "$a", "regex": "[0-9]+"}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": {"match": "123", "idx": 6, "captures": []}}], + msg="$regexFind should work in $set", + ), + StageTestCase( + "expr_regexFindAll", + docs=[{"_id": 1, "a": "a1b2"}], + pipeline=[ + {"$set": {"r": {"$regexFindAll": {"input": "$a", "regex": "[0-9]"}}}}, + {"$project": {"r": 1}}, + ], + expected=[ + { + "_id": 1, + "r": [ + {"match": "1", "idx": 1, "captures": []}, + {"match": "2", "idx": 3, "captures": 
[]}, + ], + } + ], + msg="$regexFindAll should work in $set", + ), + StageTestCase( + "expr_regexMatch", + docs=[{"_id": 1, "a": "hello123"}], + pipeline=[ + {"$set": {"r": {"$regexMatch": {"input": "$a", "regex": "[0-9]+"}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": True}], + msg="$regexMatch should work in $set", + ), + StageTestCase( + "expr_replaceAll", + docs=[{"_id": 1, "a": "aabbcc"}], + pipeline=[ + {"$set": {"r": {"$replaceAll": {"input": "$a", "find": "b", "replacement": "x"}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": "aaxxcc"}], + msg="$replaceAll should work in $set", + ), + StageTestCase( + "expr_replaceOne", + docs=[{"_id": 1, "a": "aabbcc"}], + pipeline=[ + {"$set": {"r": {"$replaceOne": {"input": "$a", "find": "b", "replacement": "x"}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": "aaxbcc"}], + msg="$replaceOne should work in $set", + ), + StageTestCase( + "expr_rtrim", + docs=[{"_id": 1, "a": "hi "}], + pipeline=[{"$set": {"r": {"$rtrim": {"input": "$a"}}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": "hi"}], + msg="$rtrim should work in $set", + ), + StageTestCase( + "expr_split", + docs=[{"_id": 1, "a": "a,b,c"}], + pipeline=[{"$set": {"r": {"$split": ["$a", ","]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": ["a", "b", "c"]}], + msg="$split should work in $set", + ), + StageTestCase( + "expr_strcasecmp", + docs=[{"_id": 1, "a": "abc", "b": "ABC"}], + pipeline=[{"$set": {"r": {"$strcasecmp": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 0}], + msg="$strcasecmp should work in $set", + ), + StageTestCase( + "expr_strLenBytes", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$set": {"r": {"$strLenBytes": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 5}], + msg="$strLenBytes should work in $set", + ), + StageTestCase( + "expr_strLenCP", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$set": {"r": {"$strLenCP": "$a"}}}, {"$project": 
{"r": 1}}], + expected=[{"_id": 1, "r": 5}], + msg="$strLenCP should work in $set", + ), + StageTestCase( + "expr_substr", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$set": {"r": {"$substr": ["$a", 1, 3]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": "ell"}], + msg="$substr should work in $set", + ), + StageTestCase( + "expr_substrBytes", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$set": {"r": {"$substrBytes": ["$a", 1, 3]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": "ell"}], + msg="$substrBytes should work in $set", + ), + StageTestCase( + "expr_substrCP", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$set": {"r": {"$substrCP": ["$a", 1, 3]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": "ell"}], + msg="$substrCP should work in $set", + ), + StageTestCase( + "expr_toLower", + docs=[{"_id": 1, "a": "HELLO"}], + pipeline=[{"$set": {"r": {"$toLower": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": "hello"}], + msg="$toLower should work in $set", + ), + StageTestCase( + "expr_toString", + docs=[{"_id": 1, "a": 123}], + pipeline=[{"$set": {"r": {"$toString": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": "123"}], + msg="$toString should work in $set", + ), + StageTestCase( + "expr_toUpper", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$set": {"r": {"$toUpper": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": "HELLO"}], + msg="$toUpper should work in $set", + ), + StageTestCase( + "expr_trim", + docs=[{"_id": 1, "a": " hi "}], + pipeline=[{"$set": {"r": {"$trim": {"input": "$a"}}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": "hi"}], + msg="$trim should work in $set", + ), + # Timestamp. 
+ StageTestCase( + "expr_tsIncrement", + docs=[{"_id": 1, "t": Timestamp(100, 5)}], + pipeline=[{"$set": {"r": {"$tsIncrement": "$t"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": Int64(5)}], + msg="$tsIncrement should work in $set", + ), + StageTestCase( + "expr_tsSecond", + docs=[{"_id": 1, "t": Timestamp(100, 5)}], + pipeline=[{"$set": {"r": {"$tsSecond": "$t"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": Int64(100)}], + msg="$tsSecond should work in $set", + ), + # Trigonometry. + StageTestCase( + "expr_acos", + docs=[{"_id": 1, "a": 0.5}], + pipeline=[{"$set": {"r": {"$acos": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1.0471975511965979}], + msg="$acos should work in $set", + ), + StageTestCase( + "expr_acosh", + docs=[{"_id": 1, "a": 2}], + pipeline=[{"$set": {"r": {"$acosh": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1.3169578969248166}], + msg="$acosh should work in $set", + ), + StageTestCase( + "expr_asin", + docs=[{"_id": 1, "a": 0.5}], + pipeline=[{"$set": {"r": {"$asin": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 0.5235987755982989}], + msg="$asin should work in $set", + ), + StageTestCase( + "expr_asinh", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$set": {"r": {"$asinh": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 0.881373587019543}], + msg="$asinh should work in $set", + ), + StageTestCase( + "expr_atan", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$set": {"r": {"$atan": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 0.7853981633974483}], + msg="$atan should work in $set", + ), + StageTestCase( + "expr_atan2", + docs=[{"_id": 1, "a": 1, "b": 1}], + pipeline=[{"$set": {"r": {"$atan2": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 0.7853981633974483}], + msg="$atan2 should work in $set", + ), + StageTestCase( + "expr_atanh", + docs=[{"_id": 1, "a": 0.5}], + pipeline=[{"$set": {"r": {"$atanh": "$a"}}}, {"$project": {"r": 
1}}], + expected=[{"_id": 1, "r": 0.5493061443340548}], + msg="$atanh should work in $set", + ), + StageTestCase( + "expr_cos", + docs=[{"_id": 1, "a": 0}], + pipeline=[{"$set": {"r": {"$cos": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1.0}], + msg="$cos should work in $set", + ), + StageTestCase( + "expr_cosh", + docs=[{"_id": 1, "a": 0}], + pipeline=[{"$set": {"r": {"$cosh": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1.0}], + msg="$cosh should work in $set", + ), + StageTestCase( + "expr_degreesToRadians", + docs=[{"_id": 1, "a": 90}], + pipeline=[{"$set": {"r": {"$degreesToRadians": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1.5707963267948966}], + msg="$degreesToRadians should work in $set", + ), + StageTestCase( + "expr_radiansToDegrees", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$set": {"r": {"$radiansToDegrees": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 57.29577951308232}], + msg="$radiansToDegrees should work in $set", + ), + StageTestCase( + "expr_sin", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$set": {"r": {"$sin": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 0.8414709848078965}], + msg="$sin should work in $set", + ), + StageTestCase( + "expr_sinh", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$set": {"r": {"$sinh": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1.1752011936438014}], + msg="$sinh should work in $set", + ), + StageTestCase( + "expr_tan", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$set": {"r": {"$tan": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1.5574077246549023}], + msg="$tan should work in $set", + ), + StageTestCase( + "expr_tanh", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$set": {"r": {"$tanh": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 0.7615941559557649}], + msg="$tanh should work in $set", + ), + # Type. 
+ StageTestCase( + "expr_convert", + docs=[{"_id": 1, "a": "123"}], + pipeline=[ + {"$set": {"r": {"$convert": {"input": "$a", "to": "int"}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": 123}], + msg="$convert should work in $set", + ), + StageTestCase( + "expr_isNumber", + docs=[{"_id": 1, "a": 42}], + pipeline=[{"$set": {"r": {"$isNumber": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$isNumber should work in $set", + ), + StageTestCase( + "expr_toBool", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$set": {"r": {"$toBool": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": True}], + msg="$toBool should work in $set", + ), + StageTestCase( + "expr_toDecimal", + docs=[{"_id": 1, "a": 42}], + pipeline=[{"$set": {"r": {"$toDecimal": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": Decimal128("42")}], + msg="$toDecimal should work in $set", + ), + StageTestCase( + "expr_toDouble", + docs=[{"_id": 1, "a": "3.14"}], + pipeline=[{"$set": {"r": {"$toDouble": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 3.14}], + msg="$toDouble should work in $set", + ), + StageTestCase( + "expr_toInt", + docs=[{"_id": 1, "a": 3.9}], + pipeline=[{"$set": {"r": {"$toInt": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 3}], + msg="$toInt should work in $set", + ), + StageTestCase( + "expr_toLong", + docs=[{"_id": 1, "a": 42}], + pipeline=[{"$set": {"r": {"$toLong": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": Int64(42)}], + msg="$toLong should work in $set", + ), + StageTestCase( + "expr_toObjectId", + docs=[{"_id": 1, "a": "507f1f77bcf86cd799439011"}], + pipeline=[{"$set": {"r": {"$toObjectId": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": ObjectId("507f1f77bcf86cd799439011")}], + msg="$toObjectId should work in $set", + ), + StageTestCase( + "expr_type", + docs=[{"_id": 1, "a": 42}], + pipeline=[{"$set": {"r": {"$type": "$a"}}}, {"$project": {"r": 1}}], + 
expected=[{"_id": 1, "r": "int"}], + msg="$type should work in $set", + ), + StageTestCase( + "expr_toUUID", + docs=[{"_id": 1, "a": "12345678-1234-1234-1234-123456789abc"}], + pipeline=[{"$set": {"r": {"$toUUID": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": Binary.from_uuid(UUID("12345678-1234-1234-1234-123456789abc"))}], + msg="$toUUID should work in $set", + ), + # Accumulator (as expressions in $set). + StageTestCase( + "expr_sum", + docs=[{"_id": 1, "a": [1, 2, 3]}], + pipeline=[{"$set": {"r": {"$sum": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 6}], + msg="$sum should work in $set", + ), + StageTestCase( + "expr_avg", + docs=[{"_id": 1, "a": [2, 4, 6]}], + pipeline=[{"$set": {"r": {"$avg": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 4.0}], + msg="$avg should work in $set", + ), + StageTestCase( + "expr_min", + docs=[{"_id": 1, "a": [3, 1, 2]}], + pipeline=[{"$set": {"r": {"$min": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1}], + msg="$min should work in $set", + ), + StageTestCase( + "expr_max", + docs=[{"_id": 1, "a": [3, 1, 2]}], + pipeline=[{"$set": {"r": {"$max": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 3}], + msg="$max should work in $set", + ), + StageTestCase( + "expr_first", + docs=[{"_id": 1, "a": [10, 20, 30]}], + pipeline=[{"$set": {"r": {"$first": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 10}], + msg="$first should work in $set", + ), + StageTestCase( + "expr_last", + docs=[{"_id": 1, "a": [10, 20, 30]}], + pipeline=[{"$set": {"r": {"$last": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 30}], + msg="$last should work in $set", + ), + StageTestCase( + "expr_stdDevPop", + docs=[{"_id": 1, "a": [2, 4, 4, 4, 5, 5, 7, 9]}], + pipeline=[{"$set": {"r": {"$stdDevPop": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 2.0}], + msg="$stdDevPop should work in $set", + ), + StageTestCase( + "expr_stdDevSamp", +
docs=[{"_id": 1, "a": [1, 3]}], + pipeline=[{"$set": {"r": {"$stdDevSamp": "$a"}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": 1.4142135623730951}], + msg="$stdDevSamp should work in $set", + ), + StageTestCase( + "expr_median", + docs=[{"_id": 1, "a": [1, 2, 3, 4, 5]}], + pipeline=[ + {"$set": {"r": {"$median": {"input": "$a", "method": "approximate"}}}}, + {"$project": {"r": 1}}, + ], + expected=[{"_id": 1, "r": 3.0}], + msg="$median should work in $set", + ), + StageTestCase( + "expr_percentile", + docs=[{"_id": 1, "a": [1, 2, 3, 4, 5]}], + pipeline=[ + { + "$set": { + "r": {"$percentile": {"input": "$a", "p": [0.5], "method": "approximate"}} + } + } + ], + expected=[{"_id": 1, "a": [1, 2, 3, 4, 5], "r": [3.0]}], # $set keeps "a" + msg="$percentile should work in $set", + ), + # Set (additional). + StageTestCase( + "expr_setUnion", + docs=[{"_id": 1, "a": [1, 2], "b": [2, 3]}], + pipeline=[{"$set": {"r": {"$setUnion": ["$a", "$b"]}}}, {"$project": {"r": 1}}], + expected=[{"_id": 1, "r": [1, 2, 3]}], + msg="$setUnion should work in $set", + ), +] + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(SET_EXPRESSION_TESTS)) +def test_set_expression_cases(collection: Any, test_case: StageTestCase) -> None: + """Test that expression operators work within $set.""" + if test_case.docs: + collection.insert_many(test_case.docs) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_field_names.py b/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_field_names.py new file mode 100644 index 00000000..3de66972 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_field_names.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import
pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.set.utils.set_common import ( + STAGE_NAMES, + _replace_stage_name, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import MISSING + +# Property [Field Name Acceptance]: unicode characters, emoji, spaces, tabs, +# other special characters, all-digit names, and a non-leading $ are accepted in +# field names. NOTE(review): the 10,000-character long-name case is not listed here. +SET_FIELD_NAME_ACCEPTANCE_TESTS: list[StageTestCase] = [ + StageTestCase( + "field_name_unicode", + docs=[{"_id": 1}], + pipeline=[{"$set": {"\u00e9\u00e8\u00ea": "accented"}}], + expected=[{"_id": 1, "\u00e9\u00e8\u00ea": "accented"}], + msg="$set should accept unicode characters in field names", + ), + StageTestCase( + "field_name_emoji", + docs=[{"_id": 1}], + pipeline=[{"$set": {"\U0001f600\U0001f680": "emoji"}}], + expected=[{"_id": 1, "\U0001f600\U0001f680": "emoji"}], + msg="$set should accept emoji in field names", + ), + StageTestCase( + "field_name_space", + docs=[{"_id": 1}], + pipeline=[{"$set": {"hello world": 1}}], + expected=[{"_id": 1, "hello world": 1}], + msg="$set should accept spaces in field names", + ), + StageTestCase( + "field_name_tab", + docs=[{"_id": 1}], + pipeline=[{"$set": {"a\tb": 1}}], + expected=[{"_id": 1, "a\tb": 1}], + msg="$set should accept tabs in field names", + ), + StageTestCase( + "field_name_special_chars", + docs=[{"_id": 1}], + pipeline=[{"$set": {"a!@#%^&*()=+": 1}}], + expected=[{"_id": 1, "a!@#%^&*()=+": 1}], + msg="$set should accept special characters in field names", + ), + StageTestCase( + "field_name_numeric", + docs=[{"_id": 1}], + pipeline=[{"$set": {"123": 1}}], + expected=[{"_id": 1, "123": 1}], + msg="$set should
accept numeric field names", + ), + StageTestCase( + "field_name_non_leading_dollar", + docs=[{"_id": 1}], + pipeline=[{"$set": {"a$bc": 1}}], + expected=[{"_id": 1, "a$bc": 1}], + msg="$set should accept non-leading $ in field names", + ), +] + +# Property [Dollar-Sign String Values]: $-prefixed strings are interpreted as +# field path references. $literal prevents interpretation. Inside an array +# literal, a missing reference produces null rather than omitting the element. +SET_DOLLAR_SIGN_STRING_TESTS: list[StageTestCase] = [ + StageTestCase( + "dollar_field_ref", + docs=[{"_id": 1, "a": "hello"}], + pipeline=[{"$set": {"ref": "$a"}}], + expected=[{"_id": 1, "a": "hello", "ref": "hello"}], + msg="$set should interpret a $-prefixed string as a field reference", + ), + StageTestCase( + "dollar_space_field_ref", + docs=[{"_id": 1, " hello": "found"}], + pipeline=[{"$set": {"ref": "$ hello"}}], + expected=[{"_id": 1, " hello": "found", "ref": "found"}], + msg="$set should interpret '$ hello' as a reference to a field named ' hello'", + ), + StageTestCase( + "dollar_numeric_field_ref", + docs=[{"_id": 1, "123": "found"}], + pipeline=[{"$set": {"ref": "$123"}}], + expected=[{"_id": 1, "123": "found", "ref": "found"}], + msg="$set should interpret '$123' as a reference to a field named '123'", + ), + StageTestCase( + "dollar_literal_prevents_ref", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$set": {"ref": {"$literal": "$a"}}}], + expected=[{"_id": 1, "a": 1, "ref": "$a"}], + msg=( + "$set should produce the literal string '$a' when" + " wrapped in $literal, not a field reference" + ), + ), + StageTestCase( + "dollar_missing_ref_in_array_produces_null", + docs=[{"_id": 1, "a": 1}], + pipeline=[{"$set": {"ref": [1, MISSING, 3]}}], + expected=[{"_id": 1, "a": 1, "ref": [1, None, 3]}], + msg=( + "$set should produce null for a missing field reference" + " inside an array literal, unlike at the top level where" + " the field is omitted" + ), + ), +] + +SET_FIELD_NAME_TESTS
= SET_FIELD_NAME_ACCEPTANCE_TESTS + SET_DOLLAR_SIGN_STRING_TESTS + + +@pytest.mark.parametrize("stage_name", STAGE_NAMES) +@pytest.mark.parametrize("test_case", pytest_params(SET_FIELD_NAME_TESTS)) +def test_set_field_names(collection, stage_name: str, test_case: StageTestCase) -> None: + """Run each field-name and dollar-sign case once per stage name in STAGE_NAMES.""" + if test_case.docs: # insert_many is skipped when the case has no docs + collection.insert_many(test_case.docs) + pipeline = _replace_stage_name(test_case.pipeline, stage_name) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=f"{stage_name!r}: {test_case.msg!r}", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_field_values.py b/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_field_values.py new file mode 100644 index 00000000..df86b2d1 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/set/test_set_field_values.py @@ -0,0 +1,264 @@ +from __future__ import annotations + +import pytest +from bson import Timestamp + +from documentdb_tests.compatibility.tests.core.operator.stages.set.utils.set_common import ( + STAGE_NAMES, + _replace_stage_name, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import MISSING + +# Property [Null and Missing Field Values]: null is a concrete value that is +# included in output, while a reference to a missing field causes the output +# field to be omitted entirely.
# Cases contrasting an explicit null with a reference to a missing field.
# MISSING is imported from documentdb_tests.framework.test_constants; the case
# messages below describe it as a reference to a missing field.
SET_NULL_MISSING_TESTS: list[StageTestCase] = [
    # Explicit None: the field is present in the output with a null value.
    StageTestCase(
        "null_literal",
        docs=[{"_id": 1, "a": 1}],
        pipeline=[{"$set": {"b": None}}],
        expected=[{"_id": 1, "a": 1, "b": None}],
        msg="$set should add a field with value null when set to null literal",
    ),
    StageTestCase(
        "null_overwrite_existing",
        docs=[{"_id": 1, "a": 10}],
        pipeline=[{"$set": {"a": None}}],
        expected=[{"_id": 1, "a": None}],
        msg="$set should overwrite an existing field with null",
    ),
    # MISSING: the target field does not appear in the expected output at all.
    StageTestCase(
        "missing_field_ref_omits",
        docs=[{"_id": 1, "a": 1}],
        pipeline=[{"$set": {"b": MISSING}}],
        expected=[{"_id": 1, "a": 1}],
        msg="$set should omit the field when set to a reference to a missing field",
    ),
    # Here "b" exists on the input (b=2) and is expected to be gone afterwards.
    StageTestCase(
        "missing_field_ref_removes_existing",
        docs=[{"_id": 1, "a": 1, "b": 2}],
        pipeline=[{"$set": {"b": MISSING}}],
        expected=[{"_id": 1, "a": 1}],
        msg="$set should remove an existing field when set to a reference to a missing field",
    ),
    # One spec mixing all three kinds: "b" stays as null, "c" is dropped, "d" is set.
    StageTestCase(
        "multi_field_null_and_missing_independent",
        docs=[{"_id": 1, "a": 1}],
        pipeline=[{"$set": {"b": None, "c": MISSING, "d": 42}}],
        expected=[{"_id": 1, "a": 1, "b": None, "d": 42}],
        msg=(
            "$set should apply null and missing behaviors independently"
            " per field in a multi-field spec"
        ),
    ),
]

# Property [Field Addition]: setting a field that does not exist on the input
# document adds it to the output, preserving all existing fields.
# Every case targets field names that are absent from the seeded document, so
# each expected document is the input plus the newly set fields.
SET_FIELD_ADDITION_TESTS: list[StageTestCase] = [
    StageTestCase(
        "add_new_field",
        docs=[{"_id": 1, "a": 1}],
        pipeline=[{"$set": {"b": 2}}],
        expected=[{"_id": 1, "a": 1, "b": 2}],
        msg="$set should add a new field when the field name does not exist",
    ),
    # The new value is computed from the existing field: 5 + 10 == 15.
    StageTestCase(
        "add_new_field_expression",
        docs=[{"_id": 1, "a": 5}],
        pipeline=[{"$set": {"b": {"$add": ["$a", 10]}}}],
        expected=[{"_id": 1, "a": 5, "b": 15}],
        msg="$set should add a new field computed from an expression",
    ),
    StageTestCase(
        "add_multiple_fields",
        docs=[{"_id": 1, "a": 1}],
        pipeline=[{"$set": {"b": 2, "c": 3, "d": 4}}],
        expected=[{"_id": 1, "a": 1, "b": 2, "c": 3, "d": 4}],
        msg="$set should add multiple new fields in a single stage",
    ),
    # Spec and expectation are generated by the same comprehension:
    # keys f0..f499 mapped to the integers 0..499.
    StageTestCase(
        "add_large_spec_500_fields",
        docs=[{"_id": 1}],
        pipeline=[{"$set": {f"f{i}": i for i in range(500)}}],
        expected=[{"_id": 1, **{f"f{i}": i for i in range(500)}}],
        msg="$set should succeed with 500 fields in a single specification",
    ),
]

# Property [Field Overwrite]: setting a field that already exists on the input
# document replaces its value, and _id can be overwritten with a literal or
# expression result.
# Overwrite cases: the target field ("a" or "_id") already exists on the input.
SET_FIELD_OVERWRITE_TESTS: list[StageTestCase] = [
    StageTestCase(
        "overwrite_existing_field",
        docs=[{"_id": 1, "a": 10}],
        pipeline=[{"$set": {"a": 99}}],
        expected=[{"_id": 1, "a": 99}],
        msg="$set should replace the value of an existing field",
    ),
    StageTestCase(
        "overwrite_id_literal",
        docs=[{"_id": 1, "a": 1}],
        pipeline=[{"$set": {"_id": "new_id"}}],
        expected=[{"_id": "new_id", "a": 1}],
        msg="$set should overwrite _id with a literal value",
    ),
    # _id becomes the expression result: 5 + 10 == 15.
    StageTestCase(
        "overwrite_id_expression",
        docs=[{"_id": 1, "a": 5}],
        pipeline=[{"$set": {"_id": {"$add": ["$a", 10]}}}],
        expected=[{"_id": 15, "a": 5}],
        msg="$set should overwrite _id with an expression result",
    ),
]

# Property [$$REMOVE]: the $$REMOVE system variable explicitly removes a field
# from the output document. $literal wrapping prevents removal and produces the
# literal string instead.
SET_REMOVE_TESTS: list[StageTestCase] = [
    StageTestCase(
        "remove_existing_field",
        docs=[{"_id": 1, "a": 1, "b": 2}],
        pipeline=[{"$set": {"b": "$$REMOVE"}}],
        expected=[{"_id": 1, "a": 1}],
        msg="$set should remove an existing field when set to $$REMOVE",
    ),
    StageTestCase(
        "remove_nonexistent_field",
        docs=[{"_id": 1, "a": 1}],
        pipeline=[{"$set": {"b": "$$REMOVE"}}],
        expected=[{"_id": 1, "a": 1}],
        msg="$set should be a no-op when $$REMOVE targets a non-existent field",
    ),
    # The expected document carries no "_id" key at all.
    StageTestCase(
        "remove_id",
        docs=[{"_id": 1, "a": 1}],
        pipeline=[{"$set": {"_id": "$$REMOVE"}}],
        expected=[{"a": 1}],
        msg="$set should remove _id when set to $$REMOVE",
    ),
    # Both fields of the input are removed, leaving one empty document.
    StageTestCase(
        "remove_all_fields",
        docs=[{"_id": 1, "a": 1}],
        pipeline=[{"$set": {"_id": "$$REMOVE", "a": "$$REMOVE"}}],
        expected=[{}],
        msg="$set should produce an empty document when all fields are removed via $$REMOVE",
    ),
    StageTestCase(
        "remove_dot_notation",
        docs=[{"_id": 1, "a": {"x": 1, "y": 2}}],
        pipeline=[{"$set": {"a.x": "$$REMOVE"}}],
        expected=[{"_id": 1, "a": {"y": 2}}],
        msg="$set should remove a nested field via dot notation with $$REMOVE",
    ),
    # The dotted path goes through an array: "x" is dropped from every element.
    StageTestCase(
        "remove_array_traversal",
        docs=[{"_id": 1, "arr": [{"x": 1, "y": 2}, {"x": 3, "y": 4}]}],
        pipeline=[{"$set": {"arr.x": "$$REMOVE"}}],
        expected=[{"_id": 1, "arr": [{"y": 2}, {"y": 4}]}],
        msg="$set should remove a field from each array element via $$REMOVE with dot notation",
    ),
    # Only the document with a == 2 takes the $$REMOVE branch; the other keeps "$b".
    StageTestCase(
        "remove_conditional_cond",
        docs=[{"_id": 1, "a": 1, "b": "keep"}, {"_id": 2, "a": 2, "b": "drop"}],
        pipeline=[{"$set": {"b": {"$cond": [{"$eq": ["$a", 2]}, "$$REMOVE", "$b"]}}}],
        expected=[{"_id": 1, "a": 1, "b": "keep"}, {"_id": 2, "a": 2}],
        msg="$set should conditionally remove a field via $cond returning $$REMOVE",
    ),
    # Wrapped in $literal the text "$$REMOVE" is stored as an ordinary string.
    StageTestCase(
        "remove_literal_wrapping",
        docs=[{"_id": 1, "a": 1}],
        pipeline=[{"$set": {"b": {"$literal": "$$REMOVE"}}}],
        expected=[{"_id": 1, "a": 1, "b": "$$REMOVE"}],
        msg=(
            "$set should produce the literal string '$$REMOVE' when"
            " wrapped in $literal, not removal"
        ),
    ),
]

# Property [Empty Specification]: an empty specification is a no-op and
# documents pass through unchanged.
SET_EMPTY_SPEC_TESTS: list[StageTestCase] = [
    StageTestCase(
        "empty_spec_passthrough",
        docs=[{"_id": 1, "a": 1, "b": "hello"}],
        pipeline=[{"$set": {}}],
        expected=[{"_id": 1, "a": 1, "b": "hello"}],
        msg="$set with an empty specification should pass documents through unchanged",
    ),
]

# Property [Values 0, false, 1, true]: numeric 0 and 1 and boolean false and
# true are treated as literal values in $set, not as inclusion/exclusion flags.
SET_LITERAL_VALUE_TESTS: list[StageTestCase] = [
    StageTestCase(
        "zero_false_one_true_add",
        docs=[{"_id": 1, "a": "original"}],
        pipeline=[{"$set": {"b": 0, "c": False, "d": 1, "e": True}}],
        expected=[{"_id": 1, "a": "original", "b": 0, "c": False, "d": 1, "e": True}],
        msg="$set should treat 0, false, 1, and true as literal values when adding new fields",
    ),
    StageTestCase(
        "zero_false_one_true_overwrite",
        docs=[{"_id": 1, "b": "old_b", "c": "old_c", "d": "old_d", "e": "old_e"}],
        pipeline=[{"$set": {"b": 0, "c": False, "d": 1, "e": True}}],
        expected=[{"_id": 1, "b": 0, "c": False, "d": 1, "e": True}],
        msg=(
            "$set should treat 0, false, 1, and true as literal"
            " values when overwriting existing fields"
        ),
    ),
]

# Property [Timestamp Behavior]: Timestamp(0, 0) as a literal value in $set
# is preserved as-is, not replaced by the server unlike on insert.
SET_TIMESTAMP_TESTS: list[StageTestCase] = [
    StageTestCase(
        "timestamp_zero_zero_preserved",
        docs=[{"_id": 1, "a": 1}],
        pipeline=[{"$set": {"v": Timestamp(0, 0)}}],
        expected=[{"_id": 1, "a": 1, "v": Timestamp(0, 0)}],
        msg="$set should preserve Timestamp(0, 0) as-is, not replace it with the current time",
    ),
]

# All field-value tables, concatenated in declaration order, feed one test.
SET_FIELD_VALUE_TESTS = (
    SET_NULL_MISSING_TESTS
    + SET_FIELD_ADDITION_TESTS
    + SET_FIELD_OVERWRITE_TESTS
    + SET_REMOVE_TESTS
    + SET_EMPTY_SPEC_TESTS
    + SET_LITERAL_VALUE_TESTS
    + SET_TIMESTAMP_TESTS
)


@pytest.mark.parametrize("stage_name", STAGE_NAMES)
@pytest.mark.parametrize("test_case", pytest_params(SET_FIELD_VALUE_TESTS))
def test_set_field_values(collection, stage_name: str, test_case: StageTestCase):
    """Run one field-value case under the given stage name and check the result.

    Seeds the collection with the case's documents when there are any, renames
    the ``$set`` stages to ``stage_name``, issues an ``aggregate`` command and
    passes the outcome to ``assertResult`` with the case's expected documents
    and error code.
    """
    seed_docs = test_case.docs
    if seed_docs:
        collection.insert_many(seed_docs)

    aliased_pipeline = _replace_stage_name(test_case.pipeline, stage_name)
    # "aggregate" stays the first key of the command document.
    command = {
        "aggregate": collection.name,
        "pipeline": aliased_pipeline,
        "cursor": {},
    }
    outcome = execute_command(collection, command)

    failure_label = f"{stage_name!r}: {test_case.msg!r}"
    assertResult(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=failure_label,
    )


# NOTE: in the patch, the comment below opens the next file (test_set_paths.py).
# Property [Same-Stage Field References]: field references within a single
# $set stage resolve against the original input document, not against fields
# being set in the same stage.
# Each spec sets a field and also references a field named in the same spec;
# the expected values show which version of that field the reference saw.
SET_SAME_STAGE_REF_TESTS: list[StageTestCase] = [
    # a and b trade values (10 <-> 20): neither reference sees the other's new value.
    StageTestCase(
        "same_stage_ref_swap",
        docs=[{"_id": 1, "a": 10, "b": 20}],
        pipeline=[{"$set": {"a": "$b", "b": "$a"}}],
        expected=[{"_id": 1, "a": 20, "b": 10}],
        msg=(
            "$set should swap field values because both references"
            " resolve against the original document"
        ),
    ),
    # "b" receives the input value of "a" (1), not the 99 assigned in the same spec.
    StageTestCase(
        "same_stage_ref_forward_existing",
        docs=[{"_id": 1, "a": 1}],
        pipeline=[{"$set": {"a": 99, "b": "$a"}}],
        expected=[{"_id": 1, "a": 99, "b": 1}],
        msg=(
            "$set should resolve a forward reference to a field being"
            " overwritten against the original value"
        ),
    ),
    # "a" is not on the input document, so "$a" is a missing reference and the
    # expected output has no "b".
    StageTestCase(
        "same_stage_ref_forward_new_field",
        docs=[{"_id": 1, "x": 5}],
        pipeline=[{"$set": {"a": 100, "b": "$a"}}],
        expected=[{"_id": 1, "x": 5, "a": 100}],
        msg=(
            "$set should omit a field when it references a field being"
            " added in the same stage that did not exist on the original document"
        ),
    ),
]

# Property [Dot Notation]: dot-separated field paths in $set traverse or create
# nested document structure, and descend into arrays to apply to each element.
# All cases use a dotted key in the $set spec; they differ in what the path
# meets on the way (nothing, a sub-document, a scalar, an array, null).
SET_DOT_NOTATION_TESTS: list[StageTestCase] = [
    StageTestCase(
        "dot_creates_nested",
        docs=[{"_id": 1}],
        pipeline=[{"$set": {"a.b.c": 1}}],
        expected=[{"_id": 1, "a": {"b": {"c": 1}}}],
        msg="$set should create nested structure when intermediate path does not exist",
    ),
    StageTestCase(
        "dot_adds_to_existing_embedded",
        docs=[{"_id": 1, "a": {"x": 10}}],
        pipeline=[{"$set": {"a.y": 20}}],
        expected=[{"_id": 1, "a": {"x": 10, "y": 20}}],
        msg="$set should add a field to an existing embedded document preserving siblings",
    ),
    StageTestCase(
        "dot_overwrites_existing_nested",
        docs=[{"_id": 1, "a": {"x": 10, "y": 20}}],
        pipeline=[{"$set": {"a.x": 99}}],
        expected=[{"_id": 1, "a": {"x": 99, "y": 20}}],
        msg="$set should overwrite an existing nested field via dot notation",
    ),
    StageTestCase(
        "dot_traverses_array",
        docs=[{"_id": 1, "arr": [{"x": 1}, {"x": 2}]}],
        pipeline=[{"$set": {"arr.y": 99}}],
        expected=[{"_id": 1, "arr": [{"x": 1, "y": 99}, {"x": 2, "y": 99}]}],
        msg="$set should add the field to each object element when dot notation traverses an array",
    ),
    # Non-object elements (42, "hello") are expected to become {"y": 99}.
    StageTestCase(
        "dot_array_scalar_replaced",
        docs=[{"_id": 1, "arr": [{"x": 1}, 42, "hello"]}],
        pipeline=[{"$set": {"arr.y": 99}}],
        expected=[{"_id": 1, "arr": [{"x": 1, "y": 99}, {"y": 99}, {"y": 99}]}],
        msg="$set should replace scalar array elements with an object containing the new field",
    ),
    # Array of arrays: the inner objects are still reached and updated.
    StageTestCase(
        "dot_nested_array_traversal",
        docs=[{"_id": 1, "a": [[{"x": 1}], [{"x": 2}]]}],
        pipeline=[{"$set": {"a.x": 99}}],
        expected=[{"_id": 1, "a": [[{"x": 99}], [{"x": 99}]]}],
        msg="$set should traverse into nested arrays recursively via dot notation",
    ),
    StageTestCase(
        "dot_through_scalar_parent",
        docs=[{"_id": 1, "x": 42}],
        pipeline=[{"$set": {"x.y": 10}}],
        expected=[{"_id": 1, "x": {"y": 10}}],
        msg=(
            "$set should replace a scalar parent with an object"
            " when dot notation traverses through it"
        ),
    ),
    # The None element is expected to become {"y": 99}, like the scalars above.
    StageTestCase(
        "dot_null_in_array",
        docs=[{"_id": 1, "arr": [{"x": 1}, None, {"x": 3}]}],
        pipeline=[{"$set": {"arr.y": 99}}],
        expected=[{"_id": 1, "arr": [{"x": 1, "y": 99}, {"y": 99}, {"x": 3, "y": 99}]}],
        msg="$set should convert a null element in an array to an object containing the new field",
    ),
    # "arr.0" does not address index 0: every element becomes {"0": "val"}.
    StageTestCase(
        "dot_numeric_path_as_field_name",
        docs=[{"_id": 1, "arr": [10, 20, 30]}],
        pipeline=[{"$set": {"arr.0": "val"}}],
        expected=[{"_id": 1, "arr": [{"0": "val"}, {"0": "val"}, {"0": "val"}]}],
        msg="$set should treat numeric path components as field names, not array indices",
    ),
    StageTestCase(
        "dot_deeply_nested",
        docs=[{"_id": 1}],
        pipeline=[{"$set": {"a.b.c.d": "deep"}}],
        expected=[{"_id": 1, "a": {"b": {"c": {"d": "deep"}}}}],
        msg="$set should handle deeply nested dot notation without error",
    ),
    # The input _id is itself a sub-document ({"a": 1}).
    StageTestCase(
        "dot_id_sub_document",
        docs=[{"_id": {"a": 1}, "x": 5}],
        pipeline=[{"$set": {"_id.sub": "hello"}}],
        expected=[{"_id": {"a": 1, "sub": "hello"}, "x": 5}],
        msg="$set should add a field to an _id sub-document via dot notation",
    ),
    StageTestCase(
        "dot_sibling_paths_same_depth",
        docs=[{"_id": 1, "a": {"x": 1}}],
        pipeline=[{"$set": {"a.b": 2, "a.c": 3}}],
        expected=[{"_id": 1, "a": {"x": 1, "b": 2, "c": 3}}],
        msg="$set should accept sibling dot paths at the same depth and set both fields",
    ),
]

# Property [Embedded Object Values]: when a field is set to a non-empty object
# without $-prefixed keys, $set merges it recursively with the existing value
# rather than replacing it. An empty object or $literal-wrapped object replaces
# the existing value entirely.
SET_EMBEDDED_OBJECT_TESTS: list[StageTestCase] = [
    StageTestCase(
        "embedded_merge_preserves_siblings",
        docs=[{"_id": 1, "a": {"x": 1, "y": 2}}],
        pipeline=[{"$set": {"a": {"z": 3}}}],
        expected=[{"_id": 1, "a": {"x": 1, "y": 2, "z": 3}}],
        msg=(
            "$set should merge a non-empty embedded object with an"
            " existing nested document, preserving unmentioned fields"
        ),
    ),
    StageTestCase(
        "embedded_empty_replaces",
        docs=[{"_id": 1, "a": {"x": 1, "y": 2}}],
        pipeline=[{"$set": {"a": {}}}],
        expected=[{"_id": 1, "a": {}}],
        msg="$set should replace the existing value entirely when set to an empty object",
    ),
    StageTestCase(
        "embedded_asymmetric_merge_and_replace",
        docs=[{"_id": 1, "a": {"x": 1, "y": 2}, "b": {"x": 1, "y": 2}}],
        pipeline=[{"$set": {"a": {"z": 3}, "b": {}}}],
        expected=[{"_id": 1, "a": {"x": 1, "y": 2, "z": 3}, "b": {}}],
        msg=(
            "$set should merge a non-empty object but replace with an"
            " empty object in the same specification"
        ),
    ),
    StageTestCase(
        "embedded_literal_replaces",
        docs=[{"_id": 1, "a": {"x": 1, "y": 2}}],
        pipeline=[{"$set": {"a": {"$literal": {"z": 3}}}}],
        expected=[{"_id": 1, "a": {"z": 3}}],
        msg=(
            "$set should replace the existing value entirely when"
            " the object is wrapped in $literal"
        ),
    ),
    StageTestCase(
        "embedded_on_scalar_replaces",
        docs=[{"_id": 1, "a": 42}],
        pipeline=[{"$set": {"a": {"z": 3}}}],
        expected=[{"_id": 1, "a": {"z": 3}}],
        msg="$set should replace a scalar field with an embedded object",
    ),
    StageTestCase(
        "embedded_on_array_traverses",
        docs=[{"_id": 1, "a": [{"x": 1}, {"x": 2}]}],
        pipeline=[{"$set": {"a": {"y": 99}}}],
        expected=[{"_id": 1, "a": [{"x": 1, "y": 99}, {"x": 2, "y": 99}]}],
        msg="$set should traverse an array and merge the embedded object into each element",
    ),
]

# The three path-related tables, concatenated, feed one parametrized test.
SET_PATH_TESTS = SET_SAME_STAGE_REF_TESTS + SET_DOT_NOTATION_TESTS + SET_EMBEDDED_OBJECT_TESTS


@pytest.mark.parametrize("stage_name", STAGE_NAMES)
@pytest.mark.parametrize("test_case", pytest_params(SET_PATH_TESTS))
def test_set_paths(collection, stage_name: str, test_case: StageTestCase):
    """Run one path / embedded-object case under the given stage name.

    Seeds the collection with the case's documents when there are any, renames
    the ``$set`` stages to ``stage_name``, issues an ``aggregate`` command and
    passes the outcome to ``assertResult`` with the case's expected documents
    and error code.
    """
    seed_docs = test_case.docs
    if seed_docs:
        collection.insert_many(seed_docs)

    aliased_pipeline = _replace_stage_name(test_case.pipeline, stage_name)
    # "aggregate" stays the first key of the command document.
    command = {
        "aggregate": collection.name,
        "pipeline": aliased_pipeline,
        "cursor": {},
    }
    outcome = execute_command(collection, command)

    failure_label = f"{stage_name!r}: {test_case.msg!r}"
    assertResult(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=failure_label,
    )


# NOTE: in the patch, everything below belongs to the next file
# (test_set_pipeline.py).
# Property [Empty Collection]: $set on a collection with no documents returns
# an empty result set without error.
SET_EMPTY_COLLECTION_TESTS: list[StageTestCase] = [
    StageTestCase(
        "empty_collection",
        docs=[],
        pipeline=[{"$set": {"b": 1}}],
        expected=[],
        msg="$set on an empty collection should return empty result",
    ),
]

# Property [Multiple Documents]: $set applies the specification to every
# document in the input independently.
SET_MULTIPLE_DOCUMENTS_TESTS: list[StageTestCase] = [
    StageTestCase(
        "multiple_documents",
        docs=[{"_id": 1, "a": 10}, {"_id": 2, "a": 20}, {"_id": 3, "a": 30}],
        pipeline=[{"$set": {"b": {"$add": ["$a", 1]}}}],
        expected=[
            {"_id": 1, "a": 10, "b": 11},
            {"_id": 2, "a": 20, "b": 21},
            {"_id": 3, "a": 30, "b": 31},
        ],
        msg="$set should apply to every document independently",
    ),
]

# Property [Consecutive Stages]: multiple $set stages compose sequentially,
# with each stage seeing the output of the previous one.
SET_CONSECUTIVE_STAGES_TESTS: list[StageTestCase] = [
    StageTestCase(
        "consecutive_stages",
        docs=[{"_id": 1, "a": 5}],
        pipeline=[
            {"$set": {"b": {"$add": ["$a", 10]}}},
            {"$set": {"c": {"$add": ["$b", 100]}}},
        ],
        expected=[{"_id": 1, "a": 5, "b": 15, "c": 115}],
        msg="$set second stage should see fields added by the first stage",
    ),
]

# The pipeline-level tables, concatenated, feed one parametrized test.
SET_PIPELINE_TESTS = (
    SET_EMPTY_COLLECTION_TESTS + SET_MULTIPLE_DOCUMENTS_TESTS + SET_CONSECUTIVE_STAGES_TESTS
)


@pytest.mark.parametrize("stage_name", STAGE_NAMES)
@pytest.mark.parametrize("test_case", pytest_params(SET_PIPELINE_TESTS))
def test_set_pipeline(collection, stage_name: str, test_case: StageTestCase):
    """Run one pipeline-level case under the given stage name.

    Seeds the collection with the case's documents when there are any (the
    empty-collection case seeds nothing), renames the ``$set`` stages to
    ``stage_name``, issues an ``aggregate`` command and passes the outcome to
    ``assertResult`` with the case's expected documents and error code.
    """
    seed_docs = test_case.docs
    if seed_docs:
        collection.insert_many(seed_docs)

    aliased_pipeline = _replace_stage_name(test_case.pipeline, stage_name)
    # "aggregate" stays the first key of the command document.
    command = {
        "aggregate": collection.name,
        "pipeline": aliased_pipeline,
        "cursor": {},
    }
    outcome = execute_command(collection, command)

    failure_label = f"{stage_name!r}: {test_case.msg!r}"
    assertResult(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=failure_label,
    )
b/documentdb_tests/compatibility/tests/core/operator/stages/set/utils/set_common.py new file mode 100644 index 00000000..ce31ea8a --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/set/utils/set_common.py @@ -0,0 +1,31 @@ +"""Shared infrastructure for $set / $addFields tests.""" + +from __future__ import annotations + +from typing import Any + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.addFields.utils.operator_addFields import ( # noqa: E501 + ADD_FIELDS_OPERATOR, +) + +STAGE_NAMES = [ + pytest.param("$set", id="set"), + ADD_FIELDS_OPERATOR, +] + + +def _replace_stage_name( + pipeline: list[dict[str, Any]] | None, stage_name: str +) -> list[dict[str, Any]]: + """Swap $set for the given stage name so tests run against both aliases.""" + assert pipeline is not None, "test case must define a pipeline" + result = [] + for stage in pipeline: + if "$set" in stage: + stage_content = stage["$set"] + result.append({stage_name: stage_content}) + else: + result.append(stage) + return result diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_set.py b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_set.py new file mode 100644 index 00000000..2fb3fbd0 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_set.py @@ -0,0 +1,147 @@ +"""Tests for $set composing with other stages at different pipeline positions.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Pipeline Position]: $set adds or overwrites fields correctly when +# composed with other stage types at different 
# The cases below place $set before or after $match, $sort, $group, $unwind,
# $project and $replaceRoot and state the documents each pipeline should yield.
SET_PIPELINE_POSITION_TESTS: list[StageTestCase] = [
    StageTestCase(
        "pipeline_after_match",
        docs=[
            {"_id": 1, "a": 10},
            {"_id": 2, "a": 30},
        ],
        pipeline=[
            {"$match": {"a": {"$gt": 15}}},
            {"$set": {"b": {"$multiply": ["$a", 2]}}},
        ],
        expected=[{"_id": 2, "a": 30, "b": 60}],
        msg="$set should work after a $match stage",
    ),
    StageTestCase(
        "pipeline_before_match",
        docs=[
            {"_id": 1, "a": 10},
            {"_id": 2, "a": 30},
        ],
        pipeline=[
            {"$set": {"b": {"$multiply": ["$a", 2]}}},
            {"$match": {"b": {"$gt": 40}}},
        ],
        expected=[{"_id": 2, "a": 30, "b": 60}],
        msg="$set should add fields visible to a subsequent $match stage",
    ),
    StageTestCase(
        "pipeline_set_then_sort",
        docs=[
            {"_id": 1, "a": 30},
            {"_id": 2, "a": 10},
            {"_id": 3, "a": 20},
        ],
        pipeline=[
            {"$set": {"b": {"$multiply": ["$a", -1]}}},
            {"$sort": {"b": 1}},
        ],
        expected=[
            {"_id": 1, "a": 30, "b": -30},
            {"_id": 3, "a": 20, "b": -20},
            {"_id": 2, "a": 10, "b": -10},
        ],
        msg="$set should add fields usable by a subsequent $sort stage",
    ),
    StageTestCase(
        "pipeline_after_group",
        docs=[
            {"_id": 1, "cat": "a", "val": 5},
            {"_id": 2, "cat": "b", "val": 3},
            {"_id": 3, "cat": "a", "val": 7},
        ],
        pipeline=[
            {"$group": {"_id": "$cat", "total": {"$sum": "$val"}}},
            {"$set": {"doubled": {"$multiply": ["$total", 2]}}},
            {"$sort": {"_id": 1}},
        ],
        expected=[
            {"_id": "a", "total": 12, "doubled": 24},
            {"_id": "b", "total": 3, "doubled": 6},
        ],
        msg="$set should compute fields from $group output",
    ),
    StageTestCase(
        "pipeline_before_group",
        docs=[
            {"_id": 1, "cat": "a", "val": 5},
            {"_id": 2, "cat": "a", "val": 7},
        ],
        pipeline=[
            {"$set": {"doubled": {"$multiply": ["$val", 2]}}},
            {"$group": {"_id": "$cat", "total": {"$sum": "$doubled"}}},
        ],
        expected=[{"_id": "a", "total": 24}],
        msg="$set should add fields usable by a subsequent $group stage",
    ),
    StageTestCase(
        "pipeline_after_unwind",
        docs=[{"_id": 1, "a": [10, 20]}],
        pipeline=[
            {"$unwind": "$a"},
            {"$set": {"b": {"$multiply": ["$a", 2]}}},
        ],
        expected=[
            {"_id": 1, "a": 10, "b": 20},
            {"_id": 1, "a": 20, "b": 40},
        ],
        msg="$set should add fields to each unwound document",
    ),
    StageTestCase(
        "pipeline_after_project",
        docs=[{"_id": 1, "a": 5, "b": 10}],
        pipeline=[
            {"$project": {"a": 1}},
            {"$set": {"c": {"$multiply": ["$a", 3]}}},
        ],
        expected=[{"_id": 1, "a": 5, "c": 15}],
        msg="$set should add fields to documents narrowed by $project",
    ),
    StageTestCase(
        "pipeline_after_replaceRoot",
        docs=[{"_id": 1, "inner": {"x": 10, "y": 20}}],
        pipeline=[
            {"$replaceRoot": {"newRoot": "$inner"}},
            {"$set": {"z": {"$add": ["$x", "$y"]}}},
        ],
        expected=[{"x": 10, "y": 20, "z": 30}],
        msg="$set should add fields to documents produced by $replaceRoot",
    ),
]


@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(SET_PIPELINE_POSITION_TESTS))
def test_stages_position_set_cases(collection, test_case: StageTestCase):
    """Run one pipeline-position case exactly as written and check the result.

    Seeds the collection with the case's documents when there are any, issues
    an ``aggregate`` command with the case's pipeline unchanged (no stage-name
    aliasing here) and passes the outcome to ``assertResult`` with the case's
    expected documents, error code and message.
    """
    seed_docs = test_case.docs
    if seed_docs:
        collection.insert_many(seed_docs)

    # "aggregate" stays the first key of the command document.
    command = {
        "aggregate": collection.name,
        "pipeline": test_case.pipeline,
        "cursor": {},
    }
    outcome = execute_command(collection, command)

    assertResult(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )