diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/count/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/count/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/count/test_count_behavior.py b/documentdb_tests/compatibility/tests/core/operator/stages/count/test_count_behavior.py
new file mode 100644
index 00000000..30e33627
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/count/test_count_behavior.py
@@ -0,0 +1,119 @@
+"""Tests for $count aggregation stage — core counting behavior."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
+    StageTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Core Counting Behavior]: the output is exactly one document whose
+# field value equals the number of documents reaching the $count stage.
+COUNT_CORE_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "core_single_doc",
+        docs=[{"_id": 1, "x": 1}],
+        pipeline=[{"$count": "total"}],
+        expected=[{"total": 1}],
+        msg="$count should return 1 for a single input document",
+    ),
+    StageTestCase(
+        "core_multiple_docs",
+        docs=[{"_id": 1}, {"_id": 2}, {"_id": 3}, {"_id": 4}, {"_id": 5}],
+        pipeline=[{"$count": "total"}],
+        expected=[{"total": 5}],
+        msg="$count should return the number of input documents",
+    ),
+    StageTestCase(
+        "core_large_collection",
+        docs=[{"_id": i} for i in range(10_000)],
+        pipeline=[{"$count": "total"}],
+        expected=[{"total": 10_000}],
+        msg="$count should return correct count for a large number of documents",
+    ),
+]
+
+# Property [Empty Input Behavior]: when zero documents reach the $count stage,
+# the result is an empty cursor rather than a document with count 0.
+COUNT_EMPTY_INPUT_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "empty_collection",
+        docs=None,
+        pipeline=[{"$count": "total"}],
+        expected=[],
+        msg="$count should return empty cursor on an empty collection",
+    ),
+]
+
+# Property [Single Document Output]: the output is always exactly 0 or 1
+# documents, and consecutive $count stages each count the single document
+# output by the previous stage.
+COUNT_SINGLE_OUTPUT_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "cardinality_many_docs_one_output",
+        docs=[{"_id": i} for i in range(50)],
+        pipeline=[
+            {"$count": "total"},
+            {"$count": "num_results"},
+        ],
+        expected=[{"num_results": 1}],
+        msg="$count should produce exactly one document regardless of input count",
+    ),
+]
+
+# Property [Return Type]: the count value is int32 for practically testable
+# document counts. The server promotes to long when the count exceeds the
+# int32 maximum (~2.1 billion), and to double beyond int64, but exercising
+# those transitions would require inserting billions of documents.
+COUNT_RETURN_TYPE_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "return_type_single",
+        docs=[{"_id": 1}],
+        pipeline=[
+            {"$count": "n"},
+            {"$addFields": {"type": {"$type": "$n"}}},
+        ],
+        expected=[{"n": 1, "type": "int"}],
+        msg="$count should return int32 for a count of 1",
+    ),
+    StageTestCase(
+        "return_type_multiple",
+        docs=[{"_id": i} for i in range(10_000)],
+        pipeline=[
+            {"$count": "n"},
+            {"$addFields": {"type": {"$type": "$n"}}},
+        ],
+        expected=[{"n": 10_000, "type": "int"}],
+        msg="$count should return int32 for a count of 10000",
+    ),
+]
+
+COUNT_BEHAVIOR_TESTS = (
+    COUNT_CORE_TESTS + COUNT_EMPTY_INPUT_TESTS + COUNT_SINGLE_OUTPUT_TESTS + COUNT_RETURN_TYPE_TESTS
+)
+
+
+@pytest.mark.aggregate
+@pytest.mark.parametrize("test_case", pytest_params(COUNT_BEHAVIOR_TESTS))
+def test_count_behavior(collection, test_case: StageTestCase):
+    """Test $count core counting behavior."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(
+        result,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/count/test_count_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/count/test_count_errors.py
new file mode 100644
index 00000000..197c4e00
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/count/test_count_errors.py
@@ -0,0 +1,391 @@
+"""Tests for $count aggregation stage — error handling."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+
+import pytest
+from bson import (
+    Binary,
+    Code,
+    Int64,
+    MaxKey,
+    MinKey,
+    ObjectId,
+    Regex,
+    Timestamp,
+)
+
+from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
+    StageTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.error_codes import (
+    COUNT_FIELD_DOLLAR_PREFIX_ERROR,
+    COUNT_FIELD_DOT_ERROR,
+    COUNT_FIELD_EMPTY_ERROR,
+    COUNT_FIELD_ID_RESERVED_ERROR,
+    COUNT_FIELD_NULL_BYTE_ERROR,
+    COUNT_FIELD_TYPE_ERROR,
+    PIPELINE_STAGE_EXTRA_FIELD_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+from documentdb_tests.framework.test_constants import DECIMAL128_ZERO
+
+# Property [Arity Errors]: the $count stage document must contain exactly one
+# key, and the multi-key error fires before any validation of the value.
+COUNT_ARITY_ERROR_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "arity_extra_key",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "total", "extra": 1}],
+        error_code=PIPELINE_STAGE_EXTRA_FIELD_ERROR,
+        msg="$count should reject a stage document with multiple keys",
+    ),
+]
+
+# Property [Type Strictness]: any non-string type is rejected as the count field.
+COUNT_TYPE_STRICTNESS_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "type_int32",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": 42}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject int32 value",
+    ),
+    StageTestCase(
+        "type_int64",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": Int64(42)}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject int64 value",
+    ),
+    StageTestCase(
+        "type_double",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": 3.14}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject double value",
+    ),
+    StageTestCase(
+        "type_decimal128",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": DECIMAL128_ZERO}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject Decimal128 value",
+    ),
+    StageTestCase(
+        "type_bool",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": True}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject boolean value",
+    ),
+    StageTestCase(
+        "type_null",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": None}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject null value",
+    ),
+    StageTestCase(
+        "type_array",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": ["x"]}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject array value",
+    ),
+    StageTestCase(
+        "type_object",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": {"x": 1}}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject object value",
+    ),
+    StageTestCase(
+        "type_objectid",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": ObjectId("000000000000000000000001")}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject ObjectId value",
+    ),
+    StageTestCase(
+        "type_datetime",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": datetime(2024, 1, 1, tzinfo=timezone.utc)}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject datetime value",
+    ),
+    StageTestCase(
+        "type_timestamp",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": Timestamp(1, 1)}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject Timestamp value",
+    ),
+    StageTestCase(
+        "type_binary",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": Binary(b"hello")}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject Binary value",
+    ),
+    StageTestCase(
+        "type_regex",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": Regex("abc")}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject Regex value",
+    ),
+    StageTestCase(
+        "type_code",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": Code("function(){}")}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject Code value",
+    ),
+    StageTestCase(
+        "type_code_with_scope",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": Code("function(){}", {"x": 1})}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject CodeWithScope value",
+    ),
+    StageTestCase(
+        "type_minkey",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": MinKey()}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject MinKey value",
+    ),
+    StageTestCase(
+        "type_maxkey",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": MaxKey()}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject MaxKey value",
+    ),
+    StageTestCase(
+        "type_expression_object",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": {"$literal": "x"}}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count should reject expression-like object without evaluating it",
+    ),
+]
+
+# Property [String Validation Errors]: invalid string values are rejected
+# with specific error codes depending on the violation.
+COUNT_STRING_VALIDATION_ERROR_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "string_empty",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": ""}],
+        error_code=COUNT_FIELD_EMPTY_ERROR,
+        msg="$count should reject empty string",
+    ),
+    StageTestCase(
+        "string_dollar_prefix_bare",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "$"}],
+        error_code=COUNT_FIELD_DOLLAR_PREFIX_ERROR,
+        msg="$count should reject string starting with $",
+    ),
+    StageTestCase(
+        "string_dollar_prefix_word",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "$foo"}],
+        error_code=COUNT_FIELD_DOLLAR_PREFIX_ERROR,
+        msg="$count should reject $-prefixed path",
+    ),
+    StageTestCase(
+        "string_dollar_prefix_nested",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "$$var"}],
+        error_code=COUNT_FIELD_DOLLAR_PREFIX_ERROR,
+        msg="$count should reject string starting with $$",
+    ),
+    StageTestCase(
+        "string_null_byte_middle",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "a\x00b"}],
+        error_code=COUNT_FIELD_NULL_BYTE_ERROR,
+        msg="$count should reject string containing null byte in middle",
+    ),
+    StageTestCase(
+        "string_null_byte_start",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "\x00abc"}],
+        error_code=COUNT_FIELD_NULL_BYTE_ERROR,
+        msg="$count should reject string starting with null byte",
+    ),
+    StageTestCase(
+        "string_null_byte_end",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "abc\x00"}],
+        error_code=COUNT_FIELD_NULL_BYTE_ERROR,
+        msg="$count should reject string ending with null byte",
+    ),
+    StageTestCase(
+        "string_dot_bare",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "."}],
+        error_code=COUNT_FIELD_DOT_ERROR,
+        msg="$count should reject a bare dot as field name",
+    ),
+    StageTestCase(
+        "string_dot_double",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": ".."}],
+        error_code=COUNT_FIELD_DOT_ERROR,
+        msg="$count should reject double dot as field name",
+    ),
+    StageTestCase(
+        "string_dot_middle",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "a.b"}],
+        error_code=COUNT_FIELD_DOT_ERROR,
+        msg="$count should reject string containing dot",
+    ),
+    StageTestCase(
+        "string_dot_start",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": ".abc"}],
+        error_code=COUNT_FIELD_DOT_ERROR,
+        msg="$count should reject string starting with dot",
+    ),
+    StageTestCase(
+        "string_dot_end",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "abc."}],
+        error_code=COUNT_FIELD_DOT_ERROR,
+        msg="$count should reject string ending with dot",
+    ),
+    StageTestCase(
+        "string_id_exact",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "_id"}],
+        error_code=COUNT_FIELD_ID_RESERVED_ERROR,
+        msg="$count should reject the exact string '_id'",
+    ),
+]
+
+# Property [Error Precedence]: when multiple validation rules are violated
+# simultaneously, errors are produced in strict priority order: multi-key >
+# non-string type > $ prefix > null byte > dot > empty string > _id reserved.
+COUNT_ERROR_PRECEDENCE_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "precedence_extra_key_over_invalid_value",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": 123, "extra": 1}],
+        error_code=PIPELINE_STAGE_EXTRA_FIELD_ERROR,
+        msg="$count multi-key error should fire before value validation",
+    ),
+    StageTestCase(
+        "precedence_dollar_over_dot",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "$."}],
+        error_code=COUNT_FIELD_DOLLAR_PREFIX_ERROR,
+        msg="$count $ prefix error should take precedence over dot error",
+    ),
+    StageTestCase(
+        "precedence_dollar_over_null_byte",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "$\x00"}],
+        error_code=COUNT_FIELD_DOLLAR_PREFIX_ERROR,
+        msg="$count $ prefix error should take precedence over null byte error",
+    ),
+    StageTestCase(
+        "precedence_null_byte_over_dot_null_first",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "\x00."}],
+        error_code=COUNT_FIELD_NULL_BYTE_ERROR,
+        msg="$count null byte error should take precedence over dot error when null byte is first",
+    ),
+    StageTestCase(
+        "precedence_null_byte_over_dot_dot_first",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": ".\x00"}],
+        error_code=COUNT_FIELD_NULL_BYTE_ERROR,
+        msg="$count null byte error should take precedence over dot error when dot is first",
+    ),
+]
+
+# Property [Errors on Empty Collection]: every validation error fires even
+# when the collection is empty, confirming that the server rejects invalid
+# $count parameters without requiring any input documents.
+COUNT_EMPTY_COLLECTION_ERROR_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "timing_type_error_empty_collection",
+        docs=None,
+        pipeline=[{"$count": 123}],
+        error_code=COUNT_FIELD_TYPE_ERROR,
+        msg="$count type error should fire on empty collection",
+    ),
+    StageTestCase(
+        "timing_empty_string_empty_collection",
+        docs=None,
+        pipeline=[{"$count": ""}],
+        error_code=COUNT_FIELD_EMPTY_ERROR,
+        msg="$count empty string error should fire on empty collection",
+    ),
+    StageTestCase(
+        "timing_dollar_prefix_empty_collection",
+        docs=None,
+        pipeline=[{"$count": "$bad"}],
+        error_code=COUNT_FIELD_DOLLAR_PREFIX_ERROR,
+        msg="$count $ prefix error should fire on empty collection",
+    ),
+    StageTestCase(
+        "timing_null_byte_empty_collection",
+        docs=None,
+        pipeline=[{"$count": "a\x00b"}],
+        error_code=COUNT_FIELD_NULL_BYTE_ERROR,
+        msg="$count null byte error should fire on empty collection",
+    ),
+    StageTestCase(
+        "timing_dot_empty_collection",
+        docs=None,
+        pipeline=[{"$count": "a.b"}],
+        error_code=COUNT_FIELD_DOT_ERROR,
+        msg="$count dot error should fire on empty collection",
+    ),
+    StageTestCase(
+        "timing_id_reserved_empty_collection",
+        docs=None,
+        pipeline=[{"$count": "_id"}],
+        error_code=COUNT_FIELD_ID_RESERVED_ERROR,
+        msg="$count _id reserved error should fire on empty collection",
+    ),
+]
+
+COUNT_ERROR_TESTS = (
+    COUNT_ARITY_ERROR_TESTS
+    + COUNT_TYPE_STRICTNESS_TESTS
+    + COUNT_STRING_VALIDATION_ERROR_TESTS
+    + COUNT_ERROR_PRECEDENCE_TESTS
+    + COUNT_EMPTY_COLLECTION_ERROR_TESTS
+)
+
+
+@pytest.mark.aggregate
+@pytest.mark.parametrize("test_case", pytest_params(COUNT_ERROR_TESTS))
+def test_count_errors(collection, test_case: StageTestCase):
+    """Test $count error handling."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(
+        result,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/count/test_count_field_names.py b/documentdb_tests/compatibility/tests/core/operator/stages/count/test_count_field_names.py
new file mode 100644
index 00000000..88df299d
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/count/test_count_field_names.py
@@ -0,0 +1,232 @@
+"""Tests for $count aggregation stage — field name validation."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
+    StageTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Valid Field Names]: any non-empty string that does not start with
+# $, does not contain a dot or null byte, and is not "_id" is accepted.
+COUNT_VALID_FIELD_NAME_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "valid_space",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": " "}],
+        expected=[{" ": 1}],
+        msg="$count should accept a space character as field name",
+    ),
+    StageTestCase(
+        "valid_tab",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "\t"}],
+        expected=[{"\t": 1}],
+        msg="$count should accept a tab character as field name",
+    ),
+    StageTestCase(
+        "valid_nbsp",
+        docs=[{"_id": 1}],
+        # U+00A0 non-breaking space.
+        pipeline=[{"$count": "\u00a0"}],
+        expected=[{"\u00a0": 1}],
+        msg="$count should accept NBSP as field name",
+    ),
+    StageTestCase(
+        "valid_unicode_accented",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "caf\u00e9"}],
+        expected=[{"caf\u00e9": 1}],
+        msg="$count should accept accented Unicode characters",
+    ),
+    StageTestCase(
+        "valid_unicode_emoji",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "\U0001f600"}],
+        expected=[{"\U0001f600": 1}],
+        msg="$count should accept emoji as field name",
+    ),
+    StageTestCase(
+        "valid_unicode_cjk",
+        docs=[{"_id": 1}],
+        # U+4E16 (CJK character).
+        pipeline=[{"$count": "\u4e16"}],
+        expected=[{"\u4e16": 1}],
+        msg="$count should accept CJK characters as field name",
+    ),
+    StageTestCase(
+        "valid_zwsp",
+        docs=[{"_id": 1}],
+        # U+200B zero-width space.
+        pipeline=[{"$count": "\u200b"}],
+        expected=[{"\u200b": 1}],
+        msg="$count should accept zero-width space as field name",
+    ),
+    StageTestCase(
+        "valid_zwj_emoji_sequence",
+        docs=[{"_id": 1}],
+        # Family emoji ZWJ sequence.
+        pipeline=[{"$count": "\U0001f468\u200d\U0001f469\u200d\U0001f467"}],
+        expected=[{"\U0001f468\u200d\U0001f469\u200d\U0001f467": 1}],
+        msg="$count should accept ZWJ emoji sequence as field name",
+    ),
+    StageTestCase(
+        "valid_control_char_x01",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "\x01"}],
+        expected=[{"\x01": 1}],
+        msg="$count should accept control character 0x01 as field name",
+    ),
+    StageTestCase(
+        "valid_control_char_x1f",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "\x1f"}],
+        expected=[{"\x1f": 1}],
+        msg="$count should accept control character 0x1F as field name",
+    ),
+    StageTestCase(
+        "valid_del_x7f",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "\x7f"}],
+        expected=[{"\x7f": 1}],
+        msg="$count should accept DEL character 0x7F as field name",
+    ),
+    StageTestCase(
+        "valid_backslash",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "\\"}],
+        expected=[{"\\": 1}],
+        msg="$count should accept backslash as field name",
+    ),
+    StageTestCase(
+        "valid_braces",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "{}"}],
+        expected=[{"{}": 1}],
+        msg="$count should accept braces as field name",
+    ),
+    StageTestCase(
+        "valid_brackets",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "[]"}],
+        expected=[{"[]": 1}],
+        msg="$count should accept brackets as field name",
+    ),
+    StageTestCase(
+        "valid_double_quote",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": '"'}],
+        expected=[{'"': 1}],
+        msg="$count should accept double quote as field name",
+    ),
+    StageTestCase(
+        "valid_numeric_string",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "123"}],
+        expected=[{"123": 1}],
+        msg="$count should accept numeric-looking string as field name",
+    ),
+    StageTestCase(
+        "valid_negative_numeric_string",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "-1"}],
+        expected=[{"-1": 1}],
+        msg="$count should accept negative numeric-looking string as field name",
+    ),
+    StageTestCase(
+        "valid_dollar_in_middle",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "a$b"}],
+        expected=[{"a$b": 1}],
+        msg="$count should accept $ in non-initial position",
+    ),
+    StageTestCase(
+        "valid_dollar_at_end",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "a$"}],
+        expected=[{"a$": 1}],
+        msg="$count should accept $ at end of field name",
+    ),
+    StageTestCase(
+        "valid_proto",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "__proto__"}],
+        expected=[{"__proto__": 1}],
+        msg="$count should accept __proto__ as field name",
+    ),
+    StageTestCase(
+        "valid_constructor",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "constructor"}],
+        expected=[{"constructor": 1}],
+        msg="$count should accept 'constructor' as field name",
+    ),
+    StageTestCase(
+        "valid_long_name",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "a" * 10_000}],
+        expected=[{"a" * 10_000: 1}],
+        msg="$count should accept very long field names",
+    ),
+]
+
+# Property [Case-Sensitive _id Check]: only the exact string "_id" is
+# rejected; case variations and similar strings are accepted.
+COUNT_ID_CASE_SENSITIVITY_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "string_id_case_variation_upper",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "_ID"}],
+        expected=[{"_ID": 1}],
+        msg="$count should accept '_ID' since check is case-sensitive",
+    ),
+    StageTestCase(
+        "string_id_case_variation_mixed",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "_Id"}],
+        expected=[{"_Id": 1}],
+        msg="$count should accept '_Id' since check is case-sensitive",
+    ),
+    StageTestCase(
+        "string_id_prefix",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "__id"}],
+        expected=[{"__id": 1}],
+        msg="$count should accept '__id' since it is not exactly '_id'",
+    ),
+    StageTestCase(
+        "string_id_no_underscore",
+        docs=[{"_id": 1}],
+        pipeline=[{"$count": "id"}],
+        expected=[{"id": 1}],
+        msg="$count should accept 'id' since it is not exactly '_id'",
+    ),
+]
+
+COUNT_FIELD_NAME_TESTS = COUNT_VALID_FIELD_NAME_TESTS + COUNT_ID_CASE_SENSITIVITY_TESTS
+
+
+@pytest.mark.aggregate
+@pytest.mark.parametrize("test_case", pytest_params(COUNT_FIELD_NAME_TESTS))
+def test_count_field_names(collection, test_case: StageTestCase):
+    """Test $count field name validation."""
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(
+        result,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_count.py b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_count.py
new file mode 100644
index 00000000..c8e3b592
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_count.py
@@ -0,0 +1,196 @@
+"""Tests for $count composing with other stages at different pipeline positions."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import (
+    StageTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Pipeline Position]: $count produces the correct count when
+# composed with other stage types at different pipeline positions.
+COUNT_PIPELINE_POSITION_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "pipeline_after_match",
+        docs=[
+            {"_id": 1, "status": "active"},
+            {"_id": 2, "status": "inactive"},
+            {"_id": 3, "status": "active"},
+            {"_id": 4, "status": "active"},
+        ],
+        pipeline=[{"$match": {"status": "active"}}, {"$count": "n"}],
+        expected=[{"n": 3}],
+        msg="$count should reflect documents remaining after $match",
+    ),
+    StageTestCase(
+        "pipeline_after_skip",
+        docs=[{"_id": 1}, {"_id": 2}, {"_id": 3}, {"_id": 4}, {"_id": 5}],
+        pipeline=[{"$skip": 2}, {"$count": "remaining"}],
+        expected=[{"remaining": 3}],
+        msg="$count should reflect documents remaining after $skip",
+    ),
+    StageTestCase(
+        "pipeline_after_limit",
+        docs=[{"_id": 1}, {"_id": 2}, {"_id": 3}, {"_id": 4}, {"_id": 5}],
+        pipeline=[{"$limit": 3}, {"$count": "limited"}],
+        expected=[{"limited": 3}],
+        msg="$count should reflect documents remaining after $limit",
+    ),
+    StageTestCase(
+        "pipeline_after_unwind",
+        docs=[
+            {"_id": 1, "tags": ["a", "b", "c"]},
+            {"_id": 2, "tags": ["d"]},
+        ],
+        pipeline=[{"$unwind": "$tags"}, {"$count": "unwound"}],
+        expected=[{"unwound": 4}],
+        msg="$count should reflect documents produced by $unwind",
+    ),
+    StageTestCase(
+        "pipeline_after_group",
+        docs=[
+            {"_id": 1, "cat": "a"},
+            {"_id": 2, "cat": "b"},
+            {"_id": 3, "cat": "a"},
+            {"_id": 4, "cat": "c"},
+        ],
+        pipeline=[{"$group": {"_id": "$cat"}}, {"$count": "groups"}],
+        expected=[{"groups": 3}],
+        msg="$count should return the number of groups produced by $group",
+    ),
+    StageTestCase(
+        "pipeline_after_project",
+        docs=[{"_id": 1, "x": 1}, {"_id": 2, "x": 2}, {"_id": 3, "x": 3}],
+        pipeline=[{"$project": {"x": 1}}, {"$count": "total"}],
+        expected=[{"total": 3}],
+        msg="$count should count documents reshaped by $project",
+    ),
+    StageTestCase(
+        "pipeline_after_unset",
+        docs=[{"_id": 1, "x": 1, "y": 2}, {"_id": 2, "x": 3, "y": 4}],
+        pipeline=[{"$unset": "y"}, {"$count": "total"}],
+        expected=[{"total": 2}],
+        msg="$count should count documents after $unset removes a field",
+    ),
+    StageTestCase(
+        "pipeline_after_addFields",
+        docs=[{"_id": 1, "x": 5}, {"_id": 2, "x": 10}],
+        pipeline=[
+            {"$addFields": {"doubled": {"$multiply": ["$x", 2]}}},
+            {"$count": "total"},
+        ],
+        expected=[{"total": 2}],
+        msg="$count should count documents enriched by $addFields",
+    ),
+    StageTestCase(
+        "pipeline_after_replaceRoot",
+        docs=[
+            {"_id": 1, "inner": {"a": 1}},
+            {"_id": 2, "inner": {"a": 2}},
+            {"_id": 3, "inner": {"a": 3}},
+        ],
+        pipeline=[{"$replaceRoot": {"newRoot": "$inner"}}, {"$count": "total"}],
+        expected=[{"total": 3}],
+        msg="$count should count documents reshaped by $replaceRoot",
+    ),
+    StageTestCase(
+        "pipeline_after_sort_limit",
+        docs=[{"_id": i, "score": i} for i in range(10)],
+        pipeline=[
+            {"$sort": {"score": -1}},
+            {"$limit": 5},
+            {"$count": "top_n"},
+        ],
+        expected=[{"top_n": 5}],
+        msg="$count should return the number of top-N documents after $sort and $limit",
+    ),
+    StageTestCase(
+        "pipeline_after_lookup",
+        docs=[{"_id": 1, "code": "a"}, {"_id": 2, "code": "b"}],
+        setup=lambda c: c.database["items"].insert_one({"_id": 1, "code": "a", "v": 10}),
+        pipeline=[
+            {
+                "$lookup": {
+                    "from": "items",
+                    "localField": "code",
+                    "foreignField": "code",
+                    "as": "matched",
+                }
+            },
+            {"$count": "total"},
+        ],
+        expected=[{"total": 2}],
+        msg="$count should count documents after $lookup join",
+    ),
+]
+
+# Property [Unicode Normalization]: precomposed and decomposed forms of the
+# same character produce distinct field names in the $count output document
+# when composed with $addFields.
+COUNT_UNICODE_NORMALIZATION_TESTS: list[StageTestCase] = [
+    StageTestCase(
+        "normalization_precomposed_vs_decomposed",
+        docs=[{"_id": 1}],
+        # U+00E9 (precomposed) used as $count field, then U+0065 U+0301
+        # (decomposed) added via $addFields. Both should coexist.
+        pipeline=[
+            {"$count": "\u00e9"},
+            {"$addFields": {"e\u0301": "decomposed"}},
+        ],
+        expected=[{"\u00e9": 1, "e\u0301": "decomposed"}],
+        msg="$count should treat precomposed and decomposed forms as distinct field names",
+    ),
+    StageTestCase(
+        "normalization_decomposed_as_count_field",
+        docs=[{"_id": 1}],
+        # U+0065 U+0301 (decomposed) used as $count field, then U+00E9
+        # (precomposed) added via $addFields. Both should coexist.
+        pipeline=[
+            {"$count": "e\u0301"},
+            {"$addFields": {"\u00e9": "precomposed"}},
+        ],
+        expected=[{"e\u0301": 1, "\u00e9": "precomposed"}],
+        msg="$count should treat decomposed and precomposed forms as distinct field names",
+    ),
+    StageTestCase(
+        "normalization_hangul_precomposed_vs_decomposed",
+        docs=[{"_id": 1}],
+        # U+AC00 (precomposed Hangul syllable) vs U+1100 U+1161 (decomposed).
+        pipeline=[
+            {"$count": "\uac00"},
+            {"$addFields": {"\u1100\u1161": "decomposed"}},
+        ],
+        expected=[{"\uac00": 1, "\u1100\u1161": "decomposed"}],
+        msg="$count should treat precomposed and decomposed Hangul as distinct field names",
+    ),
+]
+
+COUNT_INTEGRATION_TESTS = COUNT_PIPELINE_POSITION_TESTS + COUNT_UNICODE_NORMALIZATION_TESTS
+
+
+@pytest.mark.aggregate
+@pytest.mark.parametrize("test_case", pytest_params(COUNT_INTEGRATION_TESTS))
+def test_stages_position_count_cases(collection, test_case: StageTestCase):
+    """Test $count composing with other stages at different pipeline positions."""
+    if test_case.setup:
+        test_case.setup(collection)
+    if test_case.docs:
+        collection.insert_many(test_case.docs)
+    result = execute_command(
+        collection,
+        {
+            "aggregate": collection.name,
+            "pipeline": test_case.pipeline,
+            "cursor": {},
+        },
+    )
+    assertResult(
+        result,
+        expected=test_case.expected,
+        error_code=test_case.error_code,
+        msg=test_case.msg,
+    )
diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py
index 67a764f6..415654b1 100644
--- a/documentdb_tests/framework/error_codes.py
+++ b/documentdb_tests/framework/error_codes.py
@@ -158,11 +158,17 @@
 INDEXOFCP_SUBSTRING_TYPE_ERROR = 40094
 INDEXOF_INDEX_TYPE_ERROR = 40096
 INDEXOF_NEGATIVE_INDEX_ERROR = 40097
+COUNT_FIELD_TYPE_ERROR = 40156
+COUNT_FIELD_EMPTY_ERROR = 40157
+COUNT_FIELD_DOLLAR_PREFIX_ERROR = 40158
+COUNT_FIELD_NULL_BYTE_ERROR = 40159
+COUNT_FIELD_DOT_ERROR = 40160
 CONFLICTING_PATH_ERROR = 40176
 MULTIPLE_EXPRESSIONS_ERROR = 40181
 DOTTED_FIELD_IN_SUB_OBJECT_ERROR = 40183
 QUERY_METADATA_NOT_AVAILABLE_ERROR = 40218
 SET_SPECIFICATION_NOT_OBJECT_ERROR = 40272
+PIPELINE_STAGE_EXTRA_FIELD_ERROR = 40323
 FIELD_PATH_EMPTY_ERROR = 40352
 FIELD_PATH_TRAILING_DOT_ERROR = 40353
 ARRAY_TO_OBJECT_NOT_ARRAY_ERROR = 40386
@@ -286,6 +292,7 @@
 DATEADD_INVALID_LARGE_VALUE_ERROR = 5976500
 DATEADD_INT64_MIN_NEGATE_ERROR = 6045000
 SORT_DUPLICATE_KEY_ERROR = 7472500
+COUNT_FIELD_ID_RESERVED_ERROR = 9039800
 CONVERT_BYTE_ORDER_TYPE_ERROR = 9130001
 CONVERT_BYTE_ORDER_VALUE_ERROR = 9130002
 NULL_CHAR_IN_FIELD_NAME_ERROR = 9534700