diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index ccbf21ac..37026f58 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -11,7 +11,7 @@ jobs: services: mongodb: - image: mongo:8.2 + image: mongo:8.2.4 ports: - 27017:27017 options: >- diff --git a/documentdb_tests/compatibility/tests/core/__init__.py b/documentdb_tests/compatibility/tests/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/__init__.py b/documentdb_tests/compatibility/tests/core/operator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_input_forms.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_input_forms.py new file mode 100644 index 00000000..4f9fe13f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_input_forms.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import 
pytest_params + +from .utils.concat_common import ( + ConcatTest, +) + +# Property [Arity]: $concat works with varying argument counts. +CONCAT_ARITY_TESTS: list[ConcatTest] = [ + ConcatTest( + "arity_single", + args=["solo"], + expected="solo", + msg="$concat should accept a single argument", + ), + ConcatTest( + "arity_two", + args=["hello", "world"], + expected="helloworld", + msg="$concat should accept two arguments", + ), +] + +# Empty array is inline-only since stored modes need at least one arg. +CONCAT_ARITY_EMPTY: list[ConcatTest] = [ + ConcatTest( + "arity_empty_array", + args=[], + expected="", + msg="$concat of empty array should return empty string", + ), +] + +# Property [Large Arity]: $concat accepts at least 1,000 arguments and produces the expected result. +# 1,000 is an arbitrarily high count chosen to be well above typical usage. +CONCAT_LARGE_ARITY_TESTS: list[ConcatTest] = [ + ConcatTest( + "large_arity_1000", + args=[str(i % 10) for i in range(1_000)], + expected="".join(str(i % 10) for i in range(1_000)), + msg="$concat should accept 1000 arguments", + ), +] + +# Property [Expression Arguments]: $concat accepts any expression that resolves to a string. 
+CONCAT_EXPR_TESTS: list[ConcatTest] = [ + ConcatTest( + "expr_toupper", + args=[{"$toUpper": "hello"}], + expected="HELLO", + msg="$concat should accept $toUpper expression as argument", + ), + ConcatTest( + "expr_tolower", + args=[{"$toLower": "WORLD"}], + expected="world", + msg="$concat should accept $toLower expression as argument", + ), + ConcatTest( + "expr_two_expressions", + args=[{"$toUpper": "hello"}, {"$toLower": "WORLD"}], + expected="HELLOworld", + msg="$concat should accept multiple expression arguments", + ), + ConcatTest( + "expr_literal_and_expression", + args=["hello", {"$toUpper": " world"}], + expected="hello WORLD", + msg="$concat should accept mix of literal and expression arguments", + ), + ConcatTest( + "expr_mixed", + args=[{"$toUpper": "a"}, "b", {"$toLower": "C"}], + expected="Abc", + msg="$concat should interleave expression and literal arguments", + ), +] + +# Property [Edge Cases]: special characters (newlines, tabs, null bytes), JSON/BSON-meaningful +# characters, and repeated args. 
+CONCAT_EDGE_TESTS: list[ConcatTest] = [ + # Special characters: newlines, tabs, null bytes + ConcatTest( + "edge_newline_tab", + args=["line1\nline2", "\ttab"], + expected="line1\nline2\ttab", + msg="$concat should preserve newlines and tabs", + ), + ConcatTest( + "edge_null_byte", + args=["before\x00after", "ok"], + expected="before\x00afterok", + msg="$concat should preserve null bytes", + ), + # Characters meaningful in JSON/BSON + ConcatTest( + "edge_json_quote_backslash", + args=['say "hi"', " and \\ backslash"], + expected='say "hi" and \\ backslash', + msg="$concat should preserve JSON quotes and backslashes", + ), + ConcatTest( + "edge_json_braces_brackets", + args=["{key}", "[val]"], + expected="{key}[val]", + msg="$concat should preserve braces and brackets", + ), + # All arguments are the same string + ConcatTest( + "edge_all_same", + args=["abc", "abc", "abc"], + expected="abcabcabc", + msg="$concat should handle repeated identical arguments", + ), +] + +# Property [Nested Self-Application]: nested $concat within $concat produces the expected +# concatenated result. 
+CONCAT_NESTED_TESTS: list[ConcatTest] = [ + ConcatTest( + "nested_one_level", + args=[{"$concat": ["a", "b"]}, "c"], + expected="abc", + msg="$concat should accept nested $concat as argument", + ), +] + +CONCAT_INPUT_FORMS_TESTS = ( + CONCAT_ARITY_TESTS + + CONCAT_ARITY_EMPTY + + CONCAT_LARGE_ARITY_TESTS + + CONCAT_EXPR_TESTS + + CONCAT_EDGE_TESTS + + CONCAT_NESTED_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(CONCAT_INPUT_FORMS_TESTS)) +def test_concat_input_forms_cases(collection, test_case: ConcatTest): + """Test $concat input form cases.""" + result = execute_expression(collection, {"$concat": test_case.args}) + assert_expression_result( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_invalid_args.py new file mode 100644 index 00000000..b2ca99b3 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_invalid_args.py @@ -0,0 +1,208 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp +from bson.code import Code + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.error_codes import ( + CONCAT_TYPE_ERROR, + FAILED_TO_PARSE_ERROR, + INVALID_DOLLAR_FIELD_PATH, +) +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF, MISSING + +from .utils.concat_common import ( + ConcatTest, +) + +# Property [Syntax Validation]: non-array argument of invalid type produces CONCAT_TYPE_ERROR. 
+CONCAT_SYNTAX_ERROR_TESTS: list[ConcatTest] = [ + ConcatTest( + "syntax_binary", + args=Binary(b"data"), # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare binary as non-array argument", + ), + ConcatTest( + "syntax_bool", + args=True, # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare boolean as non-array argument", + ), + ConcatTest( + "syntax_date", + args=datetime(2024, 1, 1, tzinfo=timezone.utc), # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare datetime as non-array argument", + ), + ConcatTest( + "syntax_decimal128", + args=DECIMAL128_ONE_AND_HALF, # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare Decimal128 as non-array argument", + ), + ConcatTest( + "syntax_float", + args=3.14, # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare float as non-array argument", + ), + ConcatTest( + "syntax_int", + args=42, # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare int as non-array argument", + ), + ConcatTest( + "syntax_long", + args=Int64(42), # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare Int64 as non-array argument", + ), + ConcatTest( + "syntax_maxkey", + args=MaxKey(), # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare MaxKey as non-array argument", + ), + ConcatTest( + "syntax_minkey", + args=MinKey(), # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare MinKey as non-array argument", + ), + ConcatTest( + "syntax_object", + args={"a": 1}, # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare object as non-array argument", + ), + ConcatTest( + "syntax_objectid", + args=ObjectId("507f1f77bcf86cd799439011"), # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + 
msg="$concat should reject bare ObjectId as non-array argument", + ), + ConcatTest( + "syntax_regex", + args=Regex("pattern"), # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare regex as non-array argument", + ), + ConcatTest( + "syntax_timestamp", + args=Timestamp(1, 1), # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare Timestamp as non-array argument", + ), + ConcatTest( + "syntax_code", + args=Code("function() {}"), # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare Code as non-array argument", + ), + ConcatTest( + "syntax_code_scope", + args=Code("function() {}", {"x": 1}), # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare Code with scope as non-array argument", + ), + ConcatTest( + "syntax_binary_uuid", + args=Binary(b"data", 4), # type: ignore[arg-type] + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject bare binary UUID as non-array argument", + ), +] + +# Property [Expression Returning Wrong Type]: an expression that resolves to a non-string, non-null +# type at runtime is rejected with CONCAT_TYPE_ERROR. +CONCAT_EXPR_TYPE_ERROR_TESTS: list[ConcatTest] = [ + ConcatTest( + "expr_type_int_solo", + args=[{"$add": [1, 2]}], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject expression resolving to int", + ), + ConcatTest( + "expr_type_int_after_string", + args=["hello", {"$add": [1, 2]}], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject expression resolving to int after string", + ), + ConcatTest( + "expr_type_int_before_string", + args=[{"$add": [1, 2]}, "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject expression resolving to int before string", + ), +] + +# Property [Error Precedence - Type Error Wins]: when a type-invalid argument appears before null or +# missing in left-to-right order, the type error is reported. 
+CONCAT_ERROR_PREC_TYPE_WINS_TESTS: list[ConcatTest] = [ + ConcatTest( + "error_prec_int_before_null", + args=[42, None], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should report type error when int precedes null", + ), + ConcatTest( + "error_prec_int_null_str", + args=[42, None, "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should report type error when int precedes null and string", + ), + ConcatTest( + "error_prec_int_before_missing", + args=[42, MISSING], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should report type error when int precedes missing", + ), + ConcatTest( + "error_prec_leftmost_reported", + args=[42, True], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should report type error for leftmost invalid argument", + ), +] + +# Property [Dollar Sign Error]: a bare "$" is interpreted as a field path and "$$" is interpreted +# as an empty variable name. +CONCAT_DOLLAR_SIGN_ERROR_TESTS: list[ConcatTest] = [ + ConcatTest( + "dollar_bare", + args=["$"], + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$concat should reject bare '$' as invalid field path", + ), + ConcatTest( + "dollar_double", + args=["$$"], + error_code=FAILED_TO_PARSE_ERROR, + msg="$concat should reject '$$' as empty variable name", + ), +] + +CONCAT_INVALID_ARGS_TESTS = ( + CONCAT_SYNTAX_ERROR_TESTS + + CONCAT_EXPR_TYPE_ERROR_TESTS + + CONCAT_ERROR_PREC_TYPE_WINS_TESTS + + CONCAT_DOLLAR_SIGN_ERROR_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(CONCAT_INVALID_ARGS_TESTS)) +def test_concat_invalid_args_cases(collection, test_case: ConcatTest): + """Test $concat invalid argument cases.""" + result = execute_expression(collection, {"$concat": test_case.args}) + assert_expression_result( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_invariants.py 
b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_invariants.py new file mode 100644 index 00000000..36e4a34f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_invariants.py @@ -0,0 +1,171 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_project, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_case import BaseTestCase + +from .utils.concat_common import ( + ConcatTest, +) + +# Property [Length Additivity]: len(result) == sum(len(arg) for arg in args), for both codepoints +# and bytes. +CONCAT_LENGTH_TESTS: list[ConcatTest] = [ + ConcatTest( + "length_varying", + args=["a", "bb", "ccc"], + msg="$concat length should be additive for varying-length strings", + ), + ConcatTest( + "length_two_words", + args=["hello", "world"], + msg="$concat length should be additive for two words", + ), + ConcatTest( + "length_multibyte", + args=["café", "naïve"], + msg="$concat length should be additive for multibyte strings", + ), + ConcatTest( + "length_emoji", args=["🎉", "🚀"], msg="$concat length should be additive for emoji" + ), + ConcatTest("length_cjk", args=["日本", "語"], msg="$concat length should be additive for CJK"), + ConcatTest( + "length_mixed_byte_widths", + args=["a", "é", "🎉"], + msg="$concat length should be additive for mixed byte widths", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(CONCAT_LENGTH_TESTS)) +def test_concat_length_additivity(collection, test_case: ConcatTest): + """Test $concat length additivity.""" + expected_cp = sum(len(a) for a in test_case.args) + expected_bytes = sum(len(a.encode("utf-8")) for a in test_case.args) + concat = {"$concat": 
test_case.args} + result = execute_project( + collection, + { + "lenCodepoints": {"$strLenCP": concat}, + "sumCodepoints": {"$add": [{"$strLenCP": a} for a in test_case.args]}, + "lenBytes": {"$strLenBytes": concat}, + "sumBytes": {"$add": [{"$strLenBytes": a} for a in test_case.args]}, + }, + ) + assertSuccess( + result, + [ + { + "lenCodepoints": expected_cp, + "sumCodepoints": expected_cp, + "lenBytes": expected_bytes, + "sumBytes": expected_bytes, + } + ], + msg=test_case.msg, + ) + + +# Property [Associativity]: grouping does not affect the result. +# concat(concat(a, b), c) == concat(a, concat(b, c)) == concat(a, b, c) +@dataclass(frozen=True) +class ConcatAssocTest(BaseTestCase): + a: str = None # type: ignore[assignment] + b: str = None # type: ignore[assignment] + c: str = None # type: ignore[assignment] + + +CONCAT_ASSOC_TESTS: list[ConcatAssocTest] = [ + ConcatAssocTest( + "words", a="hello", b=" ", c="world", msg="$concat should be associative for words" + ), + ConcatAssocTest( + "single_chars", a="a", b="b", c="c", msg="$concat should be associative for single chars" + ), + ConcatAssocTest( + "empties_around", + a="", + b="x", + c="", + msg="$concat should be associative with empty strings around", + ), + ConcatAssocTest( + "empty_middle", + a="foo", + b="", + c="bar", + msg="$concat should be associative with empty middle", + ), + ConcatAssocTest( + "all_empty", a="", b="", c="", msg="$concat should be associative for all empty strings" + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(CONCAT_ASSOC_TESTS)) +def test_concat_associativity(collection, test_case: ConcatAssocTest): + """Test $concat associativity.""" + expected = test_case.a + test_case.b + test_case.c + a, b, c = test_case.a, test_case.b, test_case.c + result = execute_project( + collection, + { + "flat": {"$concat": [a, b, c]}, + "left": {"$concat": [{"$concat": [a, b]}, c]}, + "right": {"$concat": [a, {"$concat": [b, c]}]}, + }, + ) + assertSuccess( + result, [{"flat": 
expected, "left": expected, "right": expected}], msg=test_case.msg + ) + + +# Property [Return Type]: when no argument is null or missing, the result is always type "string". +CONCAT_RETURN_TYPE_TESTS: list[ConcatTest] = [ + ConcatTest( + "return_type_single", + args=["hello"], + msg="$concat of single string should return type string", + ), + ConcatTest( + "return_type_two", + args=["hello", "world"], + msg="$concat of two strings should return type string", + ), + ConcatTest( + "return_type_many", + args=["a", "b", "c"], + msg="$concat of many strings should return type string", + ), + ConcatTest( + "return_type_empty_string", + args=[""], + msg="$concat of empty string should return type string", + ), + ConcatTest( + "return_type_unicode", + args=["🎉", "日本語"], + msg="$concat of unicode strings should return type string", + ), + ConcatTest( + "return_type_expression", + args=[{"$toUpper": "hello"}], + msg="$concat of expression should return type string", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(CONCAT_RETURN_TYPE_TESTS)) +def test_concat_return_type(collection, test_case: ConcatTest): + """Test $concat result is always type string.""" + result = execute_expression(collection, {"$type": {"$concat": test_case.args}}) + assertSuccess(result, [{"result": "string"}], msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_null.py new file mode 100644 index 00000000..8bd6f8e0 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_null.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params +from 
documentdb_tests.framework.test_constants import MISSING + +from .utils.concat_common import ( + ConcatTest, +) + +# Property [Null Propagation]: if any argument is null, the result is null. +# Argument shapes where _PLACEHOLDER is the null-producing value. +_PLACEHOLDER = object() +_NULL_PATTERNS = [ + ([_PLACEHOLDER], "single"), + (["hello", _PLACEHOLDER], "last"), + ([_PLACEHOLDER, "hello"], "first"), + (["a", _PLACEHOLDER, "b"], "middle"), + ([_PLACEHOLDER, _PLACEHOLDER], "all"), + ([_PLACEHOLDER, _PLACEHOLDER, _PLACEHOLDER], "many"), + (["a", "b", "c", _PLACEHOLDER, "d", "e"], "among_many_strings"), +] + + +def _build_null_tests(null_value, prefix) -> list[ConcatTest]: + """Generate test cases by replacing _PLACEHOLDER with the given null value.""" + return [ + ConcatTest( + f"{prefix}_{suffix}", + args=[null_value if a is _PLACEHOLDER else a for a in args], + expected=None, + msg=f"$concat should return null when {prefix} value is in position: {suffix}", + ) + for args, suffix in _NULL_PATTERNS + ] + + +# Explicit None. Used by inline, insert, and mixed tests. +CONCAT_NULL_TESTS = _build_null_tests(None, "null") + +# Missing field references (resolve to null at runtime). +# Only meaningful for inline since there is no stored field to read. +CONCAT_MISSING_TESTS = _build_null_tests(MISSING, "missing") + +CONCAT_MIXED_NULL_TESTS = [ + ConcatTest( + "null_and_missing", + args=[None, MISSING], + expected=None, + msg="$concat should return null when args contain both null and missing", + ), + ConcatTest( + "null_and_missing_among_strings", + args=["a", None, MISSING, "b"], + expected=None, + msg="$concat should return null when null and missing appear among strings", + ), +] + +# Property [Error Precedence - Null Wins]: when null or missing appears before a type-invalid +# argument in left-to-right order, null propagation takes precedence and the result is null. 
+CONCAT_ERROR_PREC_NULL_WINS_TESTS: list[ConcatTest] = [ + ConcatTest( + "error_prec_null_before_int", + args=[None, 42], + expected=None, + msg="$concat should return null when null precedes type-invalid argument", + ), + ConcatTest( + "error_prec_str_null_int", + args=["hello", None, 42], + expected=None, + msg="$concat should return null when null appears between string and invalid arg", + ), + ConcatTest( + "error_prec_null_before_two_invalid", + args=[None, 42, True], + expected=None, + msg="$concat should return null when null precedes multiple invalid args", + ), + ConcatTest( + "error_prec_missing_before_int", + args=[MISSING, 42], + expected=None, + msg="$concat should return null when missing precedes type-invalid argument", + ), +] + +CONCAT_NULL_ALL_TESTS = ( + CONCAT_NULL_TESTS + + CONCAT_MISSING_TESTS + + CONCAT_MIXED_NULL_TESTS + + CONCAT_ERROR_PREC_NULL_WINS_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(CONCAT_NULL_ALL_TESTS)) +def test_concat_null_cases(collection, test_case: ConcatTest): + """Test $concat null propagation cases.""" + result = execute_expression(collection, {"$concat": test_case.args}) + assert_expression_result( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_size_limit.py new file mode 100644 index 00000000..7c061e61 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_size_limit.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, + execute_project_with_insert, +) +from documentdb_tests.framework.assertions import assertFailureCode +from 
documentdb_tests.framework.error_codes import STRING_SIZE_LIMIT_ERROR +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import STRING_SIZE_LIMIT_BYTES + +from .utils.concat_common import ( + ConcatTest, +) + +# Property [String Size Limit - Success]: a result just under the size limit succeeds, and null +# propagation takes precedence over the size limit when no single literal exceeds it. +CONCAT_SIZE_LIMIT_SUCCESS_TESTS: list[ConcatTest] = [ + # Two large strings concatenated, just under the limit. + ConcatTest( + "size_two_args_one_under", + args=[ + "a" * ((STRING_SIZE_LIMIT_BYTES - 1) // 2), + "b" * ((STRING_SIZE_LIMIT_BYTES - 1) // 2), + ], + expected="a" * ((STRING_SIZE_LIMIT_BYTES - 1) // 2) + + "b" * ((STRING_SIZE_LIMIT_BYTES - 1) // 2), + msg="$concat should handle two large strings concatenated together", + ), + ConcatTest( + "size_one_under", + args=["a" * (STRING_SIZE_LIMIT_BYTES - 1)], + expected="a" * (STRING_SIZE_LIMIT_BYTES - 1), + msg="$concat should succeed when result is one byte under the size limit", + ), + # 2-byte chars: one byte under the limit. + ConcatTest( + "size_one_under_2byte", + args=["é" * ((STRING_SIZE_LIMIT_BYTES - 1) // 2) + "a"], + expected="é" * ((STRING_SIZE_LIMIT_BYTES - 1) // 2) + "a", + msg="$concat should succeed with 2-byte chars one byte under the limit", + ), + # 4-byte chars: one byte under the limit. + ConcatTest( + "size_one_under_4byte", + args=["😀" * ((STRING_SIZE_LIMIT_BYTES - 1) // 4) + "abc"], + expected="😀" * ((STRING_SIZE_LIMIT_BYTES - 1) // 4) + "abc", + msg="$concat should succeed with 4-byte chars one byte under the limit", + ), + # Null propagation wins when individual args are under the limit. 
+ ConcatTest( + "size_null_precedence", + args=["a" * (STRING_SIZE_LIMIT_BYTES // 2), None, "b" * (STRING_SIZE_LIMIT_BYTES // 2)], + expected=None, + msg="$concat should return null when null appears among large strings under the limit", + ), +] + +# Property [String Size Limit - Error]: a result at or above the size limit produces an error. +STRING_SIZE_LIMIT_ERROR_TESTS: list[ConcatTest] = [ + ConcatTest( + "size_at_limit", + args=["a" * STRING_SIZE_LIMIT_BYTES], + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$concat should reject result at the size limit", + ), + # Two halves summing to exactly the limit. + ConcatTest( + "size_two_halves", + args=["a" * (STRING_SIZE_LIMIT_BYTES // 2), "b" * (STRING_SIZE_LIMIT_BYTES // 2)], + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$concat should reject two strings summing to the size limit", + ), + # 2-byte chars totaling exactly the limit. + ConcatTest( + "size_at_limit_2byte", + args=["é" * (STRING_SIZE_LIMIT_BYTES // 2)], + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$concat should reject 2-byte chars totaling the size limit", + ), + # 4-byte chars totaling exactly the limit. + ConcatTest( + "size_at_limit_4byte", + args=["😀" * (STRING_SIZE_LIMIT_BYTES // 4)], + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$concat should reject 4-byte chars totaling the size limit", + ), + # Many small operands summing to exactly the limit. + ConcatTest( + "size_many_small", + args=["a" * (STRING_SIZE_LIMIT_BYTES // 1024)] * 1024, + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$concat should reject many small operands summing to the size limit", + ), + # Operand produced by a nested expression rather than a literal. 
+ ConcatTest( + "size_nested", + args=[ + {"$concat": ["a" * (STRING_SIZE_LIMIT_BYTES // 2)]}, + "b" * (STRING_SIZE_LIMIT_BYTES // 2), + ], + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$concat should reject nested expression result exceeding the size limit", + ), +] + +CONCAT_SIZE_LIMIT_TESTS = CONCAT_SIZE_LIMIT_SUCCESS_TESTS + STRING_SIZE_LIMIT_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(CONCAT_SIZE_LIMIT_TESTS)) +def test_concat_size_limit_cases(collection, test_case: ConcatTest): + """Test $concat size limit cases.""" + result = execute_expression(collection, {"$concat": test_case.args}) + assert_expression_result( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) + + +def test_concat_size_limit_stored_field(collection): + """Test $concat size limit is enforced when stored fields contribute to the result.""" + result = execute_project_with_insert( + collection, + {"s": "a" * (STRING_SIZE_LIMIT_BYTES // 2)}, + {"result": {"$concat": ["$s", "b" * (STRING_SIZE_LIMIT_BYTES // 2)]}}, + ) + assertFailureCode( + result, STRING_SIZE_LIMIT_ERROR, msg="$concat should enforce size limit with stored fields" + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_string_values.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_string_values.py new file mode 100644 index 00000000..f3726c31 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_string_values.py @@ -0,0 +1,305 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.concat_common import ( + ConcatTest, +) + +# Property [Identity]: "" is the identity element. 
concat(s, "") == concat("", s) == s. +CONCAT_IDENTITY_TESTS: list[ConcatTest] = [ + ConcatTest( + "identity_empty_right", + args=["hello", ""], + expected="hello", + msg="$concat should treat empty string as identity on the right", + ), + ConcatTest( + "identity_empty_left", + args=["", "hello"], + expected="hello", + msg="$concat should treat empty string as identity on the left", + ), + ConcatTest( + "identity_empty_both_sides", + args=["", "hello", ""], + expected="hello", + msg="$concat should treat empty strings as identity on both sides", + ), + ConcatTest( + "identity_single_empty", + args=[""], + expected="", + msg="$concat of a single empty string should return empty string", + ), + ConcatTest( + "identity_two_empty", + args=["", ""], + expected="", + msg="$concat of two empty strings should return empty string", + ), + ConcatTest( + "identity_many_empty", + args=["", "", ""], + expected="", + msg="$concat of many empty strings should return empty string", + ), + ConcatTest( + "identity_empty_between_strings", + args=["a", "", "b"], + expected="ab", + msg="$concat should ignore empty string between non-empty strings", + ), + ConcatTest( + "identity_empty_interspersed", + args=["", "a", "", "b", ""], + expected="ab", + msg="$concat should ignore interspersed empty strings", + ), +] + +# Property [Ordering]: argument order is preserved; $concat is not commutative. 
+CONCAT_ORDERING_TESTS: list[ConcatTest] = [ + ConcatTest( + "ordering_three_words", + args=["hello", " ", "world"], + expected="hello world", + msg="$concat should preserve argument order for three words", + ), + ConcatTest( + "ordering_reversed", + args=["world", " ", "hello"], + expected="world hello", + msg="$concat should preserve reversed argument order", + ), + ConcatTest( + "ordering_ab", args=["a", "b"], expected="ab", msg="$concat should produce 'ab' not 'ba'" + ), + ConcatTest( + "ordering_ba", args=["b", "a"], expected="ba", msg="$concat should produce 'ba' not 'ab'" + ), + ConcatTest( + "ordering_digits", + args=["1", "2", "3"], + expected="123", + msg="$concat should preserve digit order", + ), + ConcatTest( + "ordering_digits_reversed", + args=["3", "2", "1"], + expected="321", + msg="$concat should preserve reversed digit order", + ), +] + +# Property [Unicode Integrity]: concatenation preserves byte and codepoint integrity for multi-byte +# UTF-8 strings. +CONCAT_UNICODE_TESTS: list[ConcatTest] = [ + ConcatTest( + "unicode_latin_accents", + args=["café", " ", "naïve"], + expected="café naïve", + msg="$concat should preserve Latin accented characters", + ), + ConcatTest( + "unicode_cjk", + args=["日本", "語"], + expected="日本語", + msg="$concat should preserve CJK characters", + ), + ConcatTest( + "unicode_emoji", + args=["🎉", "🚀", "✨"], + expected="🎉🚀✨", + msg="$concat should preserve emoji characters", + ), + # ZWJ (Zero Width Joiner, U+200D) joins characters into single glyphs. 
+ # 👨‍👩‍👧‍👦 = man + ZWJ + woman + ZWJ + girl + ZWJ + boy (family emoji) + # 👨‍💻 = man + ZWJ + laptop (man technologist emoji) + ConcatTest( + "unicode_zwj_emoji", + args=["👨\u200d👩\u200d👧\u200d👦", "👨\u200d💻"], + expected="👨\u200d👩\u200d👧\u200d👦👨\u200d💻", + msg="$concat should preserve ZWJ emoji sequences", + ), + ConcatTest( + "unicode_zwj_at_boundary", + args=["👨\u200d", "💻"], + expected="👨\u200d💻", + msg="$concat should join ZWJ at string boundary", + ), + ConcatTest( + "unicode_combining_chars", + args=["e\u0301", "n\u0303o"], + expected="e\u0301n\u0303o", + msg="$concat should preserve combining characters without normalization", + ), + ConcatTest( + "unicode_mixed_scripts", + args=["hello", "世界", "🌍"], + expected="hello世界🌍", + msg="$concat should handle mixed scripts", + ), + ConcatTest( + "unicode_greek", + args=["α", "β", "γ"], + expected="αβγ", + msg="$concat should preserve Greek characters", + ), + ConcatTest( + "unicode_arabic", + args=["مرحبا", " ", "عالم"], + expected="مرحبا عالم", + msg="$concat should preserve Arabic characters", + ), + # Combining mark at start of second arg should not be normalized to precomposed form (é). + ConcatTest( + "unicode_combining_at_boundary", + args=["e", "\u0301"], + expected="e\u0301", + msg="$concat should not normalize combining mark at string boundary", + ), + # Precomposed (é) and decomposed (e + combining mark) forms must both survive without + # normalization. + ConcatTest( + "unicode_precomposed_and_decomposed", + args=["\u00e9", "e\u0301"], + expected="\u00e9e\u0301", + msg="$concat should preserve both precomposed and decomposed forms", + ), + # BOM is sometimes stripped as it is a leading encoding signal. After concat it lands mid-string + # and must survive. + ConcatTest( + "unicode_bom_in_middle", + args=["hello", "\ufeffworld"], + expected="hello\ufeffworld", + msg="$concat should preserve BOM character in middle of result", + ), + # Directional markers between concatenated strings must be preserved. 
+ ConcatTest( + "unicode_directional_markers", + args=["hello\u200f", "\u200eworld"], + expected="hello\u200f\u200eworld", + msg="$concat should preserve directional markers at string boundaries", + ), +] + +# Property [Character Preservation]: zero-width spaces, control characters, dollar-prefixed +# literals, locale-sensitive letters, and non-Latin scripts are preserved without transformation. +CONCAT_CHAR_PRESERVATION_TESTS: list[ConcatTest] = [ + # U+200B zero-width space. + ConcatTest( + "char_pres_zero_width_space", + args=["a\u200bb", "c"], + expected="a\u200bbc", + msg="$concat should preserve zero-width space", + ), + # Control characters U+0001 and U+001F. + ConcatTest( + "char_pres_control_soh", + args=["\x01", "a"], + expected="\x01a", + msg="$concat should preserve SOH control character", + ), + ConcatTest( + "char_pres_control_us", + args=["a", "\x1f"], + expected="a\x1f", + msg="$concat should preserve US control character", + ), + # Dollar-prefixed string via $literal is preserved as text, not a field reference. + ConcatTest( + "char_pres_dollar_literal", + args=[{"$literal": "$hello"}, " world"], + expected="$hello world", + msg="$concat should preserve dollar-prefixed string via $literal", + ), + # German sharp s, fi ligature (U+FB01, escaped so it cannot decay to ASCII "fi"), Turkish dotless i (U+0131). + ConcatTest( + "char_pres_locale_sensitive", + args=["ß", "\ufb01", "ı"], + expected="ß\ufb01ı", + msg="$concat should preserve locale-sensitive characters", + ), + # Cyrillic lowercase and uppercase letters (U+0434, U+0414). + ConcatTest( + "char_pres_cyrillic", + args=["д", "Д"], + expected="дД", + msg="$concat should preserve Cyrillic characters", + ), + # U+10400 Deseret capital long I, U+10428 Deseret small long I. 
+ ConcatTest( + "char_pres_deseret", + args=["𐐀", "𐐨"], + expected="𐐀𐐨", + msg="$concat should preserve Deseret script characters", + ), +] + +# Property [Whitespace Preservation]: whitespace characters including CR, CRLF, non-breaking space, +# and Unicode whitespace are preserved in concatenation. +CONCAT_WHITESPACE_TESTS: list[ConcatTest] = [ + ConcatTest( + "whitespace_cr", + args=["a\rb", "c"], + expected="a\rbc", + msg="$concat should preserve carriage return", + ), + ConcatTest( + "whitespace_crlf", + args=["a\r\nb", "c"], + expected="a\r\nbc", + msg="$concat should preserve CRLF", + ), + # U+00A0 non-breaking space. + ConcatTest( + "whitespace_nbsp", + args=["a\u00a0b", "c"], + expected="a\u00a0bc", + msg="$concat should preserve non-breaking space", + ), + # U+2000 en space. + ConcatTest( + "whitespace_en_space", + args=["a\u2000b", "c"], + expected="a\u2000bc", + msg="$concat should preserve en space", + ), + # U+2003 em space. + ConcatTest( + "whitespace_em_space", + args=["a\u2003b", "c"], + expected="a\u2003bc", + msg="$concat should preserve em space", + ), + ConcatTest( + "whitespace_mixed", + args=[" \t\n\r ", "x"], + expected=" \t\n\r x", + msg="$concat should preserve mixed whitespace characters", + ), +] + +CONCAT_STRING_VALUES_TESTS = ( + CONCAT_IDENTITY_TESTS + + CONCAT_ORDERING_TESTS + + CONCAT_UNICODE_TESTS + + CONCAT_CHAR_PRESERVATION_TESTS + + CONCAT_WHITESPACE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(CONCAT_STRING_VALUES_TESTS)) +def test_concat_string_values_cases(collection, test_case: ConcatTest): + """Test $concat string value cases.""" + result = execute_expression(collection, {"$concat": test_case.args}) + assert_expression_result( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_type_errors.py 
b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_type_errors.py new file mode 100644 index 00000000..42f69bb4 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_type_errors.py @@ -0,0 +1,341 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp +from bson.code import Code + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.error_codes import CONCAT_TYPE_ERROR +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF + +from .utils.concat_common import ( + ConcatTest, +) + +# Property [Type Strictness]: any non-string, non-null argument produces CONCAT_TYPE_ERROR. +CONCAT_TYPE_ERROR_TESTS: list[ConcatTest] = [ + # Invalid value as the only argument. 
+ ConcatTest( + "type_array_solo", + args=[["a"]], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject array argument", + ), + ConcatTest( + "type_binary_solo", + args=[Binary(b"data")], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject binary argument", + ), + ConcatTest( + "type_bool_solo", + args=[True], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject boolean argument", + ), + ConcatTest( + "type_date_solo", + args=[datetime(2024, 1, 1, tzinfo=timezone.utc)], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject datetime argument", + ), + ConcatTest( + "type_decimal128_solo", + args=[DECIMAL128_ONE_AND_HALF], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Decimal128 argument", + ), + ConcatTest( + "type_float_solo", + args=[3.14], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject float argument", + ), + ConcatTest( + "type_int_solo", + args=[42], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject int argument", + ), + ConcatTest( + "type_long_solo", + args=[Int64(42)], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Int64 argument", + ), + ConcatTest( + "type_maxkey_solo", + args=[MaxKey()], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject MaxKey argument", + ), + ConcatTest( + "type_minkey_solo", + args=[MinKey()], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject MinKey argument", + ), + ConcatTest( + "type_object_solo", + args=[{"a": 1}], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject object argument", + ), + ConcatTest( + "type_objectid_solo", + args=[ObjectId("507f1f77bcf86cd799439011")], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject ObjectId argument", + ), + ConcatTest( + "type_regex_solo", + args=[Regex("pattern")], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject regex argument", + ), + ConcatTest( + "type_timestamp_solo", + args=[Timestamp(1, 1)], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should 
reject Timestamp argument", + ), + ConcatTest( + "type_code_solo", + args=[Code("function() {}")], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Code argument", + ), + ConcatTest( + "type_code_scope_solo", + args=[Code("function() {}", {"x": 1})], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Code with scope argument", + ), + ConcatTest( + "type_binary_uuid_solo", + args=[Binary(b"data", 4)], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject binary UUID argument", + ), + # Invalid value after a valid string. + ConcatTest( + "type_array_after_string", + args=["hello", ["a"]], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject array after valid string", + ), + ConcatTest( + "type_binary_after_string", + args=["hello", Binary(b"data")], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject binary after valid string", + ), + ConcatTest( + "type_bool_after_string", + args=["hello", True], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject boolean after valid string", + ), + ConcatTest( + "type_date_after_string", + args=["hello", datetime(2024, 1, 1, tzinfo=timezone.utc)], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject datetime after valid string", + ), + ConcatTest( + "type_decimal128_after_string", + args=["hello", DECIMAL128_ONE_AND_HALF], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Decimal128 after valid string", + ), + ConcatTest( + "type_float_after_string", + args=["hello", 3.14], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject float after valid string", + ), + ConcatTest( + "type_int_after_string", + args=["hello", 42], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject int after valid string", + ), + ConcatTest( + "type_long_after_string", + args=["hello", Int64(42)], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Int64 after valid string", + ), + ConcatTest( + "type_maxkey_after_string", + args=["hello", MaxKey()], + 
error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject MaxKey after valid string", + ), + ConcatTest( + "type_minkey_after_string", + args=["hello", MinKey()], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject MinKey after valid string", + ), + ConcatTest( + "type_object_after_string", + args=["hello", {"a": 1}], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject object after valid string", + ), + ConcatTest( + "type_objectid_after_string", + args=["hello", ObjectId("507f1f77bcf86cd799439011")], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject ObjectId after valid string", + ), + ConcatTest( + "type_regex_after_string", + args=["hello", Regex("pattern")], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject regex after valid string", + ), + ConcatTest( + "type_timestamp_after_string", + args=["hello", Timestamp(1, 1)], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Timestamp after valid string", + ), + ConcatTest( + "type_code_after_string", + args=["hello", Code("function() {}")], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Code after valid string", + ), + ConcatTest( + "type_code_scope_after_string", + args=["hello", Code("function() {}", {"x": 1})], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Code with scope after valid string", + ), + ConcatTest( + "type_binary_uuid_after_string", + args=["hello", Binary(b"data", 4)], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject binary UUID after valid string", + ), + # Invalid value before a valid string. 
+ ConcatTest( + "type_array_before_string", + args=[["a"], "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject array before valid string", + ), + ConcatTest( + "type_binary_before_string", + args=[Binary(b"data"), "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject binary before valid string", + ), + ConcatTest( + "type_bool_before_string", + args=[True, "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject boolean before valid string", + ), + ConcatTest( + "type_date_before_string", + args=[datetime(2024, 1, 1, tzinfo=timezone.utc), "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject datetime before valid string", + ), + ConcatTest( + "type_decimal128_before_string", + args=[DECIMAL128_ONE_AND_HALF, "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Decimal128 before valid string", + ), + ConcatTest( + "type_float_before_string", + args=[3.14, "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject float before valid string", + ), + ConcatTest( + "type_int_before_string", + args=[42, "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject int before valid string", + ), + ConcatTest( + "type_long_before_string", + args=[Int64(42), "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Int64 before valid string", + ), + ConcatTest( + "type_maxkey_before_string", + args=[MaxKey(), "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject MaxKey before valid string", + ), + ConcatTest( + "type_minkey_before_string", + args=[MinKey(), "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject MinKey before valid string", + ), + ConcatTest( + "type_object_before_string", + args=[{"a": 1}, "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject object before valid string", + ), + ConcatTest( + "type_objectid_before_string", + args=[ObjectId("507f1f77bcf86cd799439011"), "hello"], + 
error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject ObjectId before valid string", + ), + ConcatTest( + "type_regex_before_string", + args=[Regex("pattern"), "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject regex before valid string", + ), + ConcatTest( + "type_timestamp_before_string", + args=[Timestamp(1, 1), "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Timestamp before valid string", + ), + ConcatTest( + "type_code_before_string", + args=[Code("function() {}"), "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Code before valid string", + ), + ConcatTest( + "type_code_scope_before_string", + args=[Code("function() {}", {"x": 1}), "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject Code with scope before valid string", + ), + ConcatTest( + "type_binary_uuid_before_string", + args=[Binary(b"data", 4), "hello"], + error_code=CONCAT_TYPE_ERROR, + msg="$concat should reject binary UUID before valid string", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(CONCAT_TYPE_ERROR_TESTS)) +def test_concat_type_error_cases(collection, test_case: ConcatTest): + """Test $concat type error cases.""" + result = execute_expression(collection, {"$concat": test_case.args}) + assert_expression_result( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_usage.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_usage.py new file mode 100644 index 00000000..630e579b --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/test_concat_usage.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_project_with_insert, +) 
+from documentdb_tests.framework.assertions import assertSuccess + + +# Property [Document Field References]: $concat works with field references from inserted documents. +def test_concat_document_fields(collection): + """Test $concat reads values from document fields.""" + result = execute_project_with_insert( + collection, + {"a": "hello", "b": " ", "c": "world"}, + {"result": {"$concat": ["$a", "$b", "$c"]}}, + ) + assertSuccess( + result, [{"result": "hello world"}], msg="$concat should resolve field references" + ) + + +# Property [Bare Argument]: bare string or null (non-array) is accepted by $concat. +_SYNTAX_BARE_VALID = [ + pytest.param("hello", "hello", id="bare_string"), + pytest.param(None, None, id="bare_null"), +] + + +@pytest.mark.parametrize("value, expected", _SYNTAX_BARE_VALID) +def test_concat_bare_argument(collection, value, expected): + """Test $concat accepts a bare string or null without an array wrapper.""" + result = execute_expression(collection, {"$concat": value}) + assertSuccess( + result, + [{"result": expected}], + msg=f"$concat bare argument {value!r} should return {expected!r}", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/utils/concat_common.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/utils/concat_common.py new file mode 100644 index 00000000..8158eb48 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/concat/utils/concat_common.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from documentdb_tests.framework.test_case import BaseTestCase + + +@dataclass(frozen=True) +class 
ConcatTest(BaseTestCase): + args: list[Any] = None # type: ignore[assignment] diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_core.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_core.py new file mode 100644 index 00000000..a1b3e37d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_core.py @@ -0,0 +1,152 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, + execute_project_with_insert, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.split_common import ( + SplitTest, + _expr, +) + +# Property [Core Splitting]: splitting produces an array of substrings between +# delimiter occurrences, with the delimiter excluded from results. +SPLIT_CORE_TESTS: list[SplitTest] = [ + SplitTest( + "core_basic", + string="a-b-c", + delimiter="-", + expected=["a", "b", "c"], + msg="$split should split on single-char delimiter", + ), + SplitTest( + "core_space_delim", + string="hello world foo", + delimiter=" ", + expected=["hello", "world", "foo"], + msg="$split should split on space delimiter", + ), + # Delimiter not found returns single-element array with original string. + SplitTest( + "core_no_match", + string="hello", + delimiter="xyz", + expected=["hello"], + msg="$split should return single-element array when delimiter not found", + ), + # Delimiter at start produces leading empty string. 
+ SplitTest( + "core_delim_at_start", + string="-hello-world", + delimiter="-", + expected=["", "hello", "world"], + msg="$split should produce leading empty string when delimiter is at start", + ), + # Delimiter at end produces trailing empty string. + SplitTest( + "core_delim_at_end", + string="hello-world-", + delimiter="-", + expected=["hello", "world", ""], + msg="$split should produce trailing empty string when delimiter is at end", + ), + # Consecutive delimiters produce empty strings between them. + SplitTest( + "core_consecutive_delim", + string="a--b", + delimiter="-", + expected=["a", "", "b"], + msg="$split should produce empty string between consecutive delimiters", + ), + SplitTest( + "core_triple_consecutive_delim", + string="a---b", + delimiter="-", + expected=["a", "", "", "b"], + msg="$split should produce multiple empty strings for triple consecutive delimiters", + ), + # Multi-character delimiter. + SplitTest( + "core_multi_char_delim", + string="helloXYworld", + delimiter="XY", + expected=["hello", "world"], + msg="$split should match multi-character delimiter as a unit", + ), + # Delimiter equals the input. + SplitTest( + "core_delim_equals_string", + string="hello", + delimiter="hello", + expected=["", ""], + msg="$split should produce two empty strings when delimiter equals input", + ), + # Greedy left-to-right: "aaa" split by "aa" matches at position 0, leaving "a". + SplitTest( + "core_greedy_overlap", + string="aaa", + delimiter="aa", + expected=["", "a"], + msg="$split should match greedily left-to-right for overlapping delimiter", + ), +] + +# Property [Expression Arguments]: both argument positions accept expressions +# that resolve to strings, not just literals. 
+SPLIT_EXPR_TESTS: list[SplitTest] = [ + SplitTest( + "expr_string_from_concat", + string={"$concat": ["a-", "b"]}, + delimiter="-", + expected=["a", "b"], + msg="$split should accept expression resolving to string as first argument", + ), + SplitTest( + "expr_delimiter_from_concat", + string="hello-world", + delimiter={"$concat": ["-", ""]}, + expected=["hello", "world"], + msg="$split should accept expression resolving to string as delimiter", + ), + # $literal suppresses field-path interpretation of $-prefixed strings. + SplitTest( + "expr_literal_dollar_prefix", + string={"$literal": "$hello"}, + delimiter="h", + expected=["$", "ello"], + msg="$split should treat $literal dollar-prefixed string as text, not field path", + ), +] + +SPLIT_CORE_ALL_TESTS = SPLIT_CORE_TESTS + SPLIT_EXPR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(SPLIT_CORE_ALL_TESTS)) +def test_split_core_cases(collection, test_case: SplitTest): + """Test $split core splitting and expression argument cases.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) + + +# Property [Document Field References]: $split works with values read from +# document fields. 
+def test_split_document_fields(collection): + """Test $split reads values from document fields.""" + result = execute_project_with_insert( + collection, + {"s": "a,b,c", "d": ","}, + {"result": {"$split": ["$s", "$d"]}}, + ) + assertSuccess( + result, + [{"result": ["a", "b", "c"]}], + msg="$split should read string and delimiter from document fields", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_encoding.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_encoding.py new file mode 100644 index 00000000..f12777fa --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_encoding.py @@ -0,0 +1,276 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.split_common import ( + SplitTest, + _expr, +) + +# Property [Encoding and Character Handling]: the delimiter is matched as a +# literal string, with correct handling of multi-byte UTF-8, special +# characters, and no Unicode normalization. +SPLIT_ENCODING_TESTS: list[SplitTest] = [ + # Regex special characters treated as literals. + SplitTest( + "encoding_dot_delim", + string="a.b.c", + delimiter=".", + expected=["a", "b", "c"], + msg="$split should treat dot as literal, not regex wildcard", + ), + SplitTest( + "encoding_star_delim", + string="a*b*c", + delimiter="*", + expected=["a", "b", "c"], + msg="$split should treat star as literal, not regex quantifier", + ), + SplitTest( + "encoding_backslash_delim", + string="a\\b\\c", + delimiter="\\", + expected=["a", "b", "c"], + msg="$split should treat backslash as literal delimiter", + ), + # JSON/BSON-meaningful characters work as content and delimiters. 
+ SplitTest( + "encoding_brace_delim", + string="a{b}c", + delimiter="{", + expected=["a", "b}c"], + msg="$split should treat brace as literal delimiter", + ), + SplitTest( + "encoding_pipe_delim", + string="a|b|c", + delimiter="|", + expected=["a", "b", "c"], + msg="$split should treat pipe as literal delimiter", + ), + # 2-byte UTF-8 (e.g., U+00E9 é). + SplitTest( + "encoding_2byte_in_string", + string="café-résumé", + delimiter="-", + expected=["café", "résumé"], + msg="$split should correctly split strings containing 2-byte UTF-8 characters", + ), + SplitTest( + "encoding_2byte_as_delim", + string="caférésumééblanc", + delimiter="\u00e9", + expected=["caf", "r", "sum", "", "blanc"], + msg="$split should use 2-byte UTF-8 character as delimiter", + ), + # 3-byte UTF-8 (e.g., U+263A ☺). + SplitTest( + "encoding_3byte_as_delim", + string="a\u263ab\u263ac", + delimiter="\u263a", + expected=["a", "b", "c"], + msg="$split should use 3-byte UTF-8 character as delimiter", + ), + # 4-byte UTF-8 (e.g., U+1F389 🎉). + SplitTest( + "encoding_4byte_as_delim", + string="a\U0001f389b\U0001f389c", + delimiter="\U0001f389", + expected=["a", "b", "c"], + msg="$split should use 4-byte UTF-8 character as delimiter", + ), + # Special characters as delimiters. + SplitTest( + "encoding_newline_delim", + string="hello\nworld\nfoo", + delimiter="\n", + expected=["hello", "world", "foo"], + msg="$split should split on newline delimiter", + ), + SplitTest( + "encoding_tab_delim", + string="hello\tworld", + delimiter="\t", + expected=["hello", "world"], + msg="$split should split on tab delimiter", + ), + SplitTest( + "encoding_null_byte_delim", + string="hello\x00world", + delimiter="\x00", + expected=["hello", "world"], + msg="$split should split on null byte delimiter", + ), + # Multi-char special delimiter. 
+ SplitTest( + "encoding_crlf_delim", + string="hello\r\nworld\r\nfoo", + delimiter="\r\n", + expected=["hello", "world", "foo"], + msg="$split should split on CRLF as a two-character delimiter", + ), + # Precomposed U+00E9 and decomposed U+0065+U+0301 are distinct. + SplitTest( + "encoding_precomposed_not_matched_by_decomposed", + string="caf\u00e9", + delimiter="\u0065\u0301", + expected=["caf\u00e9"], + msg="$split should not match precomposed character with decomposed delimiter", + ), + SplitTest( + "encoding_decomposed_not_matched_by_precomposed", + string="caf\u0065\u0301", + delimiter="\u00e9", + expected=["caf\u0065\u0301"], + msg="$split should not match decomposed character with precomposed delimiter", + ), + # Base character matches independently of following combining mark. + SplitTest( + "encoding_base_char_splits_before_combining_mark", + string="caf\u0065\u0301", + delimiter="e", + expected=["caf", "\u0301"], + msg="$split should split on base character leaving combining mark in next segment", + ), + # ZWJ (U+200D) is treated as a regular code point. + SplitTest( + "encoding_zwj_splits_emoji_sequence", + string="\U0001f468\u200d\U0001f469\u200d\U0001f467", + delimiter="\u200d", + expected=["\U0001f468", "\U0001f469", "\U0001f467"], + msg="$split should split ZWJ emoji sequence on ZWJ character", + ), + # BOM (U+FEFF) as delimiter. + SplitTest( + "encoding_bom_delim", + string="a\ufeffb\ufeffc", + delimiter="\ufeff", + expected=["a", "b", "c"], + msg="$split should split on BOM character as delimiter", + ), + # Zero-width space (U+200B) as delimiter. + SplitTest( + "encoding_zero_width_space_delim", + string="a\u200bb\u200bc", + delimiter="\u200b", + expected=["a", "b", "c"], + msg="$split should split on zero-width space as delimiter", + ), + # Directional marks as delimiters: LRM (U+200E) and RLM (U+200F). 
+ SplitTest( + "encoding_lrm_delim", + string="a\u200eb\u200ec", + delimiter="\u200e", + expected=["a", "b", "c"], + msg="$split should split on left-to-right mark as delimiter", + ), + SplitTest( + "encoding_rlm_delim", + string="a\u200fb\u200fc", + delimiter="\u200f", + expected=["a", "b", "c"], + msg="$split should split on right-to-left mark as delimiter", + ), + # Control character (U+0001) as delimiter. + SplitTest( + "encoding_control_char_delim", + string="a\x01b\x01c", + delimiter="\x01", + expected=["a", "b", "c"], + msg="$split should split on control character as delimiter", + ), + # Different whitespace characters do not match each other. + SplitTest( + "encoding_tab_not_space", + string="a\tb", + delimiter=" ", + expected=["a\tb"], + msg="$split should not match tab when delimiter is space", + ), + # NBSP (U+00A0) does not match ASCII space. + SplitTest( + "encoding_nbsp_not_space", + string="a\u00a0b", + delimiter=" ", + expected=["a\u00a0b"], + msg="$split should not match non-breaking space when delimiter is ASCII space", + ), + # Splitting CRLF by \n leaves \r attached to preceding segment. + SplitTest( + "encoding_crlf_split_by_lf", + string="hello\r\nworld", + delimiter="\n", + expected=["hello\r", "world"], + msg="$split on LF should leave CR attached to preceding segment", + ), + # Splitting CRLF by \r leaves \n attached to following segment. + SplitTest( + "encoding_crlf_split_by_cr", + string="hello\r\nworld", + delimiter="\r", + expected=["hello", "\nworld"], + msg="$split on CR should leave LF attached to following segment", + ), + # Escape sequence \d as delimiter does not match digits (literal matching). + SplitTest( + "encoding_backslash_d_literal", + string="a1b2c", + delimiter="\\d", + expected=["a1b2c"], + msg="$split should treat backslash-d as literal two-char string, not regex digit class", + ), +] + +# Property [Case Sensitivity]: splitting is always case-sensitive for all +# scripts. 
+SPLIT_CASE_SENSITIVITY_TESTS: list[SplitTest] = [ + # ASCII: uppercase "H" does not match lowercase "h". + SplitTest( + "case_ascii", + string="Hello-hello-HELLO", + delimiter="hello", + expected=["Hello-", "-HELLO"], + msg="$split should be case-sensitive for ASCII letters", + ), + # Latin extended: U+00E9 (é) does not match U+00C9 (É). + SplitTest( + "case_latin_extended", + string="café-cafÉ", + delimiter="\u00e9", + expected=["caf", "-cafÉ"], + msg="$split should be case-sensitive for Latin extended characters", + ), + # Greek: U+03C3 (σ) does not match U+03A3 (Σ). + SplitTest( + "case_greek", + string="\u03c3-\u03a3-\u03c3", + delimiter="\u03a3", + expected=["\u03c3-", "-\u03c3"], + msg="$split should be case-sensitive for Greek letters", + ), + # Cyrillic: U+0430 (а) does not match U+0410 (А). + SplitTest( + "case_cyrillic", + string="\u0430-\u0410-\u0430", + delimiter="\u0410", + expected=["\u0430-", "-\u0430"], + msg="$split should be case-sensitive for Cyrillic letters", + ), +] + +SPLIT_ENCODING_ALL_TESTS = SPLIT_ENCODING_TESTS + SPLIT_CASE_SENSITIVITY_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(SPLIT_ENCODING_ALL_TESTS)) +def test_split_encoding_cases(collection, test_case: SplitTest): + """Test $split encoding, character handling, and case sensitivity cases.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_invalid_args.py new file mode 100644 index 00000000..1b411d33 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_invalid_args.py @@ -0,0 +1,227 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest 
+from bson import Binary, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp +from bson.code import Code + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.error_codes import ( + EXPRESSION_TYPE_MISMATCH_ERROR, + FAILED_TO_PARSE_ERROR, + INVALID_DOLLAR_FIELD_PATH, + SPLIT_EMPTY_SEPARATOR_ERROR, + SPLIT_STRING_TYPE_ERROR, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.split_common import ( + SplitTest, + _expr, +) + +# Property [Empty Delimiter Error]: an empty string delimiter produces +# SPLIT_EMPTY_SEPARATOR_ERROR. +SPLIT_EMPTY_DELIM_ERROR_TESTS: list[SplitTest] = [ + SplitTest( + "empty_delim_literal", + string="abc", + delimiter="", + error_code=SPLIT_EMPTY_SEPARATOR_ERROR, + msg="$split should reject empty string as delimiter", + ), + SplitTest( + "empty_delim_both_empty", + string="", + delimiter="", + error_code=SPLIT_EMPTY_SEPARATOR_ERROR, + msg="$split should reject empty delimiter even when string is also empty", + ), + # Type error in first argument takes precedence over empty delimiter error. + SplitTest( + "empty_delim_type_error_precedence", + string=42, + delimiter="", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should report string type error before empty delimiter error", + ), +] + + +# Property [Syntax Validation]: a bare "$" is rejected as an invalid field path, +# "$$" is rejected as an empty variable name, and "$$NOW" resolves to a datetime which fails +# the type check. 
+SPLIT_SYNTAX_TESTS: list[SplitTest] = [ + SplitTest( + "syntax_bare_dollar", + string="$", + delimiter="-", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$split should reject bare '$' as invalid field path", + ), + SplitTest( + "syntax_double_dollar_string", + string="$$", + delimiter="-", + error_code=FAILED_TO_PARSE_ERROR, + msg="$split should reject '$$' as empty variable name in string position", + ), + SplitTest( + "syntax_double_dollar_delimiter", + string="hello", + delimiter="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$split should reject '$$' as empty variable name in delimiter position", + ), + SplitTest( + "syntax_now_resolves_to_date", + string="$$NOW", + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject $$NOW which resolves to datetime, not string", + ), +] + +# Property [Arity Error]: providing fewer or more than two arguments produces +# EXPRESSION_TYPE_MISMATCH_ERROR. Raw expressions are needed since the +# dataclass always builds two-arg expressions. +SPLIT_ARITY_ERROR_TESTS: list[SplitTest] = [ + SplitTest( + "arity_zero", + expr={"$split": []}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject zero arguments", + ), + SplitTest( + "arity_one", + expr={"$split": ["hello"]}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject one argument", + ), + SplitTest( + "arity_three", + expr={"$split": ["hello", "-", "extra"]}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject three arguments", + ), + # Non-array argument shapes are treated as 1 argument. 
+ SplitTest( + "arity_string", + expr={"$split": "hello"}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare string as non-array argument", + ), + SplitTest( + "arity_int", + expr={"$split": 42}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare int as non-array argument", + ), + SplitTest( + "arity_float", + expr={"$split": 3.14}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare float as non-array argument", + ), + SplitTest( + "arity_long", + expr={"$split": Int64(1)}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare Int64 as non-array argument", + ), + SplitTest( + "arity_decimal", + expr={"$split": Decimal128("1")}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare Decimal128 as non-array argument", + ), + SplitTest( + "arity_bool", + expr={"$split": True}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare boolean as non-array argument", + ), + SplitTest( + "arity_null", + expr={"$split": None}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare null as non-array argument", + ), + SplitTest( + "arity_object", + expr={"$split": {"a": 1}}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare object as non-array argument", + ), + SplitTest( + "arity_binary", + expr={"$split": Binary(b"data")}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare binary as non-array argument", + ), + SplitTest( + "arity_date", + expr={"$split": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare datetime as non-array argument", + ), + SplitTest( + "arity_objectid", + expr={"$split": ObjectId()}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare ObjectId as non-array argument", + ), + SplitTest( + "arity_regex", + expr={"$split": 
Regex("pattern")}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare regex as non-array argument", + ), + SplitTest( + "arity_timestamp", + expr={"$split": Timestamp(1, 1)}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare Timestamp as non-array argument", + ), + SplitTest( + "arity_minkey", + expr={"$split": MinKey()}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare MinKey as non-array argument", + ), + SplitTest( + "arity_maxkey", + expr={"$split": MaxKey()}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare MaxKey as non-array argument", + ), + SplitTest( + "arity_code", + expr={"$split": Code("function() {}")}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare Code as non-array argument", + ), + SplitTest( + "arity_code_scope", + expr={"$split": Code("function() {}", {"x": 1})}, + error_code=EXPRESSION_TYPE_MISMATCH_ERROR, + msg="$split should reject bare Code with scope as non-array argument", + ), +] + +SPLIT_INVALID_ARGS_TESTS = ( + SPLIT_EMPTY_DELIM_ERROR_TESTS + SPLIT_SYNTAX_TESTS + SPLIT_ARITY_ERROR_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(SPLIT_INVALID_ARGS_TESTS)) +def test_split_invalid_args_cases(collection, test_case: SplitTest): + """Test $split empty delimiter, syntax validation, and arity error cases.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_invariants.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_invariants.py new file mode 100644 index 00000000..cf6cc859 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_invariants.py @@ -0,0 +1,140 @@ +from 
__future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_project, +) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.split_common import ( + SplitTest, +) + +# Property [Return Type]: the result is always an array when the expression +# succeeds. +SPLIT_RETURN_TYPE_TESTS: list[SplitTest] = [ + SplitTest( + "return_type_basic", + string="a-b", + delimiter="-", + msg="$split result should be an array of strings for basic split", + ), + SplitTest( + "return_type_no_match", + string="hello", + delimiter="-", + msg="$split result should be an array of strings when delimiter not found", + ), + SplitTest( + "return_type_empty_string", + string="", + delimiter="-", + msg="$split result should be an array of strings for empty input", + ), + SplitTest( + "return_type_unicode", + string="café", + delimiter="é", + msg="$split result should be an array of strings for unicode input", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(SPLIT_RETURN_TYPE_TESTS)) +def test_split_return_type(collection, test_case: SplitTest): + """Test $split result is always an array of strings.""" + split = {"$split": [test_case.string, test_case.delimiter]} + result = execute_project( + collection, + { + "isArray": {"$isArray": split}, + "allStrings": { + "$allElementsTrue": { + "$map": { + "input": split, + "as": "el", + "in": {"$eq": [{"$type": "$$el"}, "string"]}, + } + } + }, + }, + ) + assertSuccess(result, [{"isArray": True, "allStrings": True}], msg=test_case.msg) + + +# Property [Round-Trip]: joining the result array elements with the delimiter +# reproduces the original input string. 
+SPLIT_ROUND_TRIP_TESTS: list[SplitTest] = [ + SplitTest( + "round_trip_basic", + string="a-b-c", + delimiter="-", + msg="$split round-trip should reproduce original string", + ), + SplitTest( + "round_trip_no_match", + string="hello", + delimiter="-", + msg="$split round-trip should reproduce string when delimiter not found", + ), + SplitTest( + "round_trip_delim_at_edges", + string="-a-", + delimiter="-", + msg="$split round-trip should reproduce string with delimiter at edges", + ), + SplitTest( + "round_trip_consecutive", + string="a--b", + delimiter="-", + msg="$split round-trip should reproduce string with consecutive delimiters", + ), + SplitTest( + "round_trip_empty_string", + string="", + delimiter="-", + msg="$split round-trip should reproduce empty string", + ), + SplitTest( + "round_trip_all_delimiters", + string="---", + delimiter="-", + msg="$split round-trip should reproduce string of only delimiters", + ), + SplitTest( + "round_trip_multi_char_delim", + string="aXYbXYc", + delimiter="XY", + msg="$split round-trip should reproduce string with multi-char delimiter", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(SPLIT_ROUND_TRIP_TESTS)) +def test_split_round_trip(collection, test_case: SplitTest): + """Test $split round-trip: joining result with delimiter reproduces input.""" + split = {"$split": [test_case.string, test_case.delimiter]} + result = execute_project( + collection, + { + "roundTrip": { + "$reduce": { + "input": split, + "initialValue": None, + "in": { + "$cond": [ + {"$eq": ["$$value", None]}, + "$$this", + {"$concat": ["$$value", test_case.delimiter, "$$this"]}, + ] + }, + } + }, + "original": test_case.string, + }, + ) + assertSuccess( + result, [{"roundTrip": test_case.string, "original": test_case.string}], msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_null.py 
b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_null.py new file mode 100644 index 00000000..43b12a09 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_null.py @@ -0,0 +1,141 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import MISSING + +from .utils.split_common import ( + SplitTest, + _expr, +) + +# Property [Null Propagation]: if either argument is null or missing, the result +# is null. +SPLIT_NULL_TESTS: list[SplitTest] = [ + SplitTest( + "null_string", + string=None, + delimiter="-", + expected=None, + msg="$split should return null when string is null", + ), + SplitTest( + "null_delimiter", + string="hello-world", + delimiter=None, + expected=None, + msg="$split should return null when delimiter is null", + ), + SplitTest( + "null_both", + string=None, + delimiter=None, + expected=None, + msg="$split should return null when both arguments are null", + ), + SplitTest( + "missing_string", + string=MISSING, + delimiter="-", + expected=None, + msg="$split should return null when string is missing", + ), + SplitTest( + "missing_delimiter", + string="hello-world", + delimiter=MISSING, + expected=None, + msg="$split should return null when delimiter is missing", + ), + SplitTest( + "missing_both", + string=MISSING, + delimiter=MISSING, + expected=None, + msg="$split should return null when both arguments are missing", + ), + SplitTest( + "null_string_missing_delimiter", + string=None, + delimiter=MISSING, + expected=None, + msg="$split should return null when string is null and delimiter is missing", + ), + SplitTest( + "missing_string_null_delimiter", + string=MISSING, + delimiter=None, + expected=None, 
+ msg="$split should return null when string is missing and delimiter is null", + ), +] + +# Property [Null Precedence over Type Error]: null in either position takes +# precedence over a type error in the other position. +SPLIT_NULL_PRECEDENCE_TESTS: list[SplitTest] = [ + SplitTest( + "null_precedes_type_error_in_delimiter", + string=None, + delimiter=42, + expected=None, + msg="$split should return null when string is null even if delimiter has wrong type", + ), + SplitTest( + "null_precedes_type_error_in_string", + string=42, + delimiter=None, + expected=None, + msg="$split should return null when delimiter is null even if string has wrong type", + ), + SplitTest( + "missing_precedes_type_error_in_delimiter", + string=MISSING, + delimiter=42, + expected=None, + msg="$split should return null when string is missing even if delimiter has wrong type", + ), + SplitTest( + "missing_precedes_type_error_in_string", + string=42, + delimiter=MISSING, + expected=None, + msg="$split should return null when delimiter is missing even if string has wrong type", + ), +] + +# Property [Null Precedence over Empty Delimiter Error]: null in either position +# takes precedence over the empty delimiter error. 
+SPLIT_NULL_EMPTY_DELIM_PRECEDENCE_TESTS: list[SplitTest] = [ + SplitTest( + "null_precedes_empty_delim", + string=None, + delimiter="", + expected=None, + msg="$split should return null when string is null even with empty delimiter", + ), + SplitTest( + "missing_precedes_empty_delim", + string=MISSING, + delimiter="", + expected=None, + msg="$split should return null when string is missing even with empty delimiter", + ), +] + +SPLIT_NULL_ALL_TESTS = ( + SPLIT_NULL_TESTS + SPLIT_NULL_PRECEDENCE_TESTS + SPLIT_NULL_EMPTY_DELIM_PRECEDENCE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(SPLIT_NULL_ALL_TESTS)) +def test_split_null_cases(collection, test_case: SplitTest): + """Test $split null propagation and null precedence cases.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_size_limit.py new file mode 100644 index 00000000..d11c6387 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_size_limit.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.error_codes import STRING_SIZE_LIMIT_ERROR +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import STRING_SIZE_LIMIT_BYTES + +from .utils.split_common import ( + SplitTest, + _expr, +) + +# Property [String Size Limit - Success]: input one byte under the limit is accepted. 
+SPLIT_SIZE_LIMIT_SUCCESS_TESTS: list[SplitTest] = [ + SplitTest( + "size_string_one_under", + string="a" * (STRING_SIZE_LIMIT_BYTES - 1), + delimiter="-", + expected=["a" * (STRING_SIZE_LIMIT_BYTES - 1)], + msg="$split should accept input string one byte under the size limit", + ), + SplitTest( + "size_delim_one_under", + string="hello", + delimiter="a" * (STRING_SIZE_LIMIT_BYTES - 1), + expected=["hello"], + msg="$split should accept delimiter one byte under the size limit", + ), +] + + +# Property [String Size Limit - Error]: an input string or delimiter at the size +# limit produces an error. +SPLIT_SIZE_LIMIT_ERROR_TESTS: list[SplitTest] = [ + SplitTest( + "size_limit_input_at_boundary", + string="a" * STRING_SIZE_LIMIT_BYTES, + delimiter="-", + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$split should reject input string at the size limit", + ), + SplitTest( + "size_limit_delim_at_boundary", + string="hello", + delimiter="a" * STRING_SIZE_LIMIT_BYTES, + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$split should reject delimiter at the size limit", + ), + SplitTest( + "size_limit_input_2byte", + string="\u00e9" * (STRING_SIZE_LIMIT_BYTES // 2), + delimiter="-", + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$split should reject 2-byte char input totaling the size limit", + ), + # Sub-expression producing oversized result is caught before $split runs. 
+ SplitTest( + "size_limit_input_subexpr", + string={ + "$concat": ["a" * (STRING_SIZE_LIMIT_BYTES // 2), "a" * (STRING_SIZE_LIMIT_BYTES // 2)] + }, + delimiter="-", + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$split should reject sub-expression producing oversized string", + ), +] + +SPLIT_SIZE_LIMIT_TESTS = SPLIT_SIZE_LIMIT_SUCCESS_TESTS + SPLIT_SIZE_LIMIT_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(SPLIT_SIZE_LIMIT_TESTS)) +def test_split_size_limit_cases(collection, test_case: SplitTest): + """Test $split string size limit cases.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_type_errors.py new file mode 100644 index 00000000..1fe25b7d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/test_split_type_errors.py @@ -0,0 +1,303 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Decimal128, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp +from bson.code import Code + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.error_codes import ( + SPLIT_DELIMITER_TYPE_ERROR, + SPLIT_STRING_TYPE_ERROR, +) +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import FLOAT_NAN + +from .utils.split_common import ( + SplitTest, + _expr, +) + +# Property [Type Strictness - String Argument]: any non-string, non-null first +# argument produces SPLIT_STRING_TYPE_ERROR. 
+SPLIT_STRING_TYPE_ERROR_TESTS: list[SplitTest] = [ + SplitTest( + "type_string_int", + string=42, + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject int as string argument", + ), + SplitTest( + "type_string_float", + string=3.14, + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject float as string argument", + ), + SplitTest( + "type_string_long", + string=Int64(1), + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject Int64 as string argument", + ), + SplitTest( + "type_string_decimal", + string=Decimal128("1"), + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject Decimal128 as string argument", + ), + SplitTest( + "type_string_bool", + string=True, + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject boolean as string argument", + ), + SplitTest( + "type_string_array", + string=["a"], + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject array as string argument", + ), + SplitTest( + "type_string_object", + string={"a": 1}, + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject object as string argument", + ), + SplitTest( + "type_string_binary", + string=Binary(b"data"), + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject binary as string argument", + ), + SplitTest( + "type_string_date", + string=datetime(2024, 1, 1, tzinfo=timezone.utc), + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject datetime as string argument", + ), + SplitTest( + "type_string_objectid", + string=ObjectId("507f1f77bcf86cd799439011"), + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject ObjectId as string argument", + ), + SplitTest( + "type_string_regex", + string=Regex("pat"), + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject regex as string argument", + ), 
+ SplitTest( + "type_string_maxkey", + string=MaxKey(), + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject MaxKey as string argument", + ), + SplitTest( + "type_string_minkey", + string=MinKey(), + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject MinKey as string argument", + ), + SplitTest( + "type_string_timestamp", + string=Timestamp(1, 1), + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject Timestamp as string argument", + ), + SplitTest( + "type_string_code", + string=Code("function() {}"), + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject Code as string argument", + ), + SplitTest( + "type_string_code_scope", + string=Code("function() {}", {"x": 1}), + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject Code with scope as string argument", + ), + SplitTest( + "type_string_nan", + string=FLOAT_NAN, + delimiter="-", + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should reject NaN as string argument", + ), +] + +# Property [Type Strictness - Delimiter Argument]: any non-string, non-null +# second argument produces SPLIT_DELIMITER_TYPE_ERROR. 
+SPLIT_DELIM_TYPE_ERROR_TESTS: list[SplitTest] = [ + SplitTest( + "type_delim_int", + string="hello", + delimiter=42, + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject int as delimiter", + ), + SplitTest( + "type_delim_float", + string="hello", + delimiter=3.14, + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject float as delimiter", + ), + SplitTest( + "type_delim_long", + string="hello", + delimiter=Int64(1), + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject Int64 as delimiter", + ), + SplitTest( + "type_delim_decimal", + string="hello", + delimiter=Decimal128("1"), + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject Decimal128 as delimiter", + ), + SplitTest( + "type_delim_bool", + string="hello", + delimiter=True, + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject boolean as delimiter", + ), + SplitTest( + "type_delim_array", + string="hello", + delimiter=["a"], + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject array as delimiter", + ), + SplitTest( + "type_delim_object", + string="hello", + delimiter={"a": 1}, + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject object as delimiter", + ), + SplitTest( + "type_delim_binary", + string="hello", + delimiter=Binary(b"data"), + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject binary as delimiter", + ), + SplitTest( + "type_delim_date", + string="hello", + delimiter=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject datetime as delimiter", + ), + SplitTest( + "type_delim_objectid", + string="hello", + delimiter=ObjectId("507f1f77bcf86cd799439011"), + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject ObjectId as delimiter", + ), + SplitTest( + "type_delim_regex", + string="hello", + delimiter=Regex("pat"), + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject regex as delimiter", + ), + 
SplitTest( + "type_delim_maxkey", + string="hello", + delimiter=MaxKey(), + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject MaxKey as delimiter", + ), + SplitTest( + "type_delim_minkey", + string="hello", + delimiter=MinKey(), + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject MinKey as delimiter", + ), + SplitTest( + "type_delim_timestamp", + string="hello", + delimiter=Timestamp(1, 1), + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject Timestamp as delimiter", + ), + SplitTest( + "type_delim_code", + string="hello", + delimiter=Code("function() {}"), + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject Code as delimiter", + ), + SplitTest( + "type_delim_code_scope", + string="hello", + delimiter=Code("function() {}", {"x": 1}), + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject Code with scope as delimiter", + ), + SplitTest( + "type_delim_nan", + string="hello", + delimiter=FLOAT_NAN, + error_code=SPLIT_DELIMITER_TYPE_ERROR, + msg="$split should reject NaN as delimiter", + ), +] + +# Property [Type Error Precedence]: when both arguments are invalid types, the +# first argument's type error takes precedence. 
+SPLIT_TYPE_PRECEDENCE_TESTS: list[SplitTest] = [ + SplitTest( + "type_precedence_both_invalid", + string=42, + delimiter=42, + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should report string type error when both arguments have wrong type", + ), + SplitTest( + "type_precedence_bool_float", + string=True, + delimiter=3.14, + error_code=SPLIT_STRING_TYPE_ERROR, + msg="$split should report string type error first when both arguments are non-string", + ), +] + +SPLIT_TYPE_ERROR_ALL_TESTS = ( + SPLIT_STRING_TYPE_ERROR_TESTS + SPLIT_DELIM_TYPE_ERROR_TESTS + SPLIT_TYPE_PRECEDENCE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(SPLIT_TYPE_ERROR_ALL_TESTS)) +def test_split_type_error_cases(collection, test_case: SplitTest): + """Test $split type strictness cases.""" + result = execute_expression(collection, _expr(test_case)) + assert_expression_result( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/utils/split_common.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/utils/split_common.py new file mode 100644 index 00000000..e431407c --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/split/utils/split_common.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, cast + +from documentdb_tests.framework.test_case import BaseTestCase + + +@dataclass(frozen=True) +class SplitTest(BaseTestCase): + """Test case for $split operator.""" + + string: Any = None + delimiter: Any = None + expr: Any = None # Raw expression override for arity/syntax tests 
+ + +def _expr(test_case: SplitTest) -> dict[str, Any]: + if test_case.expr is not None: + return cast(dict[str, Any], test_case.expr) + return {"$split": [test_case.string, test_case.delimiter]} diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index c285b5aa..8acbcda0 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -4,11 +4,16 @@ """ DIVIDE_BY_ZERO_ERROR = 2 +FAILED_TO_PARSE_ERROR = 9 TYPE_MISMATCH_ERROR = 14 +BSON_TO_STRING_CONVERSION_ERROR = 16007 EXPRESSION_TYPE_MISMATCH_ERROR = 16020 +STRING_SIZE_LIMIT_ERROR = 16493 MODULO_ZERO_REMAINDER_ERROR = 16610 MODULO_NON_NUMERIC_ERROR = 16611 MORE_THAN_ONE_DATE_ERROR = 16612 +CONCAT_TYPE_ERROR = 16702 +INVALID_DOLLAR_FIELD_PATH = 16872 ABS_OVERFLOW_ERROR = 28680 LOG_NON_NUMERIC_VALUE_ERROR = 28756 LOG_NON_NUMERIC_BASE_ERROR = 28757 @@ -20,4 +25,7 @@ POW_BASE_ZERO_EXP_NEGATIVE_ERROR = 28764 NON_NUMERIC_TYPE_MISMATCH_ERROR = 28765 LN_NON_POSITIVE_INPUT_ERROR = 28766 +SPLIT_STRING_TYPE_ERROR = 40085 +SPLIT_EMPTY_SEPARATOR_ERROR = 40087 MODULO_DECIMAL128_ZERO_REMAINDER_ERROR = 5733415 +SPLIT_DELIMITER_TYPE_ERROR = 10503900 diff --git a/documentdb_tests/framework/test_constants.py b/documentdb_tests/framework/test_constants.py index 9e3088ed..d0aa0200 100644 --- a/documentdb_tests/framework/test_constants.py +++ b/documentdb_tests/framework/test_constants.py @@ -64,6 +64,7 @@ # Other constant values MISSING = "$missing" +STRING_SIZE_LIMIT_BYTES = 16 * 1024 * 1024 # Int32 lists NUMERIC_INT32_NEGATIVE = [INT32_UNDERFLOW, INT32_MIN]