From 9d431ce3c1158dd9f9c9ec4de40524f2684a1f57 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Fri, 27 Mar 2026 21:42:26 +0000 Subject: [PATCH 1/2] Add string operator $regex* tests Signed-off-by: Daniel Frankcom --- .../regexFind/test_regexFind_invalid_args.py | 273 ++++++++++++ .../regexFind/test_regexFind_matching.py | 286 +++++++++++++ .../string/regexFind/test_regexFind_null.py | 141 +++++++ .../regexFind/test_regexFind_options.py | 167 ++++++++ .../regexFind/test_regexFind_precedence.py | 126 ++++++ .../regexFind/test_regexFind_size_limit.py | 63 +++ .../regexFind/test_regexFind_type_errors.py | 390 ++++++++++++++++++ .../string/regexFind/test_regexFind_usage.py | 150 +++++++ .../string/regexFind/utils/__init__.py | 0 .../regexFind/utils/regexFind_common.py | 29 ++ .../test_regexFindAll_encoding.py | 150 +++++++ .../test_regexFindAll_invalid_args.py | 249 +++++++++++ .../test_regexFindAll_invariants.py | 196 +++++++++ .../test_regexFindAll_matching.py | 301 ++++++++++++++ .../regexFindAll/test_regexFindAll_null.py | 125 ++++++ .../regexFindAll/test_regexFindAll_options.py | 195 +++++++++ .../test_regexFindAll_precedence.py | 189 +++++++++ .../test_regexFindAll_size_limit.py | 74 ++++ .../test_regexFindAll_type_errors.py | 390 ++++++++++++++++++ .../regexFindAll/test_regexFindAll_usage.py | 97 +++++ .../string/regexFindAll/utils/__init__.py | 0 .../regexFindAll/utils/regexFindAll_common.py | 29 ++ .../test_regexMatch_invalid_args.py | 278 +++++++++++++ .../regexMatch/test_regexMatch_matching.py | 165 ++++++++ .../string/regexMatch/test_regexMatch_null.py | 153 +++++++ .../regexMatch/test_regexMatch_options.py | 174 ++++++++ .../regexMatch/test_regexMatch_precedence.py | 126 ++++++ .../regexMatch/test_regexMatch_size_limit.py | 63 +++ .../regexMatch/test_regexMatch_type_errors.py | 390 ++++++++++++++++++ .../regexMatch/test_regexMatch_usage.py | 55 +++ .../string/regexMatch/utils/__init__.py | 0 .../regexMatch/utils/regexMatch_common.py | 29 ++ 32 files 
changed, 5053 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_invalid_args.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_matching.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_null.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_options.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_precedence.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_size_limit.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_type_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_usage.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/utils/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/utils/regexFind_common.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_encoding.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invalid_args.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invariants.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_matching.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_null.py create mode 100644 
documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_options.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_precedence.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_size_limit.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_type_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_usage.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/utils/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/utils/regexFindAll_common.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_invalid_args.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_matching.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_null.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_options.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_precedence.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_size_limit.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_type_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_usage.py create mode 100644 
documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/utils/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/utils/regexMatch_common.py diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_invalid_args.py new file mode 100644 index 00000000..9c8fd0a6 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_invalid_args.py @@ -0,0 +1,273 @@ +from __future__ import annotations + +import pytest +from bson import Regex + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + FAILED_TO_PARSE_ERROR, + INVALID_DOLLAR_FIELD_PATH, + REGEX_BAD_OPTION_ERROR, + REGEX_BAD_PATTERN_ERROR, + REGEX_MISSING_INPUT_ERROR, + REGEX_MISSING_REGEX_ERROR, + REGEX_NON_OBJECT_ERROR, + REGEX_NULL_BYTE_ERROR, + REGEX_OPTIONS_CONFLICT_ERROR, + REGEX_OPTIONS_NULL_BYTE_ERROR, + REGEX_UNKNOWN_FIELD_ERROR, +) +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( + RegexFindTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Syntax Validation]: missing required fields or unknown fields produce errors. 
+REGEXFIND_SYNTAX_ERROR_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "syntax_non_object_string", + expr={"$regexFind": "string"}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexFind should reject string as argument", + ), + RegexFindTest( + "syntax_non_object_array", + expr={"$regexFind": ["array"]}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexFind should reject array as argument", + ), + RegexFindTest( + "syntax_non_object_null", + expr={"$regexFind": None}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexFind should reject null as argument", + ), + RegexFindTest( + "syntax_non_object_int", + expr={"$regexFind": 42}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexFind should reject int as argument", + ), + RegexFindTest( + "syntax_non_object_bool", + expr={"$regexFind": True}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexFind should reject bool as argument", + ), + RegexFindTest( + "syntax_empty_object", + expr={"$regexFind": {}}, + error_code=REGEX_MISSING_INPUT_ERROR, + msg="$regexFind should reject empty object", + ), + RegexFindTest( + "syntax_missing_input", + expr={"$regexFind": {"regex": "abc"}}, + error_code=REGEX_MISSING_INPUT_ERROR, + msg="$regexFind should reject missing input field", + ), + RegexFindTest( + "syntax_missing_regex", + expr={"$regexFind": {"input": "abc"}}, + error_code=REGEX_MISSING_REGEX_ERROR, + msg="$regexFind should reject missing regex field", + ), + RegexFindTest( + "syntax_unknown_field", + expr={"$regexFind": {"input": "abc", "regex": "abc", "bogus": 1}}, + error_code=REGEX_UNKNOWN_FIELD_ERROR, + msg="$regexFind should reject unknown fields", + ), +] + +# Property [Options Placement]: when regex is a BSON Regex with flags, specifying the options +# field produces an error, even if the flags are equivalent or options is empty. 
+REGEXFIND_OPTIONS_CONFLICT_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "options_conflict_same_flags", + input="hello", + regex=Regex("hello", "i"), + options="i", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexFind should error when options duplicates BSON flags", + ), + RegexFindTest( + "options_conflict_different_flags", + input="hello", + regex=Regex("hello", "i"), + options="m", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexFind should error when options differs from BSON flags", + ), + RegexFindTest( + "options_conflict_empty_options", + input="hello", + regex=Regex("hello", "i"), + options="", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexFind should error on empty options with BSON flags", + ), +] + +# Property [Type Strictness - pattern]: invalid regex pattern produces an error. +REGEXFIND_BAD_PATTERN_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "type_bad_pattern_bracket", + input="abc", + regex="[invalid", + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexFind should reject unclosed bracket in pattern", + ), + RegexFindTest( + "type_bad_pattern_paren", + input="abc", + regex="(unclosed", + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexFind should reject unclosed paren in pattern", + ), + RegexFindTest( + "type_bad_pattern_var_lookbehind", + input="abc", + regex="(?<=a+)b", + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexFind should reject variable-length lookbehind", + ), +] + +# Property [Regex Pattern - null byte]: embedded null byte in regex pattern produces an error. +REGEXFIND_NULL_BYTE_PATTERN_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "type_null_byte_in_pattern", + input="abc", + regex="ab\x00c", + error_code=REGEX_NULL_BYTE_ERROR, + msg="$regexFind should reject null byte in regex pattern", + ), +] + +# Property [Type Strictness - option char]: unrecognized option character produces an error. +# Leading/trailing whitespace and mixed valid/invalid flags also produce errors. 
+REGEXFIND_BAD_OPTION_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "type_bad_option", + input="abc", + regex="abc", + options="z", + error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexFind should reject unrecognized option character", + ), + RegexFindTest( + "type_bad_option_leading_whitespace", + input="abc", + regex="abc", + options=" i", + error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexFind should reject leading whitespace in options", + ), + RegexFindTest( + "type_bad_option_trailing_whitespace", + input="abc", + regex="abc", + options="i ", + error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexFind should reject trailing whitespace in options", + ), + RegexFindTest( + "type_bad_option_mixed_valid_invalid", + input="abc", + regex="abc", + options="ig", + error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexFind should reject mix of valid and invalid options", + ), + RegexFindTest( + "type_bad_option_uppercase_I", + input="abc", + regex="abc", + options="I", + error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexFind should reject uppercase option character", + ), +] + +# Property [Null Byte in Options]: null byte in options string produces a distinct error. +REGEXFIND_OPTIONS_NULL_BYTE_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "type_null_byte_in_options", + input="abc", + regex="abc", + options="i\x00m", + error_code=REGEX_OPTIONS_NULL_BYTE_ERROR, + msg="$regexFind should reject null byte in options string", + ), +] + +# Property [Dollar Sign Error]: a bare "$" is interpreted as a field path and "$$" is interpreted +# as an empty variable name. 
+REGEXFIND_DOLLAR_ERROR_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "dollar_bare_input", + input="$", + regex="abc", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$regexFind should reject bare '$' as input field path", + ), + RegexFindTest( + "dollar_bare_regex", + input="hello", + regex="$", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$regexFind should reject bare '$' as regex field path", + ), + RegexFindTest( + "dollar_bare_options", + input="hello", + regex="abc", + options="$", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$regexFind should reject bare '$' as options field path", + ), + RegexFindTest( + "dollar_double_input", + input="$$", + regex="abc", + error_code=FAILED_TO_PARSE_ERROR, + msg="$regexFind should reject '$$' as empty variable in input", + ), + RegexFindTest( + "dollar_double_regex", + input="hello", + regex="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$regexFind should reject '$$' as empty variable in regex", + ), + RegexFindTest( + "dollar_double_options", + input="hello", + regex="abc", + options="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$regexFind should reject '$$' as empty variable in options", + ), +] + +REGEXFIND_INVALID_ARGS_ALL_TESTS = ( + REGEXFIND_SYNTAX_ERROR_TESTS + + REGEXFIND_OPTIONS_CONFLICT_TESTS + + REGEXFIND_BAD_PATTERN_TESTS + + REGEXFIND_NULL_BYTE_PATTERN_TESTS + + REGEXFIND_BAD_OPTION_TESTS + + REGEXFIND_OPTIONS_NULL_BYTE_TESTS + + REGEXFIND_DOLLAR_ERROR_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFIND_INVALID_ARGS_ALL_TESTS)) +def test_regexfind_cases(collection, test_case: RegexFindTest): + """Test $regexFind invalid argument cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_matching.py 
b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_matching.py new file mode 100644 index 00000000..81fc8e97 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_matching.py @@ -0,0 +1,286 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( + RegexFindTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [First Match Only]: only the first match in the input string is returned. +REGEXFIND_FIRST_MATCH_TESTS: list[RegexFindTest] = [ + # Second match is "456" but only the first should be returned. + RegexFindTest( + "first_match_two_numeric", + input="abc123def456", + regex="[0-9]+", + expected={"match": "123", "idx": 3, "captures": []}, + msg="$regexFind should return only the first numeric match", + ), + # Greedy quantifier matches longest at first position. + RegexFindTest( + "first_match_greedy", + input="aabab", + regex="a.*b", + expected={"match": "aabab", "idx": 0, "captures": []}, + msg="$regexFind should return greedy match at first position", + ), + # Lazy quantifier matches shortest at first position. + RegexFindTest( + "first_match_lazy", + input="aabab", + regex="a.*?b", + expected={"match": "aab", "idx": 0, "captures": []}, + msg="$regexFind should return lazy match at first position", + ), +] + +# Property [idx Code Point Semantics]: idx counts Unicode code points, not bytes. Each character +# contributes 1 regardless of UTF-8 byte width. +REGEXFIND_IDX_CODEPOINT_TESTS: list[RegexFindTest] = [ + # é (U+00E9) is 2 bytes. Byte index would be 2, codepoint index is 1. 
+ RegexFindTest( + "idx_cp_2byte_prefix", + input="\u00e9abc", + regex="abc", + expected={"match": "abc", "idx": 1, "captures": []}, + msg="$regexFind idx should count 2-byte char as one codepoint", + ), + # 日 (U+65E5) is 3 bytes. Byte index would be 3, codepoint index is 1. + RegexFindTest( + "idx_cp_3byte_prefix", + input="日abc", + regex="abc", + expected={"match": "abc", "idx": 1, "captures": []}, + msg="$regexFind idx should count 3-byte char as one codepoint", + ), + # 🎉 (U+1F389) is 4 bytes. Byte index would be 4, codepoint index is 1. + RegexFindTest( + "idx_cp_4byte_prefix", + input="🎉abc", + regex="abc", + expected={"match": "abc", "idx": 1, "captures": []}, + msg="$regexFind idx should count 4-byte char as one codepoint", + ), + # Mix of 2-byte, 3-byte, and 4-byte chars. Byte index would be 9, codepoint index is 3. + RegexFindTest( + "idx_cp_mixed_byte_widths", + input="\u00e9日🎉abc", + regex="abc", + expected={"match": "abc", "idx": 3, "captures": []}, + msg="$regexFind idx should count mixed multi-byte chars as codepoints", + ), + # Three 3-byte CJK chars. Byte index would be 9, codepoint index is 3. + RegexFindTest( + "idx_cp_cjk_prefix", + input="日本語abc", + regex="abc", + expected={"match": "abc", "idx": 3, "captures": []}, + msg="$regexFind idx should count CJK chars as codepoints", + ), + # Combining mark (e + U+0301) is two codepoints, not one. idx would be 1 if normalized + # to precomposed U+00E9. + RegexFindTest( + "idx_cp_combining_mark", + input="e\u0301abc", + regex="abc", + expected={"match": "abc", "idx": 2, "captures": []}, + msg="$regexFind idx should count combining mark as separate codepoint", + ), +] + +# Property [Captures Behavior]: captures array length equals the number of capture groups, in +# pattern order. Unmatched branches produce null. Non-capturing groups are excluded. Nested +# groups are each represented. 
+REGEXFIND_CAPTURES_TESTS: list[RegexFindTest] = [
+    RegexFindTest(
+        "captures_single_group",
+        input="abc123",
+        regex="([0-9]+)",
+        expected={"match": "123", "idx": 3, "captures": ["123"]},
+        msg="$regexFind should capture single group",
+    ),
+    # Order matches left-to-right group appearance.
+    RegexFindTest(
+        "captures_two_groups_order",
+        input="John Smith",
+        regex="(\\w+) (\\w+)",
+        expected={"match": "John Smith", "idx": 0, "captures": ["John", "Smith"]},
+        msg="$regexFind should capture groups in left-to-right order",
+    ),
+    # Second alternation branch unmatched produces null in that position.
+    RegexFindTest(
+        "captures_unmatched_branch",
+        input="cat",
+        regex="(cat)|(dog)",
+        expected={"match": "cat", "idx": 0, "captures": ["cat", None]},
+        msg="$regexFind should produce null for unmatched alternation branch",
+    ),
+    # Non-capturing group excluded from captures.
+    RegexFindTest(
+        "captures_non_capturing_excluded",
+        input="abc123",
+        regex="(?:abc)([0-9]+)",
+        expected={"match": "abc123", "idx": 0, "captures": ["123"]},
+        msg="$regexFind should exclude non-capturing group from captures",
+    ),
+    # Nested groups: outer then inner, left to right.
+    RegexFindTest(
+        "captures_nested_groups",
+        input="abc",
+        regex="((a)(b))c",
+        expected={"match": "abc", "idx": 0, "captures": ["ab", "a", "b"]},
+        msg="$regexFind should capture nested groups outer then inner",
+    ),
+    # Named groups (`(?P<name>...)`) are included in captures by position, without their names.
+    RegexFindTest(
+        "captures_named_groups",
+        input="abc123",
+        regex="(?P<letters>[a-z]+)(?P<digits>[0-9]+)",
+        expected={"match": "abc123", "idx": 0, "captures": ["abc", "123"]},
+        msg="$regexFind should include named groups in captures array",
+    ),
+    # Empty capture group captures empty string.
+    RegexFindTest(
+        "captures_empty_group",
+        input="abc",
+        regex="()abc",
+        expected={"match": "abc", "idx": 0, "captures": [""]},
+        msg="$regexFind should capture empty string for empty group",
+    ),
+    # Lookahead with capture.
+    RegexFindTest(
+        "captures_lookahead",
+        input="foobar",
+        regex="foo(?=(bar))",
+        expected={"match": "foo", "idx": 0, "captures": ["bar"]},
+        msg="$regexFind should capture inside lookahead",
+    ),
+]
+
+# Property [Encoding]: multi-byte UTF-8 characters in the match itself are preserved correctly.
+REGEXFIND_ENCODING_TESTS: list[RegexFindTest] = [
+    RegexFindTest(
+        "encoding_2byte_in_match",
+        input="caf\u00e9",
+        regex="\u00e9",
+        expected={"match": "\u00e9", "idx": 3, "captures": []},
+        msg="$regexFind should match 2-byte UTF-8 character",
+    ),
+    RegexFindTest(
+        "encoding_3byte_in_match",
+        input="hello日world",
+        regex="日",
+        expected={"match": "日", "idx": 5, "captures": []},
+        msg="$regexFind should match 3-byte UTF-8 character",
+    ),
+    RegexFindTest(
+        "encoding_4byte_in_match",
+        input="hello🎉world",
+        regex="🎉",
+        expected={"match": "🎉", "idx": 5, "captures": []},
+        msg="$regexFind should match 4-byte UTF-8 emoji",
+    ),
+    RegexFindTest(
+        "encoding_multibyte_span",
+        input="日本語",
+        regex="本語",
+        expected={"match": "本語", "idx": 1, "captures": []},
+        msg="$regexFind should match span of multi-byte characters",
+    ),
+    RegexFindTest(
+        "encoding_combining_mark_in_match",
+        input="e\u0301",
+        regex="e\u0301",
+        expected={"match": "e\u0301", "idx": 0, "captures": []},
+        msg="$regexFind should preserve combining mark in match",
+    ),
+    RegexFindTest(
+        "encoding_precomposed_not_decomposed",
+        input="\u00e9",
+        regex="e\u0301",
+        expected=None,
+        msg="$regexFind should not normalize precomposed to decomposed",
+    ),
+    RegexFindTest(
+        "encoding_s_no_nbsp",
+        input="\u00a0hello",
+        regex="\\s",
+        expected=None,
+        msg="$regexFind \\s should not match NBSP",
+    ),
+]
+
+# Property [Edge Cases]: empty strings and control characters (newline, tab, null byte, carriage
+# return) are handled correctly.
+REGEXFIND_EDGE_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "edge_empty_input_empty_regex", + input="", + regex="", + expected={"match": "", "idx": 0, "captures": []}, + msg="$regexFind should match empty regex on empty input at idx 0", + ), + RegexFindTest( + "edge_nonempty_input_empty_regex", + input="hello", + regex="", + expected={"match": "", "idx": 0, "captures": []}, + msg="$regexFind empty regex should match at idx 0 of non-empty input", + ), + RegexFindTest( + "edge_empty_input_nonempty_regex", + input="", + regex="abc", + expected=None, + msg="$regexFind should return null for no match on empty input", + ), + RegexFindTest( + "edge_newline", + input="hello\nworld", + regex="world", + expected={"match": "world", "idx": 6, "captures": []}, + msg="$regexFind should match across newline in input", + ), + RegexFindTest( + "edge_tab", + input="hello\tworld", + regex="world", + expected={"match": "world", "idx": 6, "captures": []}, + msg="$regexFind should match across tab in input", + ), + RegexFindTest( + "edge_null_byte", + input="hello\x00world", + regex="world", + expected={"match": "world", "idx": 6, "captures": []}, + msg="$regexFind should match across null byte in input", + ), + RegexFindTest( + "edge_carriage_return", + input="hello\rworld", + regex="world", + expected={"match": "world", "idx": 6, "captures": []}, + msg="$regexFind should match across carriage return in input", + ), +] + +REGEXFIND_MATCHING_ALL_TESTS = ( + REGEXFIND_FIRST_MATCH_TESTS + + REGEXFIND_IDX_CODEPOINT_TESTS + + REGEXFIND_CAPTURES_TESTS + + REGEXFIND_ENCODING_TESTS + + REGEXFIND_EDGE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFIND_MATCHING_ALL_TESTS)) +def test_regexfind_cases(collection, test_case: RegexFindTest): + """Test $regexFind matching behavior cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git 
a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_null.py new file mode 100644 index 00000000..128ab39f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_null.py @@ -0,0 +1,141 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import MISSING +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( + RegexFindTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +_PLACEHOLDER = object() +_NULL_PATTERNS = [ + (_PLACEHOLDER, "abc", "input"), + ("hello", _PLACEHOLDER, "regex"), + (_PLACEHOLDER, _PLACEHOLDER, "both"), +] + + +_MSG_MAP = { + "input": "$regexFind should return null when input is {kind}", + "regex": "$regexFind should return null when regex is {kind}", + "both": "$regexFind should return null when both args are {kind}", +} + + +def _build_null_tests(null_value, prefix) -> list[RegexFindTest]: + kind = "null" if null_value is None else "missing" + return [ + RegexFindTest( + f"{prefix}_{suffix}", + input=null_value if _input is _PLACEHOLDER else _input, + regex=null_value if _regex is _PLACEHOLDER else _regex, + expected=None, + msg=_MSG_MAP[suffix].format(kind=kind), + ) + for _input, _regex, suffix in _NULL_PATTERNS + ] + + +# Property [Null Propagation]: null in input or regex causes the result to be null. +REGEXFIND_NULL_TESTS = _build_null_tests(None, "null") + +# Property [Null Propagation - missing]: missing fields in input or regex are treated as null. 
+REGEXFIND_MISSING_TESTS = _build_null_tests(MISSING, "missing") + +# Property [Null Propagation - mixed]: combining null and missing across input and regex +# still produces null. +REGEXFIND_MIXED_NULL_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "mixed_null_input_missing_regex", + input=None, + regex=MISSING, + expected=None, + msg="$regexFind should return null for null input and missing regex", + ), + RegexFindTest( + "mixed_missing_input_null_regex", + input=MISSING, + regex=None, + expected=None, + msg="$regexFind should return null for missing input and null regex", + ), +] + +# Property [Options Null]: null in options does not cause null propagation. +REGEXFIND_OPTIONS_NULL_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "options_null", + input="hello", + regex="hello", + options=None, + expected={"match": "hello", "idx": 0, "captures": []}, + msg="$regexFind should not null-propagate on null options", + ), +] + +# Property [Options Null - missing]: missing field in options does not cause null propagation. +REGEXFIND_OPTIONS_MISSING_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "options_missing", + input="hello", + regex="hello", + options=MISSING, + expected={"match": "hello", "idx": 0, "captures": []}, + msg="$regexFind should not null-propagate on missing options", + ), +] + +# Property [Null Propagation - expressions]: an expression that evaluates to null is treated +# identically to a null literal. 
+REGEXFIND_EXPR_NULL_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "expr_null_input", + input={"$cond": [False, "hello", None]}, + regex="hello", + expected=None, + msg="$regexFind should null-propagate when expr input resolves to null", + ), + RegexFindTest( + "expr_null_regex", + input="hello", + regex={"$cond": [False, "hello", None]}, + expected=None, + msg="$regexFind should null-propagate when expr regex resolves to null", + ), +] + +# Property [Null Precedence]: null propagation from regex takes precedence over bad option flag +# validation. +REGEXFIND_PRECEDENCE_SUCCESS_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "precedence_null_regex_over_bad_option", + input="abc", + regex=None, + options="z", + expected=None, + msg="$regexFind null regex should take precedence over bad option", + ), +] + +REGEXFIND_NULL_ALL_TESTS = ( + REGEXFIND_NULL_TESTS + + REGEXFIND_MISSING_TESTS + + REGEXFIND_MIXED_NULL_TESTS + + REGEXFIND_OPTIONS_NULL_TESTS + + REGEXFIND_OPTIONS_MISSING_TESTS + + REGEXFIND_EXPR_NULL_TESTS + + REGEXFIND_PRECEDENCE_SUCCESS_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFIND_NULL_ALL_TESTS)) +def test_regexfind_cases(collection, test_case: RegexFindTest): + """Test $regexFind null propagation cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_options.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_options.py new file mode 100644 index 00000000..0c7ed796 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_options.py @@ -0,0 +1,167 @@ +from __future__ import annotations + +import pytest +from bson import Regex + +from documentdb_tests.framework.assertions import 
assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( + RegexFindTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Regex Options]: options field accepts i, m, s, x as a string or BSON Regex flags. +# Empty string, duplicates, and "u" (PCRE UTF-8 flag) are valid. Invalid BSON Regex flags are +# silently accepted. +REGEXFIND_OPTIONS_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "options_empty_string", + input="hello", + regex="hello", + options="", + expected={"match": "hello", "idx": 0, "captures": []}, + msg="$regexFind should accept empty string options", + ), + RegexFindTest( + "options_i_case_insensitive", + input="HELLO", + regex="hello", + options="i", + expected={"match": "HELLO", "idx": 0, "captures": []}, + msg="$regexFind option 'i' should enable case-insensitive match", + ), + RegexFindTest( + "options_m_multiline", + input="line1\nline2", + regex="^line2", + options="m", + expected={"match": "line2", "idx": 6, "captures": []}, + msg="$regexFind option 'm' should match ^ at line start", + ), + RegexFindTest( + "options_m_crlf", + input="line1\r\nline2", + regex="^line2", + options="m", + expected={"match": "line2", "idx": 7, "captures": []}, + msg="$regexFind option 'm' should recognize CRLF as line ending", + ), + RegexFindTest( + "options_s_dotall", + input="line1\nline2", + regex="line1.line2", + options="s", + expected={"match": "line1\nline2", "idx": 0, "captures": []}, + msg="$regexFind option 's' should make dot match newline", + ), + RegexFindTest( + "options_x_extended", + input="hello", + regex="hel lo", + options="x", + expected={"match": "hello", "idx": 0, "captures": []}, + msg="$regexFind option 'x' should ignore whitespace in pattern", + ), + RegexFindTest( + "options_u_silently_accepted", + input="hello", + 
regex="hello", + options="u", + expected={"match": "hello", "idx": 0, "captures": []}, + msg="$regexFind should silently accept 'u' option", + ), + RegexFindTest( + "options_combined_im", + input="HELLO\nWORLD", + regex="^world", + options="im", + expected={"match": "WORLD", "idx": 6, "captures": []}, + msg="$regexFind should support combined 'im' options", + ), + RegexFindTest( + "options_duplicate_ii", + input="HELLO", + regex="hello", + options="ii", + expected={"match": "HELLO", "idx": 0, "captures": []}, + msg="$regexFind should accept duplicate option characters", + ), + RegexFindTest( + "options_bson_regex_flag_i", + input="HELLO", + regex=Regex("hello", "i"), + expected={"match": "HELLO", "idx": 0, "captures": []}, + msg="$regexFind should honor BSON Regex 'i' flag", + ), + RegexFindTest( + "options_bson_regex_flag_s", + input="a\nb", + regex=Regex("a.b", "s"), + expected={"match": "a\nb", "idx": 0, "captures": []}, + msg="$regexFind should honor BSON Regex 's' flag", + ), + RegexFindTest( + "options_bson_regex_flag_x", + input="hello", + regex=Regex("hel lo", "x"), + expected={"match": "hello", "idx": 0, "captures": []}, + msg="$regexFind should honor BSON Regex 'x' flag", + ), + RegexFindTest( + "options_bson_regex_invalid_flag_silent", + input="hello", + regex=Regex("hello", "z"), + expected={"match": "hello", "idx": 0, "captures": []}, + msg="$regexFind should silently accept invalid BSON Regex flags", + ), + RegexFindTest( + "options_all_four_combined", + input="HELLO", + regex="hel lo", + options="imsx", + expected={"match": "HELLO", "idx": 0, "captures": []}, + msg="$regexFind should support all four options combined", + ), + RegexFindTest( + "options_no_conflict_null_options_with_flags", + input="HELLO", + regex=Regex("hello", "i"), + options=None, + expected={"match": "HELLO", "idx": 0, "captures": []}, + msg="$regexFind null options should not conflict with BSON flags", + ), + RegexFindTest( + "options_no_conflict_no_flags_with_options", + 
input="HELLO", + regex=Regex("hello"), + options="i", + expected={"match": "HELLO", "idx": 0, "captures": []}, + msg="$regexFind should allow options with flagless BSON Regex", + ), + RegexFindTest( + "options_no_conflict_empty_flags_with_options", + input="HELLO", + regex=Regex("hello", ""), + options="i", + expected={"match": "HELLO", "idx": 0, "captures": []}, + msg="$regexFind empty BSON flags should not conflict with options", + ), + RegexFindTest( + "options_no_conflict_unrecognized_flag", + input="hello", + regex=Regex("hello", "z"), + options="i", + expected={"match": "hello", "idx": 0, "captures": []}, + msg="$regexFind unrecognized BSON flag should not conflict", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFIND_OPTIONS_TESTS)) +def test_regexfind_cases(collection, test_case: RegexFindTest): + """Test $regexFind options cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_precedence.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_precedence.py new file mode 100644 index 00000000..70094f6a --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_precedence.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +import pytest +from bson import Regex + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + REGEX_BAD_OPTION_ERROR, + REGEX_BAD_PATTERN_ERROR, + REGEX_INPUT_TYPE_ERROR, + REGEX_OPTIONS_CONFLICT_ERROR, + REGEX_OPTIONS_TYPE_ERROR, + REGEX_REGEX_TYPE_ERROR, +) +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import MISSING +from 
documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( + RegexFindTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Type Error Precedence]: options type errors take precedence over null propagation. +# When both input and regex have wrong types, regex error wins. Wrong-type arguments error even +# when the other is null or missing. +REGEXFIND_PRECEDENCE_ERROR_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "precedence_options_over_null_input", + input=None, + regex="abc", + options=123, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind options type error should precede null input", + ), + RegexFindTest( + "precedence_options_over_null_regex", + input="abc", + regex=None, + options=123, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind options type error should precede null regex", + ), + RegexFindTest( + "precedence_options_over_both_null", + input=None, + regex=None, + options=123, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind options type error should precede both null", + ), + RegexFindTest( + "precedence_regex_over_input", + input=123, + regex=123, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind regex type error should precede input type error", + ), + RegexFindTest( + "precedence_input_type_with_null_regex", + input=123, + regex=None, + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should error on wrong-type input even with null regex", + ), + RegexFindTest( + "precedence_regex_type_with_null_input", + input=None, + regex=123, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should error on wrong-type regex even with null input", + ), + RegexFindTest( + "precedence_missing_input_wrong_regex", + input=MISSING, + regex=123, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind missing input should not bypass regex type check", + ), + RegexFindTest( + 
"precedence_wrong_input_missing_regex", + input=123, + regex=MISSING, + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind missing regex should not bypass input type check", + ), + RegexFindTest( + "precedence_bad_option_over_null_input", + input=None, + regex="abc", + options="z", + error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexFind bad option should precede null input", + ), + RegexFindTest( + "precedence_bad_pattern_over_null_input", + input=None, + regex="[invalid", + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexFind bad pattern should precede null input", + ), + RegexFindTest( + "precedence_conflict_over_input_type", + input=123, + regex=Regex("hello", "i"), + options="i", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexFind options conflict should precede input type error", + ), + RegexFindTest( + "precedence_conflict_over_null_input", + input=None, + regex=Regex("hello", "i"), + options="i", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexFind options conflict should precede null input", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFIND_PRECEDENCE_ERROR_TESTS)) +def test_regexfind_cases(collection, test_case: RegexFindTest): + """Test $regexFind error precedence cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_size_limit.py new file mode 100644 index 00000000..6651c98b --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_size_limit.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from 
documentdb_tests.framework.error_codes import REGEX_BAD_PATTERN_ERROR, STRING_SIZE_LIMIT_ERROR +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import REGEX_PATTERN_LIMIT_BYTES, STRING_SIZE_LIMIT_BYTES +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( + RegexFindTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [String Size Limit - Success]: input one byte under the limit is accepted. +REGEXFIND_SIZE_LIMIT_SUCCESS_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "size_one_under", + input="a" * (STRING_SIZE_LIMIT_BYTES - 4) + "XYZ", + regex="XYZ", + expected={"match": "XYZ", "idx": STRING_SIZE_LIMIT_BYTES - 4, "captures": []}, + msg="$regexFind should accept input one byte under the size limit", + ), + RegexFindTest( + "size_regex_at_pattern_limit", + input="a" * REGEX_PATTERN_LIMIT_BYTES, + regex="a" * REGEX_PATTERN_LIMIT_BYTES, + expected={"match": "a" * REGEX_PATTERN_LIMIT_BYTES, "idx": 0, "captures": []}, + msg="$regexFind should accept regex at the pattern length limit", + ), +] + +# Property [String Size Limit - Error]: input at the size limit or regex over the pattern limit +# produces an error. 
+REGEXFIND_SIZE_LIMIT_ERROR_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "size_at_limit", + input="a" * STRING_SIZE_LIMIT_BYTES, + regex="a", + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$regexFind should reject input at the size limit", + ), + RegexFindTest( + "size_regex_over_pattern_limit", + input="a", + regex="a" * (REGEX_PATTERN_LIMIT_BYTES + 1), + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexFind should reject regex over the pattern length limit", + ), +] + +REGEXFIND_SIZE_LIMIT_ALL_TESTS = ( + REGEXFIND_SIZE_LIMIT_SUCCESS_TESTS + REGEXFIND_SIZE_LIMIT_ERROR_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFIND_SIZE_LIMIT_ALL_TESTS)) +def test_regexfind_cases(collection, test_case: RegexFindTest): + """Test $regexFind size limit cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_type_errors.py new file mode 100644 index 00000000..0895fa71 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_type_errors.py @@ -0,0 +1,390 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + REGEX_INPUT_TYPE_ERROR, + REGEX_OPTIONS_TYPE_ERROR, + REGEX_REGEX_TYPE_ERROR, +) +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF +from 
documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( + RegexFindTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Type Strictness - input]: non-string, non-null input produces an error. +REGEXFIND_INPUT_TYPE_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "type_input_array", + input=["a"], + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject array as input", + ), + RegexFindTest( + "type_input_binary", + input=Binary(b"data"), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject binary as input", + ), + RegexFindTest( + "type_input_bool", + input=True, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject bool as input", + ), + RegexFindTest( + "type_input_date", + input=datetime(2024, 1, 1, tzinfo=timezone.utc), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject date as input", + ), + RegexFindTest( + "type_input_decimal128", + input=DECIMAL128_ONE_AND_HALF, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject decimal128 as input", + ), + RegexFindTest( + "type_input_float", + input=3.14, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject float as input", + ), + RegexFindTest( + "type_input_int", + input=42, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject int as input", + ), + RegexFindTest( + "type_input_long", + input=Int64(42), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject long as input", + ), + RegexFindTest( + "type_input_maxkey", + input=MaxKey(), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject maxkey as input", + ), + RegexFindTest( + "type_input_minkey", + input=MinKey(), + regex="abc", + 
error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject minkey as input", + ), + RegexFindTest( + "type_input_object", + input={"a": 1}, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject object as input", + ), + RegexFindTest( + "type_input_objectid", + input=ObjectId("507f1f77bcf86cd799439011"), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject objectid as input", + ), + RegexFindTest( + "type_input_regex", + input=Regex("pattern"), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject regex as input", + ), + RegexFindTest( + "type_input_timestamp", + input=Timestamp(1, 1), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject timestamp as input", + ), + RegexFindTest( + "type_input_code", + input=Code("function() {}"), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject javascript code as input", + ), + RegexFindTest( + "type_input_code_scope", + input=Code("function() {}", {"x": 1}), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFind should reject code with scope as input", + ), +] + +# Property [Type Strictness - regex]: non-string, non-Regex, non-null regex produces an error. 
+REGEXFIND_REGEX_TYPE_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "type_regex_array", + input="hello", + regex=["a"], + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject array as regex", + ), + RegexFindTest( + "type_regex_binary", + input="hello", + regex=Binary(b"data"), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject binary as regex", + ), + RegexFindTest( + "type_regex_bool", + input="hello", + regex=True, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject bool as regex", + ), + RegexFindTest( + "type_regex_date", + input="hello", + regex=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject date as regex", + ), + RegexFindTest( + "type_regex_decimal128", + input="hello", + regex=DECIMAL128_ONE_AND_HALF, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject decimal128 as regex", + ), + RegexFindTest( + "type_regex_float", + input="hello", + regex=3.14, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject float as regex", + ), + RegexFindTest( + "type_regex_int", + input="hello", + regex=42, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject int as regex", + ), + RegexFindTest( + "type_regex_long", + input="hello", + regex=Int64(42), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject long as regex", + ), + RegexFindTest( + "type_regex_maxkey", + input="hello", + regex=MaxKey(), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject maxkey as regex", + ), + RegexFindTest( + "type_regex_minkey", + input="hello", + regex=MinKey(), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject minkey as regex", + ), + RegexFindTest( + "type_regex_object", + input="hello", + regex={"a": 1}, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject object as regex", + ), + RegexFindTest( + "type_regex_objectid", + input="hello", + 
regex=ObjectId("507f1f77bcf86cd799439011"), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject objectid as regex", + ), + RegexFindTest( + "type_regex_timestamp", + input="hello", + regex=Timestamp(1, 1), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject timestamp as regex", + ), + RegexFindTest( + "type_regex_code", + input="hello", + regex=Code("function() {}"), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject javascript code as regex", + ), + RegexFindTest( + "type_regex_code_scope", + input="hello", + regex=Code("function() {}", {"x": 1}), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFind should reject code with scope as regex", + ), +] + +# Property [Type Strictness - options]: non-string options (excluding null) produces an error. +REGEXFIND_OPTIONS_TYPE_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "type_options_array", + input="hello", + regex="hello", + options=["a"], + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject array as options", + ), + RegexFindTest( + "type_options_binary", + input="hello", + regex="hello", + options=Binary(b"data"), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject binary as options", + ), + RegexFindTest( + "type_options_bool", + input="hello", + regex="hello", + options=True, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject bool as options", + ), + RegexFindTest( + "type_options_date", + input="hello", + regex="hello", + options=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject date as options", + ), + RegexFindTest( + "type_options_decimal128", + input="hello", + regex="hello", + options=DECIMAL128_ONE_AND_HALF, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject decimal128 as options", + ), + RegexFindTest( + "type_options_float", + input="hello", + regex="hello", + options=3.14, + error_code=REGEX_OPTIONS_TYPE_ERROR, 
+ msg="$regexFind should reject float as options", + ), + RegexFindTest( + "type_options_int", + input="hello", + regex="hello", + options=42, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject int as options", + ), + RegexFindTest( + "type_options_long", + input="hello", + regex="hello", + options=Int64(42), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject long as options", + ), + RegexFindTest( + "type_options_maxkey", + input="hello", + regex="hello", + options=MaxKey(), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject maxkey as options", + ), + RegexFindTest( + "type_options_minkey", + input="hello", + regex="hello", + options=MinKey(), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject minkey as options", + ), + RegexFindTest( + "type_options_object", + input="hello", + regex="hello", + options={"a": 1}, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject object as options", + ), + RegexFindTest( + "type_options_objectid", + input="hello", + regex="hello", + options=ObjectId("507f1f77bcf86cd799439011"), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject objectid as options", + ), + RegexFindTest( + "type_options_regex", + input="hello", + regex="hello", + options=Regex("pattern"), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject regex as options", + ), + RegexFindTest( + "type_options_timestamp", + input="hello", + regex="hello", + options=Timestamp(1, 1), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject timestamp as options", + ), + RegexFindTest( + "type_options_code", + input="hello", + regex="hello", + options=Code("function() {}"), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject javascript code as options", + ), + RegexFindTest( + "type_options_code_scope", + input="hello", + regex="hello", + options=Code("function() {}", {"x": 1}), + 
error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFind should reject code with scope as options", + ), +] + +REGEXFIND_TYPE_ERROR_ALL_TESTS = ( + REGEXFIND_INPUT_TYPE_TESTS + REGEXFIND_REGEX_TYPE_TESTS + REGEXFIND_OPTIONS_TYPE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFIND_TYPE_ERROR_ALL_TESTS)) +def test_regexfind_cases(collection, test_case: RegexFindTest): + """Test $regexFind type error cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_usage.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_usage.py new file mode 100644 index 00000000..d3284b1d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_usage.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +import pytest + +from documentdb_tests.framework.assertions import assertResult, assertSuccess +from documentdb_tests.framework.test_case import BaseTestCase, pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( + RegexFindTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_project, + execute_project_with_insert, +) + + +# Property [Document Field References]: $regexFind works with field references +# from inserted documents, not just inline literals. 
+def test_regexfind_document_fields(collection): + """Test $regexFind reads values from document fields.""" + result = execute_project_with_insert( + collection, + {"s": "hello world"}, + {"result": {"$regexFind": {"input": "$s", "regex": "world"}}}, + ) + assertSuccess( + result, + [{"result": {"match": "world", "idx": 6, "captures": []}}], + msg="$regexFind should find match from document field references", + ) + + +# Property [Return Type]: match result has match (string), idx (int), and captures (array of +# strings). +@dataclass(frozen=True) +class RegexFindReturnTypeTest(BaseTestCase): + """Test case for $regexFind return type verification.""" + + input: Any = None + regex: Any = None + capture_element_types: list[str] | None = None + + +REGEXFIND_RETURN_TYPE_TESTS: list[RegexFindReturnTypeTest] = [ + RegexFindReturnTypeTest( + "return_type_no_captures", + input="hello", + regex="hello", + capture_element_types=[], + msg="$regexFind should return correct types with no captures", + ), + RegexFindReturnTypeTest( + "return_type_captures", + input="abc 123", + regex="([a-z]+) ([0-9]+)", + capture_element_types=["string", "string"], + msg="$regexFind should return string type for capture elements", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFIND_RETURN_TYPE_TESTS)) +def test_regexfind_return_type(collection, test_case: RegexFindReturnTypeTest): + """Test $regexFind match result field types.""" + expr = {"$regexFind": {"input": test_case.input, "regex": test_case.regex}} + captures = {"$getField": {"field": "captures", "input": expr}} + result = execute_project( + collection, + { + "matchType": {"$type": {"$getField": {"field": "match", "input": expr}}}, + "idxType": {"$type": {"$getField": {"field": "idx", "input": expr}}}, + "capturesType": {"$type": captures}, + "captureElementTypes": {"$map": {"input": captures, "as": "c", "in": {"$type": "$$c"}}}, + }, + ) + assertSuccess( + result, + [ + { + "matchType": "string", + "idxType": 
"int", + "capturesType": "array", + "captureElementTypes": test_case.capture_element_types, + } + ], + msg=test_case.msg, + ) + + +# Property [Expression Arguments]: input, regex, and options accept expressions that resolve to +# the appropriate type. +REGEXFIND_EXPR_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "expr_input", + input={"$concat": ["hel", "lo"]}, + regex="hello", + expected={"match": "hello", "idx": 0, "captures": []}, + msg="$regexFind should accept expression for input", + ), + RegexFindTest( + "expr_regex", + input="hello", + regex={"$concat": ["hel", "lo"]}, + expected={"match": "hello", "idx": 0, "captures": []}, + msg="$regexFind should accept expression for regex", + ), + RegexFindTest( + "expr_options", + input="HELLO", + regex="hello", + options={"$concat": ["", "i"]}, + expected={"match": "HELLO", "idx": 0, "captures": []}, + msg="$regexFind should accept expression for options", + ), + RegexFindTest( + "expr_literal_dollar_regex", + input="price: $100", + regex={"$literal": "\\$[0-9]+"}, + expected={"match": "$100", "idx": 7, "captures": []}, + msg="$regexFind should accept $literal for dollar in regex", + ), +] + +# Property [Edge Cases - string literal]: a string starting with "$" in the input field is +# treated as a field path reference, not a literal string. When the referenced field is missing, +# the input resolves to null and null propagation applies. 
+REGEXFIND_LITERAL_INPUT_TESTS: list[RegexFindTest] = [ + RegexFindTest( + "edge_dollar_input_is_field_ref", + input="$nonexistent", + regex="\\$nonexistent", + expected=None, + msg="$regexFind should treat dollar-prefixed input as field ref", + ), +] + +REGEXFIND_USAGE_PARAM_TESTS = REGEXFIND_EXPR_TESTS + REGEXFIND_LITERAL_INPUT_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFIND_USAGE_PARAM_TESTS)) +def test_regexfind_cases(collection, test_case: RegexFindTest): + """Test $regexFind usage cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/utils/regexFind_common.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/utils/regexFind_common.py new file mode 100644 index 00000000..9cdde28e --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/utils/regexFind_common.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, cast + +from documentdb_tests.framework.test_case import BaseTestCase + +# Sentinel for "omit this parameter from the expression." Distinct from None (which means pass null) +# and MISSING (which means reference a missing field). 
+_OMIT = object() + + +@dataclass(frozen=True) +class RegexFindTest(BaseTestCase): + """Test case for $regexFind operator.""" + + input: Any = None + regex: Any = None + options: Any = _OMIT + expr: Any = None # Raw expression override for syntax tests + + +def _expr(test_case: RegexFindTest) -> dict[str, Any]: + if test_case.expr is not None: + return cast(dict[str, Any], test_case.expr) + params: dict[str, Any] = {"input": test_case.input, "regex": test_case.regex} + if test_case.options is not _OMIT: + params["options"] = test_case.options + return {"$regexFind": params} diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_encoding.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_encoding.py new file mode 100644 index 00000000..6e22878d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_encoding.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + RegexFindAllTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [idx Code Point Semantics]: idx counts Unicode code points, not bytes. Each character +# contributes 1 regardless of UTF-8 byte width. +REGEXFINDALL_IDX_CODEPOINT_TESTS: list[RegexFindAllTest] = [ + # U+00E9 is 2 bytes. Byte index would be 2, codepoint index is 1. + RegexFindAllTest( + "idx_cp_2byte_prefix", + input="\u00e9abc", + regex="abc", + expected=[{"match": "abc", "idx": 1, "captures": []}], + msg="$regexFindAll idx should count 2-byte character as one code point", + ), + # U+65E5 is 3 bytes. 
Byte index would be 3, codepoint index is 1. + RegexFindAllTest( + "idx_cp_3byte_prefix", + input="日abc", + regex="abc", + expected=[{"match": "abc", "idx": 1, "captures": []}], + msg="$regexFindAll idx should count 3-byte CJK character as one code point", + ), + # U+1F389 is 4 bytes. Byte index would be 4, codepoint index is 1. + RegexFindAllTest( + "idx_cp_4byte_prefix", + input="🎉abc", + regex="abc", + expected=[{"match": "abc", "idx": 1, "captures": []}], + msg="$regexFindAll idx should count 4-byte emoji as one code point", + ), + # Mix of 2-byte, 3-byte, and 4-byte chars. Byte index would be 9, codepoint index is 3. + RegexFindAllTest( + "idx_cp_mixed_byte_widths", + input="\u00e9日🎉abc", + regex="abc", + expected=[{"match": "abc", "idx": 3, "captures": []}], + msg="$regexFindAll idx should count mixed 2/3/4-byte characters as one code point each", + ), + # Three 3-byte CJK chars. Byte index would be 9, codepoint index is 3. + RegexFindAllTest( + "idx_cp_cjk_prefix", + input="日本語abc", + regex="abc", + expected=[{"match": "abc", "idx": 3, "captures": []}], + msg="$regexFindAll idx should count three CJK characters as three code points", + ), + # Combining mark (e + U+0301) is two codepoints, not one. idx would be 1 if normalized to + # precomposed U+00E9. + RegexFindAllTest( + "idx_cp_combining_mark", + input="e\u0301abc", + regex="abc", + expected=[{"match": "abc", "idx": 2, "captures": []}], + msg="$regexFindAll idx should count combining mark as a separate code point", + ), +] + + +# Property [Encoding]: multi-byte UTF-8 characters in the match itself are preserved correctly. +REGEXFINDALL_ENCODING_TESTS: list[RegexFindAllTest] = [ + # 2-byte character in match (U+00E9). + RegexFindAllTest( + "encoding_2byte_in_match", + input="caf\u00e9", + regex="\u00e9", + expected=[{"match": "\u00e9", "idx": 3, "captures": []}], + msg="$regexFindAll should preserve 2-byte character in match output", + ), + # 3-byte character in match (U+65E5). 
+ RegexFindAllTest( + "encoding_3byte_in_match", + input="hello日world", + regex="日", + expected=[{"match": "日", "idx": 5, "captures": []}], + msg="$regexFindAll should preserve 3-byte CJK character in match output", + ), + # 4-byte emoji in match (U+1F389). + RegexFindAllTest( + "encoding_4byte_in_match", + input="hello🎉world", + regex="🎉", + expected=[{"match": "🎉", "idx": 5, "captures": []}], + msg="$regexFindAll should preserve 4-byte emoji in match output", + ), + # Match spanning multiple multi-byte characters. + RegexFindAllTest( + "encoding_multibyte_span", + input="日本語", + regex="本語", + expected=[{"match": "本語", "idx": 1, "captures": []}], + msg="$regexFindAll should match span of multiple multi-byte characters", + ), + # Combining mark (U+0301) in the match value is preserved. + RegexFindAllTest( + "encoding_combining_mark_in_match", + input="e\u0301", + regex="e\u0301", + expected=[{"match": "e\u0301", "idx": 0, "captures": []}], + msg="$regexFindAll should preserve combining mark in match output", + ), + # Precomposed U+00E9 and decomposed e+U+0301 are not normalized to each other. + RegexFindAllTest( + "encoding_precomposed_not_decomposed", + input="\u00e9", + regex="e\u0301", + expected=[], + msg="$regexFindAll should not match precomposed character with decomposed regex", + ), + # \s does NOT match NBSP (U+00A0). + RegexFindAllTest( + "encoding_s_no_nbsp", + input="\u00a0hello", + regex="\\s", + expected=[], + msg="$regexFindAll \\s should not match non-breaking space U+00A0", + ), + # Multi-byte characters between matches. Each contributes 1 to idx + # regardless of byte width. 
+ RegexFindAllTest( + "encoding_multibyte_between_matches", + input="x日x🎉x", + regex="x", + expected=[ + {"match": "x", "idx": 0, "captures": []}, + {"match": "x", "idx": 2, "captures": []}, + {"match": "x", "idx": 4, "captures": []}, + ], + msg="$regexFindAll should count multi-byte chars between matches as one code point each", + ), +] + +REGEXFINDALL_ENCODING_ALL_TESTS = REGEXFINDALL_IDX_CODEPOINT_TESTS + REGEXFINDALL_ENCODING_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFINDALL_ENCODING_ALL_TESTS)) +def test_regexfindall_encoding(collection, test_case: RegexFindAllTest): + """Test $regexFindAll Unicode encoding and idx code point semantics.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invalid_args.py new file mode 100644 index 00000000..f09af115 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invalid_args.py @@ -0,0 +1,249 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + FAILED_TO_PARSE_ERROR, + INVALID_DOLLAR_FIELD_PATH, + REGEX_BAD_OPTION_ERROR, + REGEX_BAD_PATTERN_ERROR, + REGEX_MISSING_INPUT_ERROR, + REGEX_MISSING_REGEX_ERROR, + REGEX_NON_OBJECT_ERROR, + REGEX_NULL_BYTE_ERROR, + REGEX_OPTIONS_NULL_BYTE_ERROR, + REGEX_UNKNOWN_FIELD_ERROR, +) +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + RegexFindAllTest, + _expr, +) +from 
documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Syntax Validation]: missing required fields or unknown fields produce errors. +REGEXFINDALL_SYNTAX_ERROR_TESTS: list[RegexFindAllTest] = [ + # Non-object argument. + RegexFindAllTest( + "syntax_non_object_string", + expr={"$regexFindAll": "string"}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexFindAll should reject string as argument", + ), + RegexFindAllTest( + "syntax_non_object_array", + expr={"$regexFindAll": ["array"]}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexFindAll should reject array as argument", + ), + RegexFindAllTest( + "syntax_non_object_null", + expr={"$regexFindAll": None}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexFindAll should reject null as argument", + ), + RegexFindAllTest( + "syntax_non_object_int", + expr={"$regexFindAll": 42}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexFindAll should reject int as argument", + ), + RegexFindAllTest( + "syntax_non_object_bool", + expr={"$regexFindAll": True}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexFindAll should reject bool as argument", + ), + # Empty object produces missing input error. 
+ RegexFindAllTest( + "syntax_empty_object", + expr={"$regexFindAll": {}}, + error_code=REGEX_MISSING_INPUT_ERROR, + msg="$regexFindAll should reject empty object with missing input error", + ), + RegexFindAllTest( + "syntax_missing_input", + expr={"$regexFindAll": {"regex": "abc"}}, + error_code=REGEX_MISSING_INPUT_ERROR, + msg="$regexFindAll should reject object missing input field", + ), + RegexFindAllTest( + "syntax_missing_regex", + expr={"$regexFindAll": {"input": "abc"}}, + error_code=REGEX_MISSING_REGEX_ERROR, + msg="$regexFindAll should reject object missing regex field", + ), + RegexFindAllTest( + "syntax_unknown_field", + expr={"$regexFindAll": {"input": "abc", "regex": "abc", "bogus": 1}}, + error_code=REGEX_UNKNOWN_FIELD_ERROR, + msg="$regexFindAll should reject unknown field in argument object", + ), +] + + +# Property [Type Strictness - pattern]: invalid regex pattern produces an error. +REGEXFINDALL_BAD_PATTERN_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "type_bad_pattern_bracket", + input="abc", + regex="[invalid", + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexFindAll should reject unclosed bracket in regex pattern", + ), + RegexFindAllTest( + "type_bad_pattern_paren", + input="abc", + regex="(unclosed", + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexFindAll should reject unclosed parenthesis in regex pattern", + ), + RegexFindAllTest( + "type_bad_pattern_var_lookbehind", + input="abc", + regex="(?<=a+)b", + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexFindAll should reject variable-length lookbehind in regex pattern", + ), +] + +# Property [Regex Pattern - null byte]: embedded null byte in regex pattern produces an error. 
REGEXFINDALL_NULL_BYTE_PATTERN_TESTS: list[RegexFindAllTest] = [
    RegexFindAllTest(
        "type_null_byte_in_pattern",
        regex="ab\x00c",
        input="abc",
        msg="$regexFindAll should reject embedded null byte in regex pattern",
        error_code=REGEX_NULL_BYTE_ERROR,
    ),
]

# Property [Type Strictness - option char]: unrecognized option character produces an error.
# Leading/trailing whitespace and mixed valid/invalid flags also produce errors.
#
# Every case runs regex "abc" against input "abc"; only the options string differs, so the
# cases are listed as (test id, rejected options string, assertion message).
_BAD_OPTION_CASES = (
    (
        "type_bad_option",
        "z",
        "$regexFindAll should reject unrecognized option character 'z'",
    ),
    # Leading whitespace treated as invalid flag.
    (
        "type_bad_option_leading_whitespace",
        " i",
        "$regexFindAll should reject leading whitespace in options",
    ),
    # Trailing whitespace treated as invalid flag.
    (
        "type_bad_option_trailing_whitespace",
        "i ",
        "$regexFindAll should reject trailing whitespace in options",
    ),
    # Mix of valid and invalid flags.
    (
        "type_bad_option_mixed_valid_invalid",
        "ig",
        "$regexFindAll should reject options containing mix of valid and invalid flags",
    ),
    # Uppercase valid flags are rejected.
    (
        "type_bad_option_uppercase_I",
        "I",
        "$regexFindAll should reject uppercase 'I' as option flag",
    ),
)

REGEXFINDALL_BAD_OPTION_TESTS: list[RegexFindAllTest] = [
    RegexFindAllTest(
        case_id,
        input="abc",
        regex="abc",
        options=bad_options,
        error_code=REGEX_BAD_OPTION_ERROR,
        msg=message,
    )
    for case_id, bad_options, message in _BAD_OPTION_CASES
]

# Property [Null Byte in Options]: null byte in options string produces a distinct error.
+REGEXFINDALL_OPTIONS_NULL_BYTE_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "type_null_byte_in_options", + input="abc", + regex="abc", + options="i\x00m", + error_code=REGEX_OPTIONS_NULL_BYTE_ERROR, + msg="$regexFindAll should reject embedded null byte in options string", + ), +] + + +# Property [Dollar Sign Error]: a bare "$" is interpreted as a field path and "$$" is interpreted +# as an empty variable name. +REGEXFINDALL_DOLLAR_ERROR_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "dollar_bare_input", + input="$", + regex="abc", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$regexFindAll should reject bare '$' as input field path", + ), + RegexFindAllTest( + "dollar_bare_regex", + input="hello", + regex="$", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$regexFindAll should reject bare '$' as regex field path", + ), + RegexFindAllTest( + "dollar_bare_options", + input="hello", + regex="abc", + options="$", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$regexFindAll should reject bare '$' as options field path", + ), + RegexFindAllTest( + "dollar_double_input", + input="$$", + regex="abc", + error_code=FAILED_TO_PARSE_ERROR, + msg="$regexFindAll should reject '$$' as empty variable name in input", + ), + RegexFindAllTest( + "dollar_double_regex", + input="hello", + regex="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$regexFindAll should reject '$$' as empty variable name in regex", + ), + RegexFindAllTest( + "dollar_double_options", + input="hello", + regex="abc", + options="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$regexFindAll should reject '$$' as empty variable name in options", + ), +] + +REGEXFINDALL_INVALID_ARGS_ALL_TESTS = ( + REGEXFINDALL_SYNTAX_ERROR_TESTS + + REGEXFINDALL_BAD_PATTERN_TESTS + + REGEXFINDALL_NULL_BYTE_PATTERN_TESTS + + REGEXFINDALL_BAD_OPTION_TESTS + + REGEXFINDALL_OPTIONS_NULL_BYTE_TESTS + + REGEXFINDALL_DOLLAR_ERROR_TESTS +) + + +@pytest.mark.parametrize("test_case", 
pytest_params(REGEXFINDALL_INVALID_ARGS_ALL_TESTS)) +def test_regexfindall_invalid_args(collection, test_case: RegexFindAllTest): + """Test $regexFindAll syntax validation and invalid argument errors.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invariants.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invariants.py new file mode 100644 index 00000000..d757abc8 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invariants.py @@ -0,0 +1,196 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +import pytest + +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.test_case import BaseTestCase, pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + RegexFindAllTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_project, +) + +# Property [Return Type - array]: result is always an array, even for no-match and null-propagation +# cases. 
REGEXFINDALL_RETURN_TYPE_ARRAY_TESTS: list[RegexFindAllTest] = [
    RegexFindAllTest(case_id, input=subject, regex=pattern, msg=message)
    for case_id, subject, pattern, message in (
        (
            "return_type_array_match",
            "hello",
            "hello",
            "$regexFindAll should return array type when there is a match",
        ),
        (
            "return_type_array_no_match",
            "hello",
            "xyz",
            "$regexFindAll should return array type when there is no match",
        ),
        (
            "return_type_array_null_input",
            None,
            "abc",
            "$regexFindAll should return array type when input is null",
        ),
    )
]


@pytest.mark.parametrize("test_case", pytest_params(REGEXFINDALL_RETURN_TYPE_ARRAY_TESTS))
def test_regexfindall_return_type_array(collection, test_case: RegexFindAllTest):
    """Check that $type of the $regexFindAll result is "array" for every case."""
    # Wrap the operator in $type so the server reports the BSON type of its result.
    type_of_result = {"$type": _expr(test_case)}
    outcome = execute_expression(collection, type_of_result)
    assertSuccess(outcome, [{"result": "array"}], msg=test_case.msg)


# Property [Return Type - elements]: each element has match (string), idx (int), and captures (array
# of strings).
@dataclass(frozen=True)
class RegexFindAllReturnTypeElementTest(BaseTestCase):
    """Case describing the BSON types expected inside one $regexFindAll match element."""

    # Value passed as the operator's "input" field.
    input: Any = None
    # Value passed as the operator's "regex" field.
    regex: Any = None
    # Expected $type name of each entry in the first match's captures array.
    capture_element_types: list[str] | None = None


REGEXFINDALL_RETURN_TYPE_ELEMENT_TESTS: list[RegexFindAllReturnTypeElementTest] = [
    RegexFindAllReturnTypeElementTest(
        "return_type_elem_no_captures",
        regex="hello",
        input="hello",
        msg="$regexFindAll match element should have string match, int idx, and empty captures",
        capture_element_types=[],
    ),
    RegexFindAllReturnTypeElementTest(
        "return_type_elem_captures",
        regex="([a-z]+) ([0-9]+)",
        input="abc 123",
        msg="$regexFindAll match element captures should contain string-typed elements",
        capture_element_types=["string", "string"],
    ),
]


@pytest.mark.parametrize("test_case", pytest_params(REGEXFINDALL_RETURN_TYPE_ELEMENT_TESTS))
def test_regexfindall_return_type_elements(
    collection, test_case: RegexFindAllReturnTypeElementTest
):
    """Check the $type of match, idx, captures and each capture in the first match element."""
    find_all = {"$regexFindAll": {"input": test_case.input, "regex": test_case.regex}}
    first_match = {"$arrayElemAt": [find_all, 0]}

    def _field_of_first(name):
        # Expression reading one named field from the first match document.
        return {"$getField": {"field": name, "input": first_match}}

    captures_expr = _field_of_first("captures")
    projection = {
        "matchType": {"$type": _field_of_first("match")},
        "idxType": {"$type": _field_of_first("idx")},
        "capturesType": {"$type": captures_expr},
        "captureElementTypes": {
            "$map": {"input": captures_expr, "as": "c", "in": {"$type": "$$c"}}
        },
    }
    expected_types = {
        "matchType": "string",
        "idxType": "int",
        "capturesType": "array",
        "captureElementTypes": test_case.capture_element_types,
    }

    outcome = execute_project(collection, projection)
    assertSuccess(outcome, [expected_types], msg=test_case.msg)


# Property [Multiple Match Enumeration - idx ordering]: all idx values in the result are strictly
# increasing.
+REGEXFINDALL_IDX_ORDERING_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "ordering_three_matches", + input="abcabcabc", + regex="abc", + msg="$regexFindAll idx values should be strictly increasing for three adjacent matches", + ), + RegexFindAllTest( + "ordering_scattered", + input="aXbXc", + regex="[abc]", + msg="$regexFindAll idx should be strictly increasing for scattered single-char matches", + ), + RegexFindAllTest( + "ordering_alternation", + input="catXdogXfoxXdogXcat", + regex="cat|dog|fox", + msg="$regexFindAll idx values should be strictly increasing for alternation matches", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFINDALL_IDX_ORDERING_TESTS)) +def test_regexfindall_idx_ordering(collection, test_case: RegexFindAllTest): + """Test $regexFindAll idx values are strictly increasing.""" + result = execute_expression(collection, _expr(test_case)) + + def _check(docs): + matches = docs[0]["result"] + idxs = [m["idx"] for m in matches] + is_increasing = all(idxs[i] < idxs[i + 1] for i in range(len(idxs) - 1)) + return {"hasMultiple": len(idxs) >= 2, "isStrictlyIncreasing": is_increasing} + + assertSuccess( + result, + {"hasMultiple": True, "isStrictlyIncreasing": True}, + transform=_check, + msg=test_case.msg, + ) + + +# Property [Multiple Match Enumeration - no overlap]: for non-zero-width matches, each subsequent +# match starts at or after the end of the previous match. +REGEXFINDALL_NO_OVERLAP_TESTS: list[RegexFindAllTest] = [ + # "aa" on "aaaa": non-overlapping gives 2 matches at 0 and 2, not 3 at 0, 1, 2. + RegexFindAllTest( + "no_overlap_could_overlap", + input="aaaa", + regex="aa", + msg="$regexFindAll should not produce overlapping matches for 'aa' on 'aaaa'", + ), + # "aba" on "abababa": non-overlapping gives 2 matches at 0 and 4, not 3 at 0, 2, 4. 
+ RegexFindAllTest( + "no_overlap_interleaved", + input="abababa", + regex="aba", + msg="$regexFindAll should not produce overlapping matches for 'aba' on 'abababa'", + ), + RegexFindAllTest( + "no_overlap_exact_adjacent", + input="abcabcabc", + regex="abc", + msg="$regexFindAll should produce non-overlapping adjacent matches for 'abc' repeated", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFINDALL_NO_OVERLAP_TESTS)) +def test_regexfindall_no_overlap(collection, test_case: RegexFindAllTest): + """Test $regexFindAll non-zero-width matches do not overlap.""" + result = execute_expression(collection, _expr(test_case)) + + def _check(docs): + matches = docs[0]["result"] + no_overlap = all( + b["idx"] >= a["idx"] + len(a["match"]) for a, b in zip(matches, matches[1:]) + ) + return {"hasMultiple": len(matches) >= 2, "noOverlap": no_overlap} + + assertSuccess( + result, + {"hasMultiple": True, "noOverlap": True}, + transform=_check, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_matching.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_matching.py new file mode 100644 index 00000000..542387e1 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_matching.py @@ -0,0 +1,301 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + RegexFindAllTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Captures Behavior]: captures array length equals the number of capture groups, in +# pattern order. 
# Unmatched branches produce null. Non-capturing groups are excluded. Nested groups
# are each represented. Each result document has its own independent captures array.
REGEXFINDALL_CAPTURES_TESTS: list[RegexFindAllTest] = [
    RegexFindAllTest(
        "captures_single_group",
        input="abc123",
        regex="([0-9]+)",
        expected=[{"match": "123", "idx": 3, "captures": ["123"]}],
        msg="$regexFindAll should populate captures with single group match",
    ),
    # Order matches left-to-right group appearance.
    RegexFindAllTest(
        "captures_two_groups_order",
        input="John Smith",
        regex="(\\w+) (\\w+)",
        expected=[{"match": "John Smith", "idx": 0, "captures": ["John", "Smith"]}],
        msg="$regexFindAll should order captures left-to-right by group position",
    ),
    # Second alternation branch unmatched produces null in that position.
    RegexFindAllTest(
        "captures_unmatched_branch",
        input="cat",
        regex="(cat)|(dog)",
        expected=[{"match": "cat", "idx": 0, "captures": ["cat", None]}],
        msg="$regexFindAll should produce null for unmatched alternation branch capture",
    ),
    # Non-capturing group excluded from captures.
    RegexFindAllTest(
        "captures_non_capturing_excluded",
        input="abc123",
        regex="(?:abc)([0-9]+)",
        expected=[{"match": "abc123", "idx": 0, "captures": ["123"]}],
        msg="$regexFindAll should exclude non-capturing group from captures",
    ),
    # Nested groups: outer then inner, left to right.
    RegexFindAllTest(
        "captures_nested_groups",
        input="abc",
        regex="((a)(b))c",
        expected=[{"match": "abc", "idx": 0, "captures": ["ab", "a", "b"]}],
        msg="$regexFindAll should list nested captures outer-then-inner left-to-right",
    ),
    # Named group included in captures. The (?P<name>...) form needs an explicit group name;
    # captures are still reported positionally, so the names do not appear in the output.
    RegexFindAllTest(
        "captures_named_groups",
        input="abc123",
        regex="(?P<letters>[a-z]+)(?P<digits>[0-9]+)",
        expected=[{"match": "abc123", "idx": 0, "captures": ["abc", "123"]}],
        msg="$regexFindAll should include named groups in captures array",
    ),
    # Empty capture group captures empty string.
    RegexFindAllTest(
        "captures_empty_group",
        input="abc",
        regex="()(abc)",
        expected=[{"match": "abc", "idx": 0, "captures": ["", "abc"]}],
        msg="$regexFindAll should capture empty string for empty capture group",
    ),
    # Captures vary across multiple matches. Same group matches different content in each result
    # document.
    RegexFindAllTest(
        "captures_multi_match_varying",
        input="cat dog",
        regex="(\\w+)",
        expected=[
            {"match": "cat", "idx": 0, "captures": ["cat"]},
            {"match": "dog", "idx": 4, "captures": ["dog"]},
        ],
        msg="$regexFindAll should produce independent captures for each match",
    ),
    # A capture group participates in one match but not another. The alternation (cat)|(dog) has two
    # groups. Each match populates one and leaves the other null.
    RegexFindAllTest(
        "captures_group_participates_in_some",
        input="cat dog",
        regex="(cat)|(dog)",
        expected=[
            {"match": "cat", "idx": 0, "captures": ["cat", None]},
            {"match": "dog", "idx": 4, "captures": [None, "dog"]},
        ],
        msg="$regexFindAll should set null for non-participating group in each match",
    ),
]


# Property [Edge Cases]: empty strings, large inputs, and control characters are handled correctly.
REGEXFINDALL_EDGE_TESTS: list[RegexFindAllTest] = [
    # Empty input with empty regex matches once at position 0.
    RegexFindAllTest(
        "edge_empty_input_empty_regex",
        input="",
        regex="",
        expected=[{"match": "", "idx": 0, "captures": []}],
        msg="$regexFindAll should return one empty match when both input and regex are empty",
    ),
    # Empty regex on non-empty input matches at every code point position.
    RegexFindAllTest(
        "edge_nonempty_input_empty_regex",
        input="hello",
        regex="",
        expected=[
            {"match": "", "idx": 0, "captures": []},
            {"match": "", "idx": 1, "captures": []},
            {"match": "", "idx": 2, "captures": []},
            {"match": "", "idx": 3, "captures": []},
            {"match": "", "idx": 4, "captures": []},
        ],
        msg="$regexFindAll should match empty regex at every code point position",
    ),
    # Empty input with non-empty regex returns empty array.
    RegexFindAllTest(
        "edge_empty_input_nonempty_regex",
        input="",
        regex="abc",
        expected=[],
        msg="$regexFindAll should return empty array when input is empty and regex is non-empty",
    ),
    # Large input with many matches. 5_000 is an arbitrary high count that remains
    # performant; scaling to STRING_SIZE_LIMIT_BYTES would produce ~8M matches and hang.
    RegexFindAllTest(
        "edge_large_input_many_matches",
        input="ab" * 5_000,
        regex="ab",
        expected=[{"match": "ab", "idx": i * 2, "captures": []} for i in range(5_000)],
        msg="$regexFindAll should return all 5000 matches from a large repeated input",
    ),
    # Newline in input.
    RegexFindAllTest(
        "edge_newline",
        input="hello\nworld",
        regex="world",
        expected=[{"match": "world", "idx": 6, "captures": []}],
        msg="$regexFindAll should match across newline in input",
    ),
    # Tab in input.
    RegexFindAllTest(
        "edge_tab",
        input="hello\tworld",
        regex="world",
        expected=[{"match": "world", "idx": 6, "captures": []}],
        msg="$regexFindAll should match across tab in input",
    ),
    # Null byte in input.
    RegexFindAllTest(
        "edge_null_byte",
        input="hello\x00world",
        regex="world",
        expected=[{"match": "world", "idx": 6, "captures": []}],
        msg="$regexFindAll should match after embedded null byte in input",
    ),
    # Carriage return in input.
    RegexFindAllTest(
        "edge_carriage_return",
        input="hello\rworld",
        regex="world",
        expected=[{"match": "world", "idx": 6, "captures": []}],
        msg="$regexFindAll should match across carriage return in input",
    ),
    # Multiple matches spanning newline boundaries.
    RegexFindAllTest(
        "edge_matches_across_newlines",
        input="abc\nabc\nabc",
        regex="abc",
        expected=[
            {"match": "abc", "idx": 0, "captures": []},
            {"match": "abc", "idx": 4, "captures": []},
            {"match": "abc", "idx": 8, "captures": []},
        ],
        msg="$regexFindAll should find all matches spanning newline boundaries",
    ),
]

# Property [Multiple Match Enumeration]: all non-overlapping matches are returned in position order.
REGEXFINDALL_MULTI_MATCH_TESTS: list[RegexFindAllTest] = [
    # Greedy quantifier consumes maximum input, reducing match count.
    RegexFindAllTest(
        "multi_greedy_fewer_matches",
        input="aaa",
        regex="a+",
        expected=[{"match": "aaa", "idx": 0, "captures": []}],
        msg="$regexFindAll greedy quantifier should consume maximum input in one match",
    ),
    # Lazy quantifier consumes minimum input, increasing match count.
    RegexFindAllTest(
        "multi_nongreedy_more_matches",
        input="aaa",
        regex="a+?",
        expected=[
            {"match": "a", "idx": 0, "captures": []},
            {"match": "a", "idx": 1, "captures": []},
            {"match": "a", "idx": 2, "captures": []},
        ],
        msg="$regexFindAll lazy quantifier should produce one match per character",
    ),
    # Pattern matching every character produces one match per code point.
    RegexFindAllTest(
        "multi_per_character",
        input="abc",
        regex=".",
        expected=[
            {"match": "a", "idx": 0, "captures": []},
            {"match": "b", "idx": 1, "captures": []},
            {"match": "c", "idx": 2, "captures": []},
        ],
        msg="$regexFindAll dot pattern should produce one match per code point",
    ),
    # Matches from different branches returned in position order.
    RegexFindAllTest(
        "multi_alternation_position_order",
        input="catXdogXcat",
        regex="cat|dog",
        expected=[
            {"match": "cat", "idx": 0, "captures": []},
            {"match": "dog", "idx": 4, "captures": []},
            {"match": "cat", "idx": 8, "captures": []},
        ],
        msg="$regexFindAll should return alternation matches in position order",
    ),
]

# Property [Zero-Width Match Behavior]: zero-width matches are enumerated across the input, with
# the engine advancing by one code point after each to avoid infinite repetition.
REGEXFINDALL_ZERO_WIDTH_TESTS: list[RegexFindAllTest] = [
    # Lookahead matches at each position where the condition holds.
    RegexFindAllTest(
        "zero_width_lookahead",
        input="aba",
        regex="(?=a)",
        expected=[
            {"match": "", "idx": 0, "captures": []},
            {"match": "", "idx": 2, "captures": []},
        ],
        msg="$regexFindAll should produce zero-width matches at each lookahead position",
    ),
    # Engine advances by one code point after each zero-width match.
    RegexFindAllTest(
        "zero_width_advance_one_cp",
        input="abc",
        regex="(?=.)",
        expected=[
            {"match": "", "idx": 0, "captures": []},
            {"match": "", "idx": 1, "captures": []},
            {"match": "", "idx": 2, "captures": []},
        ],
        msg="$regexFindAll should advance one code point after each zero-width match",
    ),
    # Capturing lookahead: match is empty, captures populated from the lookahead content.
    RegexFindAllTest(
        "zero_width_capturing_lookahead",
        input="hello",
        regex="(?=(ll))",
        expected=[{"match": "", "idx": 2, "captures": ["ll"]}],
        msg="$regexFindAll capturing lookahead should have empty match but populated captures",
    ),
]

# Property [Empty Regex Matches]: an empty capturing group on non-empty input produces N matches
# (consistent with empty string regex behavior, not N+1).
+REGEXFINDALL_EMPTY_REGEX_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "empty_regex_capturing_group", + input="abc", + regex="()", + expected=[ + {"match": "", "idx": 0, "captures": [""]}, + {"match": "", "idx": 1, "captures": [""]}, + {"match": "", "idx": 2, "captures": [""]}, + ], + msg="$regexFindAll empty capturing group should match at each code point position", + ), +] + +REGEXFINDALL_MATCHING_ALL_TESTS = ( + REGEXFINDALL_CAPTURES_TESTS + + REGEXFINDALL_EDGE_TESTS + + REGEXFINDALL_MULTI_MATCH_TESTS + + REGEXFINDALL_ZERO_WIDTH_TESTS + + REGEXFINDALL_EMPTY_REGEX_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFINDALL_MATCHING_ALL_TESTS)) +def test_regexfindall_matching(collection, test_case: RegexFindAllTest): + """Test $regexFindAll matching behavior.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_null.py new file mode 100644 index 00000000..a6527a94 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_null.py @@ -0,0 +1,125 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import MISSING +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + RegexFindAllTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Null Propagation]: null in input or regex causes the result to be an empty 
# array.
# Sentinel marking the slot(s) of a pattern that receive the null-like value under test.
_PLACEHOLDER = object()
# (input, regex, test-id suffix) templates shared by the null and missing variants.
_NULL_PATTERNS = [
    (_PLACEHOLDER, "abc", "input"),
    ("hello", _PLACEHOLDER, "regex"),
    (_PLACEHOLDER, _PLACEHOLDER, "both"),
]


def _build_null_tests(null_value, prefix) -> list[RegexFindAllTest]:
    """Expand ``_NULL_PATTERNS`` into test cases for one null-like value.

    ``null_value`` replaces every ``_PLACEHOLDER`` slot; ``prefix`` names the value in the
    test ids and messages. Every generated case expects an empty result array.
    """
    messages = {
        "input": f"$regexFindAll should return empty array when input is {prefix}",
        "regex": f"$regexFindAll should return empty array when regex is {prefix}",
        "both": f"$regexFindAll should return empty array when both input and regex are {prefix}",
    }

    def _resolve(slot):
        # Swap the sentinel for the value under test; pass concrete values through untouched.
        return null_value if slot is _PLACEHOLDER else slot

    cases = []
    for input_slot, regex_slot, suffix in _NULL_PATTERNS:
        cases.append(
            RegexFindAllTest(
                f"{prefix}_{suffix}",
                input=_resolve(input_slot),
                regex=_resolve(regex_slot),
                expected=[],
                msg=messages[suffix],
            )
        )
    return cases


REGEXFINDALL_NULL_TESTS = _build_null_tests(None, "null")

# Property [Null Propagation - missing]: missing fields in input or regex are treated as null.
REGEXFINDALL_MISSING_TESTS = _build_null_tests(MISSING, "missing")

# Property [Null Propagation - mixed]: combining null and missing across input and regex still
# produces an empty array.
REGEXFINDALL_MIXED_NULL_TESTS: list[RegexFindAllTest] = [
    RegexFindAllTest(
        "mixed_null_input_missing_regex",
        regex=MISSING,
        input=None,
        msg="$regexFindAll should return empty array when input is null and regex is missing",
        expected=[],
    ),
    RegexFindAllTest(
        "mixed_missing_input_null_regex",
        regex=None,
        input=MISSING,
        msg="$regexFindAll should return empty array when input is missing and regex is null",
        expected=[],
    ),
]

# Property [Options Null]: null in options does not cause null propagation.
+REGEXFINDALL_OPTIONS_NULL_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "options_null", + input="hello", + regex="hello", + options=None, + expected=[{"match": "hello", "idx": 0, "captures": []}], + msg="$regexFindAll should match normally when options is null", + ), +] + +# Property [Options Null - missing]: missing field in options does not cause null propagation. +REGEXFINDALL_OPTIONS_MISSING_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "options_missing", + input="hello", + regex="hello", + options=MISSING, + expected=[{"match": "hello", "idx": 0, "captures": []}], + msg="$regexFindAll should match normally when options is a missing field", + ), +] + + +# Property [Null Propagation - expressions]: an expression that evaluates to null is treated +# identically to a null literal. +REGEXFINDALL_EXPR_NULL_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "expr_null_input", + input={"$cond": [False, "hello", None]}, + regex="hello", + expected=[], + msg="$regexFindAll should return empty array when input expression evaluates to null", + ), + RegexFindAllTest( + "expr_null_regex", + input="hello", + regex={"$cond": [False, "hello", None]}, + expected=[], + msg="$regexFindAll should return empty array when regex expression evaluates to null", + ), +] + +REGEXFINDALL_NULL_ALL_TESTS = ( + REGEXFINDALL_NULL_TESTS + + REGEXFINDALL_MISSING_TESTS + + REGEXFINDALL_MIXED_NULL_TESTS + + REGEXFINDALL_OPTIONS_NULL_TESTS + + REGEXFINDALL_OPTIONS_MISSING_TESTS + + REGEXFINDALL_EXPR_NULL_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFINDALL_NULL_ALL_TESTS)) +def test_regexfindall_null(collection, test_case: RegexFindAllTest): + """Test $regexFindAll null propagation cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git 
a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_options.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_options.py new file mode 100644 index 00000000..039163c9 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_options.py @@ -0,0 +1,195 @@ +from __future__ import annotations + +import pytest +from bson import Regex + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + RegexFindAllTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Regex Options]: options field accepts i, m, s, x as a string or BSON Regex flags. Empty +# string, duplicates, and "u" (PCRE UTF-8 flag) are valid. Invalid BSON Regex flags are silently +# accepted. +REGEXFINDALL_OPTIONS_TESTS: list[RegexFindAllTest] = [ + # Empty string options is valid and means no options. + RegexFindAllTest( + "options_empty_string", + input="hello", + regex="hello", + options="", + expected=[{"match": "hello", "idx": 0, "captures": []}], + msg="$regexFindAll should accept empty string as valid options", + ), + # i: case-insensitive. + RegexFindAllTest( + "options_i_case_insensitive", + input="HELLO", + regex="hello", + options="i", + expected=[{"match": "HELLO", "idx": 0, "captures": []}], + msg="$regexFindAll should match case-insensitively with 'i' option", + ), + # m: multiline, ^ matches start of each line. 
+ RegexFindAllTest( + "options_m_multiline", + input="line1\nline2", + regex="^line2", + options="m", + expected=[{"match": "line2", "idx": 6, "captures": []}], + msg="$regexFindAll should match ^ at start of each line with 'm' option", + ), + # m: \r\n recognized as line ending. + RegexFindAllTest( + "options_m_crlf", + input="line1\r\nline2", + regex="^line2", + options="m", + expected=[{"match": "line2", "idx": 7, "captures": []}], + msg="$regexFindAll should recognize \\r\\n as line ending with 'm' option", + ), + # s: dotAll, . matches newline. + RegexFindAllTest( + "options_s_dotall", + input="line1\nline2", + regex="line1.line2", + options="s", + expected=[{"match": "line1\nline2", "idx": 0, "captures": []}], + msg="$regexFindAll should match dot against newline with 's' option", + ), + # x: extended, whitespace in pattern is ignored. + RegexFindAllTest( + "options_x_extended", + input="hello", + regex="hel lo", + options="x", + expected=[{"match": "hello", "idx": 0, "captures": []}], + msg="$regexFindAll should ignore whitespace in pattern with 'x' option", + ), + # u: PCRE UTF-8 flag. Unlike truly invalid flags (e.g. "g", "z") which produce + # REGEX_BAD_OPTION_ERROR, "u" is silently accepted because the underlying PCRE engine + # recognizes it. It has no observable effect since the server already operates in UTF-8 mode. + RegexFindAllTest( + "options_u_silently_accepted", + input="hello", + regex="hello", + options="u", + expected=[{"match": "hello", "idx": 0, "captures": []}], + msg="$regexFindAll should silently accept 'u' option without error", + ), + # Combined options. + RegexFindAllTest( + "options_combined_im", + input="HELLO\nWORLD", + regex="^world", + options="im", + expected=[{"match": "WORLD", "idx": 6, "captures": []}], + msg="$regexFindAll should apply combined 'im' options together", + ), + # Duplicate option characters accepted. 
+ RegexFindAllTest( + "options_duplicate_ii", + input="HELLO", + regex="hello", + options="ii", + expected=[{"match": "HELLO", "idx": 0, "captures": []}], + msg="$regexFindAll should accept duplicate option characters", + ), + # BSON Regex flag i (case-insensitive). + RegexFindAllTest( + "options_bson_regex_flag_i", + input="HELLO", + regex=Regex("hello", "i"), + expected=[{"match": "HELLO", "idx": 0, "captures": []}], + msg="$regexFindAll should apply case-insensitive flag from BSON Regex", + ), + # BSON Regex flag s (dotAll). + RegexFindAllTest( + "options_bson_regex_flag_s", + input="a\nb", + regex=Regex("a.b", "s"), + expected=[{"match": "a\nb", "idx": 0, "captures": []}], + msg="$regexFindAll should apply dotAll flag from BSON Regex", + ), + # BSON Regex flag x (extended). + RegexFindAllTest( + "options_bson_regex_flag_x", + input="hello", + regex=Regex("hel lo", "x"), + expected=[{"match": "hello", "idx": 0, "captures": []}], + msg="$regexFindAll should apply extended flag from BSON Regex", + ), + # An invalid flag in a BSON Regex object does not produce an error. NOTE(review): PyMongo's + # Regex appears to drop unrecognized flag characters client-side, so "z" may never be sent. + RegexFindAllTest( + "options_bson_regex_invalid_flag_silent", + input="hello", + regex=Regex("hello", "z"), + expected=[{"match": "hello", "idx": 0, "captures": []}], + msg="$regexFindAll should silently accept invalid flags in BSON Regex", + ), + # All four options combined as string. + RegexFindAllTest( + "options_all_four_combined", + input="HELLO", + regex="hel lo", + options="imsx", + expected=[{"match": "HELLO", "idx": 0, "captures": []}], + msg="$regexFindAll should apply all four options combined as 'imsx'", + ), + # Null options does not count as "specifying options", so no conflict with BSON Regex flags. 
+ RegexFindAllTest( + "options_no_conflict_null_options_with_flags", + input="HELLO", + regex=Regex("hello", "i"), + options=None, + expected=[{"match": "HELLO", "idx": 0, "captures": []}], + msg="$regexFindAll should not conflict when options is null and BSON Regex has flags", + ), + # BSON Regex with no flags, options field provides the option. + RegexFindAllTest( + "options_no_conflict_no_flags_with_options", + input="HELLO", + regex=Regex("hello"), + options="i", + expected=[{"match": "HELLO", "idx": 0, "captures": []}], + msg="$regexFindAll should accept options field when BSON Regex has no flags", + ), + # BSON Regex with empty flags combined with non-empty options is accepted. + RegexFindAllTest( + "options_no_conflict_empty_flags_with_options", + input="HELLO", + regex=Regex("hello", ""), + options="i", + expected=[{"match": "HELLO", "idx": 0, "captures": []}], + msg="$regexFindAll should accept options field when BSON Regex has empty flags", + ), +] + + +# Property [Options Conflict - no conflict]: BSON Regex with only unrecognized flags does not +# conflict with the options field. NOTE(review): PyMongo may strip such flags before sending. 
+REGEXFINDALL_OPTIONS_NO_CONFLICT_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "options_no_conflict_unrecognized_flag", + input="hello", + regex=Regex("hello", "z"), + options="i", + expected=[{"match": "hello", "idx": 0, "captures": []}], + msg="$regexFindAll should not conflict when BSON Regex has only unrecognized flags", + ), +] + +REGEXFINDALL_OPTIONS_ALL_TESTS = REGEXFINDALL_OPTIONS_TESTS + REGEXFINDALL_OPTIONS_NO_CONFLICT_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFINDALL_OPTIONS_ALL_TESTS)) +def test_regexfindall_options(collection, test_case: RegexFindAllTest): + """Test $regexFindAll regex options behavior.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_precedence.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_precedence.py new file mode 100644 index 00000000..6608c330 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_precedence.py @@ -0,0 +1,189 @@ +from __future__ import annotations + +import pytest +from bson import Regex + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + REGEX_BAD_OPTION_ERROR, + REGEX_BAD_PATTERN_ERROR, + REGEX_INPUT_TYPE_ERROR, + REGEX_OPTIONS_CONFLICT_ERROR, + REGEX_OPTIONS_TYPE_ERROR, + REGEX_REGEX_TYPE_ERROR, +) +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import MISSING +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + RegexFindAllTest, + _expr, +) +from 
documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Null Precedence]: null propagation from regex takes precedence over bad option flag +# validation. +REGEXFINDALL_PRECEDENCE_SUCCESS_TESTS: list[RegexFindAllTest] = [ + # Null regex takes precedence over bad option flag validation. + RegexFindAllTest( + "precedence_null_regex_over_bad_option", + input="abc", + regex=None, + options="z", + expected=[], + msg="$regexFindAll null regex should take precedence over invalid option flag", + ), +] + + +# Property [Options Placement]: when regex is a BSON Regex with flags, specifying the options field +# produces an error, even if the flags are equivalent or options is empty. +REGEXFINDALL_OPTIONS_CONFLICT_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "options_conflict_same_flags", + input="hello", + regex=Regex("hello", "i"), + options="i", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexFindAll should reject options field when BSON Regex has same flags", + ), + RegexFindAllTest( + "options_conflict_different_flags", + input="hello", + regex=Regex("hello", "i"), + options="m", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexFindAll should reject options field when BSON Regex has different flags", + ), + RegexFindAllTest( + "options_conflict_empty_options", + input="hello", + regex=Regex("hello", "i"), + options="", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexFindAll should reject empty options field when BSON Regex has flags", + ), +] + + +# Property [Type Error Precedence]: options type errors take precedence over null propagation. When +# both input and regex have wrong types, regex error wins. Wrong-type arguments error even when the +# other is null or missing. Bad option flags, bad patterns, and options conflicts beat null input. +REGEXFINDALL_PRECEDENCE_ERROR_TESTS: list[RegexFindAllTest] = [ + # Options type error takes precedence over null input. 
+ RegexFindAllTest( + "precedence_options_over_null_input", + input=None, + regex="abc", + options=123, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll options type error should take precedence over null input", + ), + # Options type error takes precedence over null regex. + RegexFindAllTest( + "precedence_options_over_null_regex", + input="abc", + regex=None, + options=123, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll options type error should take precedence over null regex", + ), + # Options type error takes precedence over both null. + RegexFindAllTest( + "precedence_options_over_both_null", + input=None, + regex=None, + options=123, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll options type error should take precedence over both null input/regex", + ), + # Both input and regex wrong type: regex error wins. + RegexFindAllTest( + "precedence_regex_over_input", + input=123, + regex=123, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll regex type error should take precedence over input type error", + ), + # Wrong-type input still errors when regex is null. + RegexFindAllTest( + "precedence_input_type_with_null_regex", + input=123, + regex=None, + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should report input type error even when regex is null", + ), + # Wrong-type regex still errors when input is null. + RegexFindAllTest( + "precedence_regex_type_with_null_input", + input=None, + regex=123, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should report regex type error even when input is null", + ), + # Missing input does not bypass type check of regex. + RegexFindAllTest( + "precedence_missing_input_wrong_regex", + input=MISSING, + regex=123, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should report regex type error even when input is missing", + ), + # Missing regex does not bypass type check of input. 
+ RegexFindAllTest( + "precedence_wrong_input_missing_regex", + input=123, + regex=MISSING, + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should report input type error even when regex is missing", + ), + # Bad option flag takes precedence over null input. + RegexFindAllTest( + "precedence_bad_option_over_null_input", + input=None, + regex="abc", + options="z", + error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexFindAll bad option flag should take precedence over null input", + ), + # Invalid regex pattern takes precedence over null input. + RegexFindAllTest( + "precedence_bad_pattern_over_null_input", + input=None, + regex="[invalid", + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexFindAll bad pattern error should take precedence over null input", + ), + # Options conflict takes precedence over wrong-type input. + RegexFindAllTest( + "precedence_conflict_over_input_type", + input=123, + regex=Regex("hello", "i"), + options="i", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexFindAll options conflict should take precedence over input type error", + ), + # Options conflict takes precedence over null input. 
+ RegexFindAllTest( + "precedence_conflict_over_null_input", + input=None, + regex=Regex("hello", "i"), + options="i", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexFindAll options conflict should take precedence over null input", + ), +] + +REGEXFINDALL_PRECEDENCE_ALL_TESTS = ( + REGEXFINDALL_PRECEDENCE_SUCCESS_TESTS + + REGEXFINDALL_OPTIONS_CONFLICT_TESTS + + REGEXFINDALL_PRECEDENCE_ERROR_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFINDALL_PRECEDENCE_ALL_TESTS)) +def test_regexfindall_precedence(collection, test_case: RegexFindAllTest): + """Test $regexFindAll error precedence and options conflict.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_size_limit.py new file mode 100644 index 00000000..22572e55 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_size_limit.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import REGEX_BAD_PATTERN_ERROR, STRING_SIZE_LIMIT_ERROR +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import REGEX_PATTERN_LIMIT_BYTES, STRING_SIZE_LIMIT_BYTES +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + RegexFindAllTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [String Size Limit - Success]: input one byte under the limit is 
accepted. +REGEXFINDALL_SIZE_LIMIT_SUCCESS_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "size_one_under", + input="a" * (STRING_SIZE_LIMIT_BYTES - 4) + "XYZ", + regex="XYZ", + expected=[{"match": "XYZ", "idx": STRING_SIZE_LIMIT_BYTES - 4, "captures": []}], + msg="$regexFindAll should accept input one byte under the size limit", + ), + RegexFindAllTest( + "size_regex_at_pattern_limit", + input="a" * REGEX_PATTERN_LIMIT_BYTES, + regex="a" * REGEX_PATTERN_LIMIT_BYTES, + expected=[{"match": "a" * REGEX_PATTERN_LIMIT_BYTES, "idx": 0, "captures": []}], + msg="$regexFindAll should accept regex at the pattern length limit", + ), + RegexFindAllTest( + "size_two_matches", + input="XY" + "a" * (STRING_SIZE_LIMIT_BYTES - 5) + "XY", + regex="XY", + expected=[ + {"match": "XY", "idx": 0, "captures": []}, + {"match": "XY", "idx": STRING_SIZE_LIMIT_BYTES - 3, "captures": []}, + ], + msg="$regexFindAll should find matches at start and end of a large string", + ), +] + + +# Property [String Size Limit - Error]: input at the size limit or regex over the pattern limit +# produces an error. 
+REGEXFINDALL_SIZE_LIMIT_ERROR_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "size_at_limit", + input="a" * STRING_SIZE_LIMIT_BYTES, + regex="a", + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$regexFindAll should reject input at the size limit", + ), + RegexFindAllTest( + "size_regex_over_pattern_limit", + input="a", + regex="a" * (REGEX_PATTERN_LIMIT_BYTES + 1), + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexFindAll should reject regex over the pattern length limit", + ), +] + +REGEXFINDALL_SIZE_LIMIT_ALL_TESTS = ( + REGEXFINDALL_SIZE_LIMIT_SUCCESS_TESTS + REGEXFINDALL_SIZE_LIMIT_ERROR_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFINDALL_SIZE_LIMIT_ALL_TESTS)) +def test_regexfindall_size_limit(collection, test_case: RegexFindAllTest): + """Test $regexFindAll string size limit behavior.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_type_errors.py new file mode 100644 index 00000000..d48fa6cc --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_type_errors.py @@ -0,0 +1,390 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + REGEX_INPUT_TYPE_ERROR, + REGEX_OPTIONS_TYPE_ERROR, + REGEX_REGEX_TYPE_ERROR, +) +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF +from 
documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + RegexFindAllTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Type Strictness - input]: non-string, non-null input produces an error. +REGEXFINDALL_INPUT_TYPE_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "type_input_array", + input=["a"], + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject array as input", + ), + RegexFindAllTest( + "type_input_binary", + input=Binary(b"data"), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject binary as input", + ), + RegexFindAllTest( + "type_input_bool", + input=True, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject bool as input", + ), + RegexFindAllTest( + "type_input_date", + input=datetime(2024, 1, 1, tzinfo=timezone.utc), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject date as input", + ), + RegexFindAllTest( + "type_input_decimal128", + input=DECIMAL128_ONE_AND_HALF, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject decimal128 as input", + ), + RegexFindAllTest( + "type_input_float", + input=3.14, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject float as input", + ), + RegexFindAllTest( + "type_input_int", + input=42, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject int as input", + ), + RegexFindAllTest( + "type_input_long", + input=Int64(42), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject long as input", + ), + RegexFindAllTest( + "type_input_maxkey", + input=MaxKey(), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject maxkey as input", + ), + RegexFindAllTest( + 
"type_input_minkey", + input=MinKey(), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject minkey as input", + ), + RegexFindAllTest( + "type_input_object", + input={"a": 1}, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject object as input", + ), + RegexFindAllTest( + "type_input_objectid", + input=ObjectId("507f1f77bcf86cd799439011"), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject objectid as input", + ), + RegexFindAllTest( + "type_input_regex", + input=Regex("pattern"), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject regex as input", + ), + RegexFindAllTest( + "type_input_timestamp", + input=Timestamp(1, 1), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject timestamp as input", + ), + RegexFindAllTest( + "type_input_code", + input=Code("function() {}"), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject javascript code as input", + ), + RegexFindAllTest( + "type_input_code_scope", + input=Code("function() {}", {"x": 1}), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexFindAll should reject javascript code with scope as input", + ), +] + +# Property [Type Strictness - regex]: non-string, non-Regex, non-null regex produces an error. 
+REGEXFINDALL_REGEX_TYPE_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "type_regex_array", + input="hello", + regex=["a"], + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject array as regex", + ), + RegexFindAllTest( + "type_regex_binary", + input="hello", + regex=Binary(b"data"), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject binary as regex", + ), + RegexFindAllTest( + "type_regex_bool", + input="hello", + regex=True, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject bool as regex", + ), + RegexFindAllTest( + "type_regex_date", + input="hello", + regex=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject date as regex", + ), + RegexFindAllTest( + "type_regex_decimal128", + input="hello", + regex=DECIMAL128_ONE_AND_HALF, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject decimal128 as regex", + ), + RegexFindAllTest( + "type_regex_float", + input="hello", + regex=3.14, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject float as regex", + ), + RegexFindAllTest( + "type_regex_int", + input="hello", + regex=42, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject int as regex", + ), + RegexFindAllTest( + "type_regex_long", + input="hello", + regex=Int64(42), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject long as regex", + ), + RegexFindAllTest( + "type_regex_maxkey", + input="hello", + regex=MaxKey(), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject maxkey as regex", + ), + RegexFindAllTest( + "type_regex_minkey", + input="hello", + regex=MinKey(), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject minkey as regex", + ), + RegexFindAllTest( + "type_regex_object", + input="hello", + regex={"a": 1}, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject object as regex", + ), + 
RegexFindAllTest( + "type_regex_objectid", + input="hello", + regex=ObjectId("507f1f77bcf86cd799439011"), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject objectid as regex", + ), + RegexFindAllTest( + "type_regex_timestamp", + input="hello", + regex=Timestamp(1, 1), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject timestamp as regex", + ), + RegexFindAllTest( + "type_regex_code", + input="hello", + regex=Code("function() {}"), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject javascript code as regex", + ), + RegexFindAllTest( + "type_regex_code_scope", + input="hello", + regex=Code("function() {}", {"x": 1}), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexFindAll should reject javascript code with scope as regex", + ), +] + +# Property [Type Strictness - options]: non-string options (excluding null) produces an error. +REGEXFINDALL_OPTIONS_TYPE_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "type_options_array", + input="hello", + regex="hello", + options=["a"], + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject array as options", + ), + RegexFindAllTest( + "type_options_binary", + input="hello", + regex="hello", + options=Binary(b"data"), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject binary as options", + ), + RegexFindAllTest( + "type_options_bool", + input="hello", + regex="hello", + options=True, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject bool as options", + ), + RegexFindAllTest( + "type_options_date", + input="hello", + regex="hello", + options=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject date as options", + ), + RegexFindAllTest( + "type_options_decimal128", + input="hello", + regex="hello", + options=DECIMAL128_ONE_AND_HALF, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject decimal128 as options", + ), 
+ RegexFindAllTest( + "type_options_float", + input="hello", + regex="hello", + options=3.14, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject float as options", + ), + RegexFindAllTest( + "type_options_int", + input="hello", + regex="hello", + options=42, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject int as options", + ), + RegexFindAllTest( + "type_options_long", + input="hello", + regex="hello", + options=Int64(42), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject long as options", + ), + RegexFindAllTest( + "type_options_maxkey", + input="hello", + regex="hello", + options=MaxKey(), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject maxkey as options", + ), + RegexFindAllTest( + "type_options_minkey", + input="hello", + regex="hello", + options=MinKey(), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject minkey as options", + ), + RegexFindAllTest( + "type_options_object", + input="hello", + regex="hello", + options={"a": 1}, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject object as options", + ), + RegexFindAllTest( + "type_options_objectid", + input="hello", + regex="hello", + options=ObjectId("507f1f77bcf86cd799439011"), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject objectid as options", + ), + RegexFindAllTest( + "type_options_regex", + input="hello", + regex="hello", + options=Regex("pattern"), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject regex as options", + ), + RegexFindAllTest( + "type_options_timestamp", + input="hello", + regex="hello", + options=Timestamp(1, 1), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject timestamp as options", + ), + RegexFindAllTest( + "type_options_code", + input="hello", + regex="hello", + options=Code("function() {}"), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject 
javascript code as options", + ), + RegexFindAllTest( + "type_options_code_scope", + input="hello", + regex="hello", + options=Code("function() {}", {"x": 1}), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexFindAll should reject javascript code with scope as options", + ), +] + +REGEXFINDALL_TYPE_ERRORS_ALL_TESTS = ( + REGEXFINDALL_INPUT_TYPE_TESTS + REGEXFINDALL_REGEX_TYPE_TESTS + REGEXFINDALL_OPTIONS_TYPE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFINDALL_TYPE_ERRORS_ALL_TESTS)) +def test_regexfindall_type_errors(collection, test_case: RegexFindAllTest): + """Test $regexFindAll type strictness for input, regex, and options.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_usage.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_usage.py new file mode 100644 index 00000000..b4d6c6fa --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_usage.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult, assertSuccess +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + RegexFindAllTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_project_with_insert, +) + +# Property [Expression Arguments]: input, regex, and options accept expressions that resolve to the +# appropriate type. 
+REGEXFINDALL_EXPR_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "expr_input", + input={"$concat": ["hel", "lo"]}, + regex="hello", + expected=[{"match": "hello", "idx": 0, "captures": []}], + msg="$regexFindAll should accept expression resolving to string for input", + ), + RegexFindAllTest( + "expr_regex", + input="hello", + regex={"$concat": ["hel", "lo"]}, + expected=[{"match": "hello", "idx": 0, "captures": []}], + msg="$regexFindAll should accept expression resolving to string for regex", + ), + RegexFindAllTest( + "expr_options", + input="HELLO", + regex="hello", + options={"$concat": ["", "i"]}, + expected=[{"match": "HELLO", "idx": 0, "captures": []}], + msg="$regexFindAll should accept expression resolving to string for options", + ), + # $literal for dollar sign in regex. + RegexFindAllTest( + "expr_literal_dollar_regex", + input="price: $100", + regex={"$literal": "\\$[0-9]+"}, + expected=[{"match": "$100", "idx": 7, "captures": []}], + msg="$regexFindAll should accept $literal expression for regex with dollar sign", + ), +] + + +# Property [Edge Cases - string literal]: a string starting with "$" in the input field is +# treated as a field path reference, not a literal string. When the referenced field is missing, +# the input resolves to null and null propagation applies. 
+REGEXFINDALL_LITERAL_INPUT_TESTS: list[RegexFindAllTest] = [ + RegexFindAllTest( + "edge_dollar_input_is_field_ref", + input="$nonexistent", + regex="\\$nonexistent", + expected=[], + msg="$regexFindAll should treat dollar-prefixed input as field path reference, not literal", + ), +] + +REGEXFINDALL_USAGE_ALL_TESTS = REGEXFINDALL_EXPR_TESTS + REGEXFINDALL_LITERAL_INPUT_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXFINDALL_USAGE_ALL_TESTS)) +def test_regexfindall_usage(collection, test_case: RegexFindAllTest): + """Test $regexFindAll expression arguments and field reference behavior.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) + + +# Property [Document Field References]: $regexFindAll works with field references +# from inserted documents, not just inline literals. +def test_regexfindall_document_fields(collection): + """Test $regexFindAll reads values from document fields.""" + result = execute_project_with_insert( + collection, + {"s": "abc123def456"}, + {"result": {"$regexFindAll": {"input": "$s", "regex": "[0-9]+"}}}, + ) + assertSuccess( + result, + [ + { + "result": [ + {"match": "123", "idx": 3, "captures": []}, + {"match": "456", "idx": 9, "captures": []}, + ] + } + ], + msg="$regexFindAll should find all digit-sequence matches from a document field", + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/utils/regexFindAll_common.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/utils/regexFindAll_common.py new file mode 100644 index 00000000..dbe5179b --- /dev/null 
+++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/utils/regexFindAll_common.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, cast + +from documentdb_tests.framework.test_case import BaseTestCase + +# Sentinel for "omit this parameter from the expression." Distinct from None (which means pass null) +# and MISSING (which means reference a missing field). +_OMIT = object() + + +@dataclass(frozen=True) +class RegexFindAllTest(BaseTestCase): + """Test case for $regexFindAll operator.""" + + input: Any = None + regex: Any = None + options: Any = _OMIT + expr: Any = None # Raw expression override for syntax tests + + +def _expr(test_case: RegexFindAllTest) -> dict[str, Any]: + if test_case.expr is not None: + return cast(dict[str, Any], test_case.expr) + params: dict[str, Any] = {"input": test_case.input, "regex": test_case.regex} + if test_case.options is not _OMIT: + params["options"] = test_case.options + return {"$regexFindAll": params} diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_invalid_args.py new file mode 100644 index 00000000..0b0bcceb --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_invalid_args.py @@ -0,0 +1,278 @@ +from __future__ import annotations + +import pytest +from bson import Regex + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + FAILED_TO_PARSE_ERROR, + INVALID_DOLLAR_FIELD_PATH, + REGEX_BAD_OPTION_ERROR, + REGEX_BAD_PATTERN_ERROR, + REGEX_MISSING_INPUT_ERROR, + REGEX_MISSING_REGEX_ERROR, + REGEX_NON_OBJECT_ERROR, + REGEX_NULL_BYTE_ERROR, + REGEX_OPTIONS_CONFLICT_ERROR, + REGEX_OPTIONS_NULL_BYTE_ERROR, + 
REGEX_UNKNOWN_FIELD_ERROR, +) +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( + RegexMatchTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Syntax Validation]: non-object arguments and missing or unknown fields produce errors. +REGEXMATCH_SYNTAX_ERROR_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "syntax_non_object_string", + expr={"$regexMatch": "string"}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexMatch should reject string as argument", + ), + RegexMatchTest( + "syntax_non_object_array", + expr={"$regexMatch": ["array"]}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexMatch should reject array as argument", + ), + RegexMatchTest( + "syntax_non_object_null", + expr={"$regexMatch": None}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexMatch should reject null as argument", + ), + RegexMatchTest( + "syntax_non_object_int", + expr={"$regexMatch": 42}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexMatch should reject int as argument", + ), + RegexMatchTest( + "syntax_non_object_bool", + expr={"$regexMatch": True}, + error_code=REGEX_NON_OBJECT_ERROR, + msg="$regexMatch should reject bool as argument", + ), + RegexMatchTest( + "syntax_empty_object", + expr={"$regexMatch": {}}, + error_code=REGEX_MISSING_INPUT_ERROR, + msg="$regexMatch should reject empty object", + ), + RegexMatchTest( + "syntax_missing_input", + expr={"$regexMatch": {"regex": "abc"}}, + error_code=REGEX_MISSING_INPUT_ERROR, + msg="$regexMatch should reject missing input field", + ), + RegexMatchTest( + "syntax_missing_regex", + expr={"$regexMatch": {"input": "abc"}}, + error_code=REGEX_MISSING_REGEX_ERROR, + msg="$regexMatch should reject missing regex field", + ), + RegexMatchTest( + "syntax_unknown_field", + expr={"$regexMatch": {"input": "abc", "regex": "abc", 
"bogus": 1}}, + error_code=REGEX_UNKNOWN_FIELD_ERROR, + msg="$regexMatch should reject unknown fields", + ), +] + + +REGEXMATCH_OPTIONS_CONFLICT_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "options_conflict_same_flags", + input="hello", + regex=Regex("hello", "i"), + options="i", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexMatch should error when options duplicates BSON flags", + ), + RegexMatchTest( + "options_conflict_different_flags", + input="hello", + regex=Regex("hello", "i"), + options="m", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexMatch should error when options differs from BSON flags", + ), + RegexMatchTest( + "options_conflict_empty_options", + input="hello", + regex=Regex("hello", "i"), + options="", + error_code=REGEX_OPTIONS_CONFLICT_ERROR, + msg="$regexMatch should error on empty options with BSON flags", + ), +] + + +# Property [Type Strictness - pattern]: invalid regex pattern produces an error. +REGEXMATCH_BAD_PATTERN_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "type_bad_pattern_bracket", + input="abc", + regex="[invalid", + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexMatch should reject unclosed bracket in pattern", + ), + RegexMatchTest( + "type_bad_pattern_paren", + input="abc", + regex="(unclosed", + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexMatch should reject unclosed paren in pattern", + ), + RegexMatchTest( + "type_bad_pattern_var_lookbehind", + input="abc", + regex="(?<=a+)b", + error_code=REGEX_BAD_PATTERN_ERROR, + msg="$regexMatch should reject variable-length lookbehind", + ), +] + +REGEXMATCH_NULL_BYTE_PATTERN_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "type_null_byte_in_pattern", + input="abc", + regex="ab\x00c", + error_code=REGEX_NULL_BYTE_ERROR, + msg="$regexMatch should reject null byte in regex pattern", + ), +] + +REGEXMATCH_BAD_OPTION_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "type_bad_option", + input="abc", + regex="abc", + options="z", + 
error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexMatch should reject unrecognized option character", + ), + RegexMatchTest( + "type_bad_option_leading_whitespace", + input="abc", + regex="abc", + options=" i", + error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexMatch should reject leading whitespace in options", + ), + RegexMatchTest( + "type_bad_option_trailing_whitespace", + input="abc", + regex="abc", + options="i ", + error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexMatch should reject trailing whitespace in options", + ), + RegexMatchTest( + "type_bad_option_mixed_valid_invalid", + input="abc", + regex="abc", + options="ig", + error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexMatch should reject mix of valid and invalid options", + ), + RegexMatchTest( + "type_bad_option_uppercase_I", + input="abc", + regex="abc", + options="I", + error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexMatch should reject uppercase 'I' option", + ), + RegexMatchTest( + "type_bad_option_uppercase_M", + input="abc", + regex="abc", + options="M", + error_code=REGEX_BAD_OPTION_ERROR, + msg="$regexMatch should reject uppercase 'M' option", + ), +] + +REGEXMATCH_OPTIONS_NULL_BYTE_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "type_null_byte_in_options", + input="abc", + regex="abc", + options="i\x00m", + error_code=REGEX_OPTIONS_NULL_BYTE_ERROR, + msg="$regexMatch should reject null byte in options string", + ), +] + + +# Property [Dollar Sign Error]: a bare "$" is interpreted as a field path and "$$" is interpreted +# as an empty variable name. 
@pytest.mark.parametrize("test_case", pytest_params(REGEXMATCH_INVALID_ARGS_ALL_TESTS))
def test_regexmatch_cases(collection, test_case: RegexMatchTest):
    """Run one invalid-argument case for $regexMatch and check its outcome.

    The expression comes from ``_expr(test_case)`` and is evaluated against
    ``collection`` with ``execute_expression``. ``assertResult`` then receives
    the result together with the case's ``expected``, ``error_code`` and
    ``msg``.
    """
    expression = _expr(test_case)
    outcome = execute_expression(collection, expression)
    assertResult(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )
b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_matching.py new file mode 100644 index 00000000..f361cefd --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_matching.py @@ -0,0 +1,165 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( + RegexMatchTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Expression Arguments]: input, regex, and options accept expressions that resolve to +# the appropriate type. +REGEXMATCH_EXPR_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "expr_input", + input={"$concat": ["hel", "lo"]}, + regex="hello", + expected=True, + msg="$regexMatch should accept expression for input", + ), + RegexMatchTest( + "expr_regex", + input="hello", + regex={"$concat": ["hel", "lo"]}, + expected=True, + msg="$regexMatch should accept expression for regex", + ), + RegexMatchTest( + "expr_options", + input="HELLO", + regex="hello", + options={"$concat": ["", "i"]}, + expected=True, + msg="$regexMatch should accept expression for options", + ), + RegexMatchTest( + "expr_literal_dollar_regex", + input="price: $100", + regex={"$literal": "\\$[0-9]+"}, + expected=True, + msg="$regexMatch should accept $literal for dollar in regex", + ), +] + + +REGEXMATCH_LITERAL_INPUT_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "edge_dollar_input_is_field_ref", + input="$nonexistent", + regex="\\$nonexistent", + expected=False, + msg="$regexMatch should treat dollar-prefixed input as field ref", + ), +] + +# Property [Edge Cases]: empty strings, large inputs, and control characters are handled +# correctly. 
+REGEXMATCH_EDGE_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "edge_empty_input_empty_regex", + input="", + regex="", + expected=True, + msg="$regexMatch should match empty regex on empty input", + ), + RegexMatchTest( + "edge_nonempty_input_empty_regex", + input="hello", + regex="", + expected=True, + msg="$regexMatch empty regex should match any input", + ), + RegexMatchTest( + "edge_empty_input_nonempty_regex", + input="", + regex="abc", + expected=False, + msg="$regexMatch should return false for no match on empty input", + ), + RegexMatchTest( + "edge_empty_input_dotstar", + input="", + regex=".*", + expected=True, + msg="$regexMatch .* should match empty input", + ), + RegexMatchTest( + "edge_empty_input_anchored_empty", + input="", + regex="^$", + expected=True, + msg="$regexMatch ^$ should match empty input", + ), + RegexMatchTest( + "edge_anchored_full_match", + input="hello", + regex="^hello$", + expected=True, + msg="$regexMatch anchored pattern should match full string", + ), + RegexMatchTest( + "edge_anchored_partial_no_match", + input="hello world", + regex="^hello$", + expected=False, + msg="$regexMatch anchored pattern should not match partial", + ), + RegexMatchTest( + "edge_partial_match", + input="hello world", + regex="world", + expected=True, + msg="$regexMatch should match substring of input", + ), + RegexMatchTest( + "edge_newline", + input="hello\nworld", + regex="world", + expected=True, + msg="$regexMatch should match across newline", + ), + RegexMatchTest( + "edge_tab", + input="hello\tworld", + regex="world", + expected=True, + msg="$regexMatch should match across tab", + ), + RegexMatchTest( + "edge_null_byte", + input="hello\x00world", + regex="world", + expected=True, + msg="$regexMatch should match across null byte", + ), + RegexMatchTest( + "edge_carriage_return", + input="hello\rworld", + regex="world", + expected=True, + msg="$regexMatch should match across carriage return", + ), + RegexMatchTest( + "edge_s_no_nbsp", + 
def _build_null_tests(null_value, prefix) -> list[RegexMatchTest]:
    """Expand ``_NULL_PATTERNS`` into cases with ``null_value`` filled in.

    Args:
        null_value: Value written wherever a pattern holds ``_PLACEHOLDER``.
        prefix: Leading part of each case id; the pattern's suffix is
            appended after an underscore.

    Returns:
        One ``RegexMatchTest`` per pattern, each with ``expected=False``. The
        message text reads "null" when ``null_value`` is ``None`` and
        "missing" for any other value.
    """
    if null_value is None:
        kind = "null"
    else:
        kind = "missing"

    def _fill(slot):
        # Only the placeholder object itself is substituted; real values in
        # the pattern pass through untouched.
        return null_value if slot is _PLACEHOLDER else slot

    cases: list[RegexMatchTest] = []
    for input_slot, regex_slot, suffix in _NULL_PATTERNS:
        cases.append(
            RegexMatchTest(
                f"{prefix}_{suffix}",
                input=_fill(input_slot),
                regex=_fill(regex_slot),
                expected=False,
                msg=_MSG_MAP[suffix].format(kind=kind),
            )
        )
    return cases
@pytest.mark.parametrize("test_case", pytest_params(REGEXMATCH_NULL_ALL_TESTS))
def test_regexmatch_cases(collection, test_case: RegexMatchTest):
    """Run one null/missing-handling case for $regexMatch and check its outcome.

    The expression built by ``_expr`` is evaluated against ``collection`` and
    the result is handed to ``assertResult`` with the case's ``expected``,
    ``error_code`` and ``msg``.
    """
    expression = _expr(test_case)
    outcome = execute_expression(collection, expression)
    assertResult(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )
b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_options.py @@ -0,0 +1,174 @@ +from __future__ import annotations + +import pytest +from bson import Regex + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( + RegexMatchTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Regex Options]: options field accepts i, m, s, x as a string or BSON Regex flags. +# Empty string, duplicates, and "u" (PCRE UTF-8 flag) are valid. Invalid BSON Regex flags are +# silently accepted. +REGEXMATCH_OPTIONS_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "options_empty_string", + input="hello", + regex="hello", + options="", + expected=True, + msg="$regexMatch should accept empty string options", + ), + RegexMatchTest( + "options_i_case_insensitive", + input="HELLO", + regex="hello", + options="i", + expected=True, + msg="$regexMatch option 'i' should enable case-insensitive match", + ), + RegexMatchTest( + "options_m_multiline", + input="line1\nline2", + regex="^line2", + options="m", + expected=True, + msg="$regexMatch option 'm' should match ^ at line start", + ), + RegexMatchTest( + "options_m_crlf", + input="line1\r\nline2", + regex="^line2", + options="m", + expected=True, + msg="$regexMatch option 'm' should recognize CRLF as line ending", + ), + RegexMatchTest( + "options_s_dotall", + input="line1\nline2", + regex="line1.line2", + options="s", + expected=True, + msg="$regexMatch option 's' should make dot match newline", + ), + RegexMatchTest( + "options_x_extended", + input="hello", + regex="hel lo", + options="x", + expected=True, + msg="$regexMatch option 'x' should ignore whitespace in pattern", + ), + RegexMatchTest( + 
"options_u_silently_accepted", + input="hello", + regex="hello", + options="u", + expected=True, + msg="$regexMatch should silently accept 'u' option", + ), + RegexMatchTest( + "options_combined_im", + input="HELLO\nWORLD", + regex="^world", + options="im", + expected=True, + msg="$regexMatch should support combined 'im' options", + ), + RegexMatchTest( + "options_duplicate_ii", + input="HELLO", + regex="hello", + options="ii", + expected=True, + msg="$regexMatch should accept duplicate option characters", + ), + RegexMatchTest( + "options_bson_regex_flag_i", + input="HELLO", + regex=Regex("hello", "i"), + expected=True, + msg="$regexMatch should honor BSON Regex 'i' flag", + ), + RegexMatchTest( + "options_bson_regex_flag_s", + input="a\nb", + regex=Regex("a.b", "s"), + expected=True, + msg="$regexMatch should honor BSON Regex 's' flag", + ), + RegexMatchTest( + "options_bson_regex_flag_x", + input="hello", + regex=Regex("hel lo", "x"), + expected=True, + msg="$regexMatch should honor BSON Regex 'x' flag", + ), + RegexMatchTest( + "options_bson_regex_invalid_flag_silent", + input="hello", + regex=Regex("hello", "z"), + expected=True, + msg="$regexMatch should silently accept invalid BSON Regex flags", + ), + RegexMatchTest( + "options_all_four_combined", + input="HELLO", + regex="hel lo", + options="imsx", + expected=True, + msg="$regexMatch should support all four options combined", + ), + RegexMatchTest( + "options_no_conflict_null_options_with_flags", + input="HELLO", + regex=Regex("hello", "i"), + options=None, + expected=True, + msg="$regexMatch null options should not conflict with BSON flags", + ), + RegexMatchTest( + "options_no_conflict_no_flags_with_options", + input="HELLO", + regex=Regex("hello"), + options="i", + expected=True, + msg="$regexMatch should allow options with flagless BSON Regex", + ), + RegexMatchTest( + "options_no_conflict_empty_flags_with_options", + input="HELLO", + regex=Regex("hello", ""), + options="i", + expected=True, + 
@pytest.mark.parametrize("test_case", pytest_params(REGEXMATCH_OPTIONS_ALL_TESTS))
def test_regexmatch_cases(collection, test_case: RegexMatchTest):
    """Run one options-handling case for $regexMatch and check its outcome.

    The expression built by ``_expr`` is evaluated against ``collection`` and
    the result is handed to ``assertResult`` with the case's ``expected``,
    ``error_code`` and ``msg``.
    """
    expression = _expr(test_case)
    outcome = execute_expression(collection, expression)
    assertResult(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )
documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Type Error Precedence]: options type errors take precedence over null propagation. +# When both input and regex have wrong types, regex error wins. Wrong-type arguments error even +# when the other is null or missing. +REGEXMATCH_PRECEDENCE_ERROR_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "precedence_options_over_null_input", + input=None, + regex="abc", + options=123, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch options type error should precede null input", + ), + RegexMatchTest( + "precedence_options_over_null_regex", + input="abc", + regex=None, + options=123, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch options type error should precede null regex", + ), + RegexMatchTest( + "precedence_options_over_both_null", + input=None, + regex=None, + options=123, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch options type error should precede both null", + ), + RegexMatchTest( + "precedence_regex_over_input", + input=123, + regex=123, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch regex type error should precede input type error", + ), + RegexMatchTest( + "precedence_input_type_with_null_regex", + input=123, + regex=None, + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should error on wrong-type input with null regex", + ), + RegexMatchTest( + "precedence_regex_type_with_null_input", + input=None, + regex=123, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should error on wrong-type regex with null input", + ), + RegexMatchTest( + "precedence_missing_input_wrong_regex", + input=MISSING, + regex=123, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch missing input should not bypass regex type check", + ), + RegexMatchTest( + "precedence_wrong_input_missing_regex", + input=123, + regex=MISSING, + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch missing regex should not bypass 
@pytest.mark.parametrize("test_case", pytest_params(REGEXMATCH_PRECEDENCE_ERROR_TESTS))
def test_regexmatch_cases(collection, test_case: RegexMatchTest):
    """Run one error-precedence case for $regexMatch and check its outcome.

    The expression built by ``_expr`` is evaluated against ``collection`` and
    the result is handed to ``assertResult`` with the case's ``expected``,
    ``error_code`` and ``msg``.
    """
    expression = _expr(test_case)
    outcome = execute_expression(collection, expression)
    assertResult(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )
documentdb_tests.framework.test_constants import REGEX_PATTERN_LIMIT_BYTES, STRING_SIZE_LIMIT_BYTES +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( + RegexMatchTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [String Size Limit - Success]: input one byte under the limit is accepted. +REGEXMATCH_SIZE_LIMIT_SUCCESS_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "size_one_under", + input="a" * (STRING_SIZE_LIMIT_BYTES - 4) + "XYZ", + regex="XYZ", + expected=True, + msg="$regexMatch should accept input one byte under the size limit", + ), + RegexMatchTest( + "size_regex_at_pattern_limit", + input="a" * REGEX_PATTERN_LIMIT_BYTES, + regex="a" * REGEX_PATTERN_LIMIT_BYTES, + expected=True, + msg="$regexMatch should accept regex at the pattern length limit", + ), +] + + +# Property [String Size Limit - Error]: input at the size limit produces an error. 
@pytest.mark.parametrize("test_case", pytest_params(REGEXMATCH_SIZE_LIMIT_ALL_TESTS))
def test_regexmatch_cases(collection, test_case: RegexMatchTest):
    """Run one size-limit case for $regexMatch and check its outcome.

    The expression built by ``_expr`` is evaluated against ``collection`` and
    the result is handed to ``assertResult`` with the case's ``expected``,
    ``error_code`` and ``msg``.
    """
    expression = _expr(test_case)
    outcome = execute_expression(collection, expression)
    assertResult(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )
documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( + RegexMatchTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Type Strictness - input]: non-string, non-null input produces an error. +REGEXMATCH_INPUT_TYPE_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "type_input_array", + input=["a"], + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject array as input", + ), + RegexMatchTest( + "type_input_binary", + input=Binary(b"data"), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject binary as input", + ), + RegexMatchTest( + "type_input_bool", + input=True, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject bool as input", + ), + RegexMatchTest( + "type_input_date", + input=datetime(2024, 1, 1, tzinfo=timezone.utc), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject date as input", + ), + RegexMatchTest( + "type_input_decimal128", + input=DECIMAL128_ONE_AND_HALF, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject decimal128 as input", + ), + RegexMatchTest( + "type_input_float", + input=3.14, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject float as input", + ), + RegexMatchTest( + "type_input_int", + input=42, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject int as input", + ), + RegexMatchTest( + "type_input_long", + input=Int64(42), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject long as input", + ), + RegexMatchTest( + "type_input_maxkey", + input=MaxKey(), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject maxkey as input", + ), + RegexMatchTest( + "type_input_minkey", + input=MinKey(), + regex="abc", + 
error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject minkey as input", + ), + RegexMatchTest( + "type_input_object", + input={"a": 1}, + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject object as input", + ), + RegexMatchTest( + "type_input_objectid", + input=ObjectId("507f1f77bcf86cd799439011"), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject objectid as input", + ), + RegexMatchTest( + "type_input_regex", + input=Regex("pattern"), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject regex as input", + ), + RegexMatchTest( + "type_input_timestamp", + input=Timestamp(1, 1), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject timestamp as input", + ), + RegexMatchTest( + "type_input_code", + input=Code("function() {}"), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject javascript code as input", + ), + RegexMatchTest( + "type_input_code_scope", + input=Code("function() {}", {"x": 1}), + regex="abc", + error_code=REGEX_INPUT_TYPE_ERROR, + msg="$regexMatch should reject code with scope as input", + ), +] + +# Property [Type Strictness - regex]: non-string, non-Regex, non-null regex produces an error. 
+REGEXMATCH_REGEX_TYPE_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "type_regex_array", + input="hello", + regex=["a"], + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject array as regex", + ), + RegexMatchTest( + "type_regex_binary", + input="hello", + regex=Binary(b"data"), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject binary as regex", + ), + RegexMatchTest( + "type_regex_bool", + input="hello", + regex=True, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject bool as regex", + ), + RegexMatchTest( + "type_regex_date", + input="hello", + regex=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject date as regex", + ), + RegexMatchTest( + "type_regex_decimal128", + input="hello", + regex=DECIMAL128_ONE_AND_HALF, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject decimal128 as regex", + ), + RegexMatchTest( + "type_regex_float", + input="hello", + regex=3.14, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject float as regex", + ), + RegexMatchTest( + "type_regex_int", + input="hello", + regex=42, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject int as regex", + ), + RegexMatchTest( + "type_regex_long", + input="hello", + regex=Int64(42), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject long as regex", + ), + RegexMatchTest( + "type_regex_maxkey", + input="hello", + regex=MaxKey(), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject maxkey as regex", + ), + RegexMatchTest( + "type_regex_minkey", + input="hello", + regex=MinKey(), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject minkey as regex", + ), + RegexMatchTest( + "type_regex_object", + input="hello", + regex={"a": 1}, + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject object as regex", + ), + RegexMatchTest( + "type_regex_objectid", + input="hello", 
+ regex=ObjectId("507f1f77bcf86cd799439011"), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject objectid as regex", + ), + RegexMatchTest( + "type_regex_timestamp", + input="hello", + regex=Timestamp(1, 1), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject timestamp as regex", + ), + RegexMatchTest( + "type_regex_code", + input="hello", + regex=Code("function() {}"), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject javascript code as regex", + ), + RegexMatchTest( + "type_regex_code_scope", + input="hello", + regex=Code("function() {}", {"x": 1}), + error_code=REGEX_REGEX_TYPE_ERROR, + msg="$regexMatch should reject code with scope as regex", + ), +] + +# Property [Type Strictness - options]: non-string options (excluding null) produces an error. +REGEXMATCH_OPTIONS_TYPE_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "type_options_array", + input="hello", + regex="hello", + options=["a"], + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject array as options", + ), + RegexMatchTest( + "type_options_binary", + input="hello", + regex="hello", + options=Binary(b"data"), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject binary as options", + ), + RegexMatchTest( + "type_options_bool", + input="hello", + regex="hello", + options=True, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject bool as options", + ), + RegexMatchTest( + "type_options_date", + input="hello", + regex="hello", + options=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject date as options", + ), + RegexMatchTest( + "type_options_decimal128", + input="hello", + regex="hello", + options=DECIMAL128_ONE_AND_HALF, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject decimal128 as options", + ), + RegexMatchTest( + "type_options_float", + input="hello", + regex="hello", + options=3.14, + 
error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject float as options", + ), + RegexMatchTest( + "type_options_int", + input="hello", + regex="hello", + options=42, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject int as options", + ), + RegexMatchTest( + "type_options_long", + input="hello", + regex="hello", + options=Int64(42), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject long as options", + ), + RegexMatchTest( + "type_options_maxkey", + input="hello", + regex="hello", + options=MaxKey(), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject maxkey as options", + ), + RegexMatchTest( + "type_options_minkey", + input="hello", + regex="hello", + options=MinKey(), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject minkey as options", + ), + RegexMatchTest( + "type_options_object", + input="hello", + regex="hello", + options={"a": 1}, + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject object as options", + ), + RegexMatchTest( + "type_options_objectid", + input="hello", + regex="hello", + options=ObjectId("507f1f77bcf86cd799439011"), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject objectid as options", + ), + RegexMatchTest( + "type_options_regex", + input="hello", + regex="hello", + options=Regex("pattern"), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject regex as options", + ), + RegexMatchTest( + "type_options_timestamp", + input="hello", + regex="hello", + options=Timestamp(1, 1), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject timestamp as options", + ), + RegexMatchTest( + "type_options_code", + input="hello", + regex="hello", + options=Code("function() {}"), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject javascript code as options", + ), + RegexMatchTest( + "type_options_code_scope", + input="hello", + regex="hello", + 
options=Code("function() {}", {"x": 1}), + error_code=REGEX_OPTIONS_TYPE_ERROR, + msg="$regexMatch should reject code with scope as options", + ), +] + +REGEXMATCH_TYPE_ERROR_ALL_TESTS = ( + REGEXMATCH_INPUT_TYPE_TESTS + REGEXMATCH_REGEX_TYPE_TESTS + REGEXMATCH_OPTIONS_TYPE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXMATCH_TYPE_ERROR_ALL_TESTS)) +def test_regexmatch_cases(collection, test_case: RegexMatchTest): + """Test $regexMatch type strictness cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_usage.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_usage.py new file mode 100644 index 00000000..2e892f1c --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_usage.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( + RegexMatchTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_project, + execute_project_with_insert, +) + + +# Property [Document Field References]: $regexMatch works with field references +# from inserted documents, not just inline literals. 
+def test_regexmatch_document_fields(collection): + """Test $regexMatch reads values from document fields.""" + result = execute_project_with_insert( + collection, + {"s": "hello world"}, + {"result": {"$regexMatch": {"input": "$s", "regex": "world"}}}, + ) + assertSuccess( + result, [{"result": True}], msg="$regexMatch should match from document field references" + ) + + +# Property [Return Type]: result is always a boolean. +REGEXMATCH_RETURN_TYPE_TESTS: list[RegexMatchTest] = [ + RegexMatchTest( + "return_type_match", + input="hello", + regex="hello", + expected=True, + msg="$regexMatch should return bool type on match", + ), + RegexMatchTest( + "return_type_no_match", + input="hello", + regex="xyz", + expected=False, + msg="$regexMatch should return bool type on no match", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(REGEXMATCH_RETURN_TYPE_TESTS)) +def test_regexmatch_return_type(collection, test_case: RegexMatchTest): + """Test $regexMatch result is always a boolean.""" + expr = _expr(test_case) + result = execute_project(collection, {"resultType": {"$type": expr}, "result": expr}) + assertSuccess(result, [{"resultType": "bool", "result": test_case.expected}], msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/utils/regexMatch_common.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/utils/regexMatch_common.py new file mode 100644 index 00000000..5bfda64d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/utils/regexMatch_common.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import 
Any, cast + +from documentdb_tests.framework.test_case import BaseTestCase + +# Sentinel for "omit this parameter from the expression." Distinct from None (which means pass null) +# and MISSING (which means reference a missing field). +_OMIT = object() + + +@dataclass(frozen=True) +class RegexMatchTest(BaseTestCase): + """Test case for $regexMatch operator.""" + + input: Any = None + regex: Any = None + options: Any = _OMIT + expr: Any = None # Raw expression override for syntax tests + + +def _expr(test_case: RegexMatchTest) -> dict[str, Any]: + if test_case.expr is not None: + return cast(dict[str, Any], test_case.expr) + params: dict[str, Any] = {"input": test_case.input, "regex": test_case.regex} + if test_case.options is not _OMIT: + params["options"] = test_case.options + return {"$regexMatch": params} From fc777ff9dd90641ae37beccca5fea8f502c68f31 Mon Sep 17 00:00:00 2001 From: Yunxuan Shi Date: Thu, 9 Apr 2026 16:24:05 -0700 Subject: [PATCH 2/2] Add missing dependencies for $regex* tests - Add __init__.py for package resolution - Add regex error codes to error_codes.py - Add BSON_TO_STRING_CONVERSION_ERROR, STRING_SIZE_LIMIT_ERROR, FAILED_TO_PARSE_ERROR, INVALID_DOLLAR_FIELD_PATH - Add STRING_SIZE_LIMIT_BYTES, REGEX_PATTERN_LIMIT_BYTES to test_constants.py - Fix pytest_params import (parametrize module) - Use relative imports for operator common utils - Pin CI MongoDB to 8.2.4 - Run isort/black formatting Signed-off-by: Yunxuan Shi --- .github/workflows/pr-tests.yml | 2 +- .../compatibility/tests/core/__init__.py | 0 .../tests/core/operator/__init__.py | 0 .../core/operator/expressions/__init__.py | 0 .../operator/expressions/string/__init__.py | 0 .../expressions/string/regexFind/__init__.py | 0 .../regexFind/test_regexFind_invalid_args.py | 13 ++++++++----- .../regexFind/test_regexFind_matching.py | 13 ++++++++----- .../string/regexFind/test_regexFind_null.py | 13 ++++++++----- .../string/regexFind/test_regexFind_options.py | 13 ++++++++----- 
.../regexFind/test_regexFind_precedence.py | 13 ++++++++----- .../regexFind/test_regexFind_size_limit.py | 18 ++++++++++++------ .../regexFind/test_regexFind_type_errors.py | 13 ++++++++----- .../string/regexFind/test_regexFind_usage.py | 17 ++++++++++------- .../string/regexFindAll/__init__.py | 0 .../regexFindAll/test_regexFindAll_encoding.py | 13 ++++++++----- .../test_regexFindAll_invalid_args.py | 13 ++++++++----- .../test_regexFindAll_invariants.py | 14 ++++++++------ .../regexFindAll/test_regexFindAll_matching.py | 13 ++++++++----- .../regexFindAll/test_regexFindAll_null.py | 13 ++++++++----- .../regexFindAll/test_regexFindAll_options.py | 13 ++++++++----- .../test_regexFindAll_precedence.py | 13 ++++++++----- .../test_regexFindAll_size_limit.py | 18 ++++++++++++------ .../test_regexFindAll_type_errors.py | 13 ++++++++----- .../regexFindAll/test_regexFindAll_usage.py | 16 +++++++++------- .../expressions/string/regexMatch/__init__.py | 0 .../regexMatch/test_regexMatch_invalid_args.py | 13 ++++++++----- .../regexMatch/test_regexMatch_matching.py | 13 ++++++++----- .../string/regexMatch/test_regexMatch_null.py | 13 ++++++++----- .../regexMatch/test_regexMatch_options.py | 13 ++++++++----- .../regexMatch/test_regexMatch_precedence.py | 13 ++++++++----- .../regexMatch/test_regexMatch_size_limit.py | 18 ++++++++++++------ .../regexMatch/test_regexMatch_type_errors.py | 13 ++++++++----- .../string/regexMatch/test_regexMatch_usage.py | 13 +++++++------ documentdb_tests/framework/error_codes.py | 16 ++++++++++++++++ documentdb_tests/framework/test_constants.py | 2 ++ 36 files changed, 241 insertions(+), 140 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/__init__.py 
create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/__init__.py diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index ccbf21ac..37026f58 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -11,7 +11,7 @@ jobs: services: mongodb: - image: mongo:8.2 + image: mongo:8.2.4 ports: - 27017:27017 options: >- diff --git a/documentdb_tests/compatibility/tests/core/__init__.py b/documentdb_tests/compatibility/tests/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/__init__.py b/documentdb_tests/compatibility/tests/core/operator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_invalid_args.py index 9c8fd0a6..f487b46b 100644 --- 
a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_invalid_args.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_invalid_args.py @@ -3,7 +3,10 @@ import pytest from bson import Regex -from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) from documentdb_tests.framework.error_codes import ( FAILED_TO_PARSE_ERROR, INVALID_DOLLAR_FIELD_PATH, @@ -17,12 +20,12 @@ REGEX_OPTIONS_NULL_BYTE_ERROR, REGEX_UNKNOWN_FIELD_ERROR, ) -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.regexFind_common import ( RegexFindTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Syntax Validation]: missing required fields or unknown fields produce errors. 
REGEXFIND_SYNTAX_ERROR_TESTS: list[RegexFindTest] = [ @@ -268,6 +271,6 @@ def test_regexfind_cases(collection, test_case: RegexFindTest): """Test $regexFind invalid argument cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_matching.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_matching.py index 81fc8e97..0e2fd143 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_matching.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_matching.py @@ -2,13 +2,16 @@ import pytest -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.regexFind_common import ( RegexFindTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [First Match Only]: only the first match in the input string is returned. 
REGEXFIND_FIRST_MATCH_TESTS: list[RegexFindTest] = [ @@ -281,6 +284,6 @@ def test_regexfind_cases(collection, test_case: RegexFindTest): """Test $regexFind matching behavior cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_null.py index 128ab39f..5874c8ea 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_null.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_null.py @@ -2,14 +2,17 @@ import pytest -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import MISSING -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( + +from .utils.regexFind_common import ( RegexFindTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression _PLACEHOLDER = object() _NULL_PATTERNS = [ @@ -136,6 +139,6 @@ def _build_null_tests(null_value, prefix) -> list[RegexFindTest]: def test_regexfind_cases(collection, test_case: RegexFindTest): """Test $regexFind null propagation cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) 
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_options.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_options.py index 0c7ed796..2bc12316 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_options.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_options.py @@ -3,13 +3,16 @@ import pytest from bson import Regex -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.regexFind_common import ( RegexFindTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Regex Options]: options field accepts i, m, s, x as a string or BSON Regex flags. # Empty string, duplicates, and "u" (PCRE UTF-8 flag) are valid. 
Invalid BSON Regex flags are @@ -162,6 +165,6 @@ def test_regexfind_cases(collection, test_case: RegexFindTest): """Test $regexFind options cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_precedence.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_precedence.py index 70094f6a..637702ed 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_precedence.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_precedence.py @@ -3,7 +3,10 @@ import pytest from bson import Regex -from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) from documentdb_tests.framework.error_codes import ( REGEX_BAD_OPTION_ERROR, REGEX_BAD_PATTERN_ERROR, @@ -12,13 +15,13 @@ REGEX_OPTIONS_TYPE_ERROR, REGEX_REGEX_TYPE_ERROR, ) -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import MISSING -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( + +from .utils.regexFind_common import ( RegexFindTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Type Error Precedence]: options type errors take precedence over null propagation. # When both input and regex have wrong types, regex error wins. 
Wrong-type arguments error even @@ -121,6 +124,6 @@ def test_regexfind_cases(collection, test_case: RegexFindTest): """Test $regexFind error precedence cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_size_limit.py index 6651c98b..0c9eb6a0 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_size_limit.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_size_limit.py @@ -2,15 +2,21 @@ import pytest -from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) from documentdb_tests.framework.error_codes import REGEX_BAD_PATTERN_ERROR, STRING_SIZE_LIMIT_ERROR -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.framework.test_constants import REGEX_PATTERN_LIMIT_BYTES, STRING_SIZE_LIMIT_BYTES -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + REGEX_PATTERN_LIMIT_BYTES, + STRING_SIZE_LIMIT_BYTES, +) + +from .utils.regexFind_common import ( RegexFindTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [String Size Limit - Success]: input one byte under the limit is accepted. 
REGEXFIND_SIZE_LIMIT_SUCCESS_TESTS: list[RegexFindTest] = [ @@ -58,6 +64,6 @@ def test_regexfind_cases(collection, test_case: RegexFindTest): """Test $regexFind size limit cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_type_errors.py index 0895fa71..f61ae8f2 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_type_errors.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_type_errors.py @@ -5,19 +5,22 @@ import pytest from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp -from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) from documentdb_tests.framework.error_codes import ( REGEX_INPUT_TYPE_ERROR, REGEX_OPTIONS_TYPE_ERROR, REGEX_REGEX_TYPE_ERROR, ) -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( + +from .utils.regexFind_common import ( RegexFindTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Type Strictness - input]: non-string, non-null input produces an error. 
REGEXFIND_INPUT_TYPE_TESTS: list[RegexFindTest] = [ @@ -385,6 +388,6 @@ def test_regexfind_cases(collection, test_case: RegexFindTest): """Test $regexFind type error cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_usage.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_usage.py index d3284b1d..5f6cdf7d 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_usage.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFind/test_regexFind_usage.py @@ -5,17 +5,20 @@ import pytest -from documentdb_tests.framework.assertions import assertResult, assertSuccess -from documentdb_tests.framework.test_case import BaseTestCase, pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFind.utils.regexFind_common import ( - RegexFindTest, - _expr, -) from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, execute_expression, execute_project, execute_project_with_insert, ) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_case import BaseTestCase + +from .utils.regexFind_common import ( + RegexFindTest, + _expr, +) # Property [Document Field References]: $regexFind works with field references @@ -145,6 +148,6 @@ def test_regexfind_return_type(collection, test_case: RegexFindReturnTypeTest): def test_regexfind_cases(collection, test_case: RegexFindTest): """Test $regexFind usage cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + 
assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_encoding.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_encoding.py index 6e22878d..c012f4c7 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_encoding.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_encoding.py @@ -2,13 +2,16 @@ import pytest -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.regexFindAll_common import ( RegexFindAllTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [idx Code Point Semantics]: idx counts Unicode code points, not bytes. Each character # contributes 1 regardless of UTF-8 byte width. 
@@ -145,6 +148,6 @@ def test_regexfindall_encoding(collection, test_case: RegexFindAllTest): """Test $regexFindAll Unicode encoding and idx code point semantics.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invalid_args.py index f09af115..a41c6b9a 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invalid_args.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invalid_args.py @@ -2,7 +2,10 @@ import pytest -from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) from documentdb_tests.framework.error_codes import ( FAILED_TO_PARSE_ERROR, INVALID_DOLLAR_FIELD_PATH, @@ -15,12 +18,12 @@ REGEX_OPTIONS_NULL_BYTE_ERROR, REGEX_UNKNOWN_FIELD_ERROR, ) -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.regexFindAll_common import ( RegexFindAllTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Syntax Validation]: missing required fields or unknown fields produce errors. 
REGEXFINDALL_SYNTAX_ERROR_TESTS: list[RegexFindAllTest] = [ @@ -244,6 +247,6 @@ def test_regexfindall_invalid_args(collection, test_case: RegexFindAllTest): """Test $regexFindAll syntax validation and invalid argument errors.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invariants.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invariants.py index d757abc8..4bf87873 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invariants.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_invariants.py @@ -5,16 +5,18 @@ import pytest -from documentdb_tests.framework.assertions import assertSuccess -from documentdb_tests.framework.test_case import BaseTestCase, pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( - RegexFindAllTest, - _expr, -) from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( execute_expression, execute_project, ) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_case import BaseTestCase + +from .utils.regexFindAll_common import ( + RegexFindAllTest, + _expr, +) # Property [Return Type - array]: result is always an array, even for no-match and null-propagation # cases. 
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_matching.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_matching.py index 542387e1..02af66a4 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_matching.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_matching.py @@ -2,13 +2,16 @@ import pytest -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.regexFindAll_common import ( RegexFindAllTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Captures Behavior]: captures array length equals the number of capture groups, in # pattern order. Unmatched branches produce null. Non-capturing groups are excluded. 
Nested groups @@ -296,6 +299,6 @@ def test_regexfindall_matching(collection, test_case: RegexFindAllTest): """Test $regexFindAll matching behavior.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_null.py index a6527a94..4facdcb2 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_null.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_null.py @@ -2,14 +2,17 @@ import pytest -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import MISSING -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + +from .utils.regexFindAll_common import ( RegexFindAllTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Null Propagation]: null in input or regex causes the result to be an empty array. 
_PLACEHOLDER = object() @@ -120,6 +123,6 @@ def _build_null_tests(null_value, prefix) -> list[RegexFindAllTest]: def test_regexfindall_null(collection, test_case: RegexFindAllTest): """Test $regexFindAll null propagation cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_options.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_options.py index 039163c9..aa199aad 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_options.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_options.py @@ -3,13 +3,16 @@ import pytest from bson import Regex -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.regexFindAll_common import ( RegexFindAllTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Regex Options]: options field accepts i, m, s, x as a string or BSON Regex flags. Empty # string, duplicates, and "u" (PCRE UTF-8 flag) are valid. 
Invalid BSON Regex flags are silently @@ -190,6 +193,6 @@ def test_regexfindall_options(collection, test_case: RegexFindAllTest): """Test $regexFindAll regex options behavior.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_precedence.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_precedence.py index 6608c330..1beb65bb 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_precedence.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_precedence.py @@ -3,7 +3,10 @@ import pytest from bson import Regex -from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) from documentdb_tests.framework.error_codes import ( REGEX_BAD_OPTION_ERROR, REGEX_BAD_PATTERN_ERROR, @@ -12,13 +15,13 @@ REGEX_OPTIONS_TYPE_ERROR, REGEX_REGEX_TYPE_ERROR, ) -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import MISSING -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + +from .utils.regexFindAll_common import ( RegexFindAllTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Null Precedence]: null propagation from regex takes precedence over bad option flag # validation. 
@@ -184,6 +187,6 @@ def test_regexfindall_precedence(collection, test_case: RegexFindAllTest): """Test $regexFindAll error precedence and options conflict.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_size_limit.py index 22572e55..12299a44 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_size_limit.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_size_limit.py @@ -2,15 +2,21 @@ import pytest -from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) from documentdb_tests.framework.error_codes import REGEX_BAD_PATTERN_ERROR, STRING_SIZE_LIMIT_ERROR -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.framework.test_constants import REGEX_PATTERN_LIMIT_BYTES, STRING_SIZE_LIMIT_BYTES -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + REGEX_PATTERN_LIMIT_BYTES, + STRING_SIZE_LIMIT_BYTES, +) + +from .utils.regexFindAll_common import ( RegexFindAllTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [String Size Limit - Success]: input one byte under the limit is accepted. 
REGEXFINDALL_SIZE_LIMIT_SUCCESS_TESTS: list[RegexFindAllTest] = [ @@ -69,6 +75,6 @@ def test_regexfindall_size_limit(collection, test_case: RegexFindAllTest): """Test $regexFindAll string size limit behavior.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_type_errors.py index d48fa6cc..fda37ede 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_type_errors.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_type_errors.py @@ -5,19 +5,22 @@ import pytest from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp -from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) from documentdb_tests.framework.error_codes import ( REGEX_INPUT_TYPE_ERROR, REGEX_OPTIONS_TYPE_ERROR, REGEX_REGEX_TYPE_ERROR, ) -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( + +from .utils.regexFindAll_common import ( RegexFindAllTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Type Strictness - input]: non-string, non-null input produces an error. 
REGEXFINDALL_INPUT_TYPE_TESTS: list[RegexFindAllTest] = [ @@ -385,6 +388,6 @@ def test_regexfindall_type_errors(collection, test_case: RegexFindAllTest): """Test $regexFindAll type strictness for input, regex, and options.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_usage.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_usage.py index b4d6c6fa..80a32fba 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_usage.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexFindAll/test_regexFindAll_usage.py @@ -2,16 +2,18 @@ import pytest -from documentdb_tests.framework.assertions import assertResult, assertSuccess -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexFindAll.utils.regexFindAll_common import ( - RegexFindAllTest, - _expr, -) from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, execute_expression, execute_project_with_insert, ) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.regexFindAll_common import ( + RegexFindAllTest, + _expr, +) # Property [Expression Arguments]: input, regex, and options accept expressions that resolve to the # appropriate type. 
@@ -69,7 +71,7 @@ def test_regexfindall_usage(collection, test_case: RegexFindAllTest): """Test $regexFindAll expression arguments and field reference behavior.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_invalid_args.py index 0b0bcceb..579bff3f 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_invalid_args.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_invalid_args.py @@ -3,7 +3,10 @@ import pytest from bson import Regex -from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) from documentdb_tests.framework.error_codes import ( FAILED_TO_PARSE_ERROR, INVALID_DOLLAR_FIELD_PATH, @@ -17,12 +20,12 @@ REGEX_OPTIONS_NULL_BYTE_ERROR, REGEX_UNKNOWN_FIELD_ERROR, ) -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.regexMatch_common import ( RegexMatchTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property 
[Syntax Validation]: missing required fields or unknown fields produce errors. REGEXMATCH_SYNTAX_ERROR_TESTS: list[RegexMatchTest] = [ @@ -273,6 +276,6 @@ def test_regexmatch_cases(collection, test_case: RegexMatchTest): """Test $regexMatch invalid argument cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_matching.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_matching.py index f361cefd..e67b5d1d 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_matching.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_matching.py @@ -2,13 +2,16 @@ import pytest -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.regexMatch_common import ( RegexMatchTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Expression Arguments]: input, regex, and options accept expressions that resolve to # the appropriate type. 
@@ -160,6 +163,6 @@ def test_regexmatch_cases(collection, test_case: RegexMatchTest): """Test $regexMatch matching behavior cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_null.py index f2db54cc..ff968531 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_null.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_null.py @@ -3,14 +3,17 @@ import pytest from bson import Regex -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import MISSING -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( + +from .utils.regexMatch_common import ( RegexMatchTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression _PLACEHOLDER = object() _NULL_PATTERNS = [ @@ -148,6 +151,6 @@ def _build_null_tests(null_value, prefix) -> list[RegexMatchTest]: def test_regexmatch_cases(collection, test_case: RegexMatchTest): """Test $regexMatch null propagation cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git 
a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_options.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_options.py index 9d90894e..21f4bc9a 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_options.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_options.py @@ -3,13 +3,16 @@ import pytest from bson import Regex -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.regexMatch_common import ( RegexMatchTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Regex Options]: options field accepts i, m, s, x as a string or BSON Regex flags. # Empty string, duplicates, and "u" (PCRE UTF-8 flag) are valid. 
Invalid BSON Regex flags are @@ -169,6 +172,6 @@ def test_regexmatch_cases(collection, test_case: RegexMatchTest): """Test $regexMatch options cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_precedence.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_precedence.py index 5d498a6e..99d85880 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_precedence.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_precedence.py @@ -3,7 +3,10 @@ import pytest from bson import Regex -from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) from documentdb_tests.framework.error_codes import ( REGEX_BAD_OPTION_ERROR, REGEX_BAD_PATTERN_ERROR, @@ -12,13 +15,13 @@ REGEX_OPTIONS_TYPE_ERROR, REGEX_REGEX_TYPE_ERROR, ) -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import MISSING -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( + +from .utils.regexMatch_common import ( RegexMatchTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Type Error Precedence]: options type errors take precedence over null propagation. # When both input and regex have wrong types, regex error wins. 
Wrong-type arguments error even @@ -121,6 +124,6 @@ def test_regexmatch_cases(collection, test_case: RegexMatchTest): """Test $regexMatch type error precedence cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_size_limit.py index ddb73277..03cdc329 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_size_limit.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_size_limit.py @@ -2,15 +2,21 @@ import pytest -from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) from documentdb_tests.framework.error_codes import REGEX_BAD_PATTERN_ERROR, STRING_SIZE_LIMIT_ERROR -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.framework.test_constants import REGEX_PATTERN_LIMIT_BYTES, STRING_SIZE_LIMIT_BYTES -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + REGEX_PATTERN_LIMIT_BYTES, + STRING_SIZE_LIMIT_BYTES, +) + +from .utils.regexMatch_common import ( RegexMatchTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [String Size Limit - Success]: input one byte under the limit is accepted. 
REGEXMATCH_SIZE_LIMIT_SUCCESS_TESTS: list[RegexMatchTest] = [ @@ -58,6 +64,6 @@ def test_regexmatch_cases(collection, test_case: RegexMatchTest): """Test $regexMatch size limit cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_type_errors.py index 3a062e2d..07bed54e 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_type_errors.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_type_errors.py @@ -5,19 +5,22 @@ import pytest from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp -from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + assert_expression_result, + execute_expression, +) from documentdb_tests.framework.error_codes import ( REGEX_INPUT_TYPE_ERROR, REGEX_OPTIONS_TYPE_ERROR, REGEX_REGEX_TYPE_ERROR, ) -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( + +from .utils.regexMatch_common import ( RegexMatchTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Type Strictness - input]: non-string, non-null input produces an error. 
REGEXMATCH_INPUT_TYPE_TESTS: list[RegexMatchTest] = [ @@ -385,6 +388,6 @@ def test_regexmatch_cases(collection, test_case: RegexMatchTest): """Test $regexMatch type strictness cases.""" result = execute_expression(collection, _expr(test_case)) - assertResult( + assert_expression_result( result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_usage.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_usage.py index 2e892f1c..417461c0 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_usage.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/regexMatch/test_regexMatch_usage.py @@ -2,16 +2,17 @@ import pytest -from documentdb_tests.framework.assertions import assertSuccess -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.regexMatch.utils.regexMatch_common import ( - RegexMatchTest, - _expr, -) from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( execute_project, execute_project_with_insert, ) +from documentdb_tests.framework.assertions import assertSuccess +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.regexMatch_common import ( + RegexMatchTest, + _expr, +) # Property [Document Field References]: $regexMatch works with field references diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index c285b5aa..cc939867 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -4,11 +4,15 @@ """ DIVIDE_BY_ZERO_ERROR = 2 +FAILED_TO_PARSE_ERROR = 9 TYPE_MISMATCH_ERROR = 14 +BSON_TO_STRING_CONVERSION_ERROR = 16007 EXPRESSION_TYPE_MISMATCH_ERROR = 16020 
+STRING_SIZE_LIMIT_ERROR = 16493 MODULO_ZERO_REMAINDER_ERROR = 16610 MODULO_NON_NUMERIC_ERROR = 16611 MORE_THAN_ONE_DATE_ERROR = 16612 +INVALID_DOLLAR_FIELD_PATH = 16872 ABS_OVERFLOW_ERROR = 28680 LOG_NON_NUMERIC_VALUE_ERROR = 28756 LOG_NON_NUMERIC_BASE_ERROR = 28757 @@ -20,4 +24,16 @@ POW_BASE_ZERO_EXP_NEGATIVE_ERROR = 28764 NON_NUMERIC_TYPE_MISMATCH_ERROR = 28765 LN_NON_POSITIVE_INPUT_ERROR = 28766 +REGEX_MISSING_INPUT_ERROR = 31022 +REGEX_MISSING_REGEX_ERROR = 31023 +REGEX_UNKNOWN_FIELD_ERROR = 31024 +REGEX_NON_OBJECT_ERROR = 51103 +REGEX_INPUT_TYPE_ERROR = 51104 +REGEX_REGEX_TYPE_ERROR = 51105 +REGEX_OPTIONS_TYPE_ERROR = 51106 +REGEX_OPTIONS_CONFLICT_ERROR = 51107 +REGEX_BAD_OPTION_ERROR = 51108 +REGEX_NULL_BYTE_ERROR = 51109 +REGEX_OPTIONS_NULL_BYTE_ERROR = 51110 +REGEX_BAD_PATTERN_ERROR = 51111 MODULO_DECIMAL128_ZERO_REMAINDER_ERROR = 5733415 diff --git a/documentdb_tests/framework/test_constants.py b/documentdb_tests/framework/test_constants.py index 9e3088ed..18b813d8 100644 --- a/documentdb_tests/framework/test_constants.py +++ b/documentdb_tests/framework/test_constants.py @@ -64,6 +64,8 @@ # Other constant values MISSING = "$missing" +STRING_SIZE_LIMIT_BYTES = 16 * 1024 * 1024 +REGEX_PATTERN_LIMIT_BYTES = 16 * 1024 # Int32 lists NUMERIC_INT32_NEGATIVE = [INT32_UNDERFLOW, INT32_MIN]