From 7722b4cb6cc4eef40470a96bca40de0538d0101b Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Tue, 31 Mar 2026 16:57:00 +0000 Subject: [PATCH 1/2] Add string operator trim variant tests Signed-off-by: Daniel Frankcom --- .../string/ltrim/test_ltrim_custom_chars.py | 177 ++++++++++ .../string/ltrim/test_ltrim_default_trim.py | 298 ++++++++++++++++ .../string/ltrim/test_ltrim_encoding.py | 173 ++++++++++ .../string/ltrim/test_ltrim_input_forms.py | 104 ++++++ .../string/ltrim/test_ltrim_invalid_args.py | 205 +++++++++++ .../string/ltrim/test_ltrim_invariants.py | 252 ++++++++++++++ .../string/ltrim/test_ltrim_null.py | 74 ++++ .../string/ltrim/test_ltrim_size_limit.py | 66 ++++ .../string/ltrim/test_ltrim_type_errors.py | 279 +++++++++++++++ .../string/ltrim/utils/__init__.py | 0 .../string/ltrim/utils/ltrim_common.py | 28 ++ .../string/rtrim/test_rtrim_custom_chars.py | 177 ++++++++++ .../string/rtrim/test_rtrim_default_trim.py | 298 ++++++++++++++++ .../string/rtrim/test_rtrim_encoding.py | 173 ++++++++++ .../string/rtrim/test_rtrim_input_forms.py | 104 ++++++ .../string/rtrim/test_rtrim_invalid_args.py | 205 +++++++++++ .../string/rtrim/test_rtrim_invariants.py | 245 +++++++++++++ .../string/rtrim/test_rtrim_null.py | 74 ++++ .../string/rtrim/test_rtrim_size_limit.py | 66 ++++ .../string/rtrim/test_rtrim_type_errors.py | 279 +++++++++++++++ .../string/rtrim/utils/__init__.py | 0 .../string/rtrim/utils/rtrim_common.py | 28 ++ .../string/trim/test_trim_custom_chars.py | 137 ++++++++ .../string/trim/test_trim_default_trim.py | 323 ++++++++++++++++++ .../string/trim/test_trim_encoding.py | 174 ++++++++++ .../string/trim/test_trim_input_forms.py | 104 ++++++ .../string/trim/test_trim_invalid_args.py | 205 +++++++++++ .../string/trim/test_trim_invariants.py | 305 +++++++++++++++++ .../expressions/string/trim/test_trim_null.py | 74 ++++ .../string/trim/test_trim_size_limit.py | 67 ++++ .../string/trim/test_trim_type_errors.py | 279 +++++++++++++++ 
.../expressions/string/trim/utils/__init__.py | 0 .../string/trim/utils/trim_common.py | 28 ++ 33 files changed, 5001 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_custom_chars.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_default_trim.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_encoding.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_input_forms.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invalid_args.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invariants.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_null.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_size_limit.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_type_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/utils/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/utils/ltrim_common.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_custom_chars.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_default_trim.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_encoding.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_input_forms.py create mode 100644 
documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invalid_args.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invariants.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_null.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_size_limit.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_type_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/utils/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/utils/rtrim_common.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_custom_chars.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_default_trim.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_encoding.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_input_forms.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invalid_args.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invariants.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_null.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_size_limit.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_type_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/utils/__init__.py create mode 
100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/utils/trim_common.py diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_custom_chars.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_custom_chars.py new file mode 100644 index 00000000..0200612f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_custom_chars.py @@ -0,0 +1,177 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( + LtrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Custom Chars]: when chars is provided, only those characters are trimmed from the +# leading edge. Each character in chars is treated individually (not as a substring), and the +# order of characters in chars does not affect the result. +LTRIM_CUSTOM_CHARS_TESTS: list[LtrimTest] = [ + LtrimTest( + "custom_single_char", + input="aaahello", + chars="a", + expected="hello", + msg="$ltrim should trim leading occurrences of a single custom char", + ), + LtrimTest( + "custom_repeated_single", + input="xxxhello", + chars="x", + expected="hello", + msg="$ltrim should trim repeated leading custom char", + ), + # Custom chars completely replaces the default whitespace set. 
+ LtrimTest( + "custom_spaces_not_trimmed", + input=" xxxhello", + chars="x", + expected=" xxxhello", + msg="$ltrim should not trim spaces when custom chars replaces default set", + ), + LtrimTest( + "custom_tabs_not_trimmed", + input="\t\txxxhello", + chars="x", + expected="\t\txxxhello", + msg="$ltrim should not trim tabs when custom chars replaces default set", + ), + # Duplicate characters in chars have no additional effect. + LtrimTest( + "custom_duplicate_chars", + input="aaahello", + chars="aab", + expected="hello", + msg="$ltrim should ignore duplicate characters in chars", + ), + # Each character in chars is treated individually, not as a substring pattern. + LtrimTest( + "custom_individual_any_order", + input="cbahello", + chars="abc", + expected="hello", + msg="$ltrim should treat each char in chars individually, not as a substring", + ), + LtrimTest( + "custom_individual_mixed_repeats", + input="aabbcchello", + chars="abc", + expected="hello", + msg="$ltrim should trim mixed repeats of individual chars", + ), + # Characters not in chars are preserved. + LtrimTest( + "custom_no_match", + input="xhello", + chars="a", + expected="xhello", + msg="$ltrim should preserve leading chars not in the trim set", + ), + # Order of characters in chars does not affect the result. + LtrimTest( + "custom_order_abc", + input="bcaahello", + chars="abc", + expected="hello", + msg="$ltrim should produce same result regardless of chars order (abc)", + ), + LtrimTest( + "custom_order_cba", + input="bcaahello", + chars="cba", + expected="hello", + msg="$ltrim should produce same result regardless of chars order (cba)", + ), + LtrimTest( + "custom_order_bac", + input="bcaahello", + chars="bac", + expected="hello", + msg="$ltrim should produce same result regardless of chars order (bac)", + ), +] + + +# Property [Directionality]: only leading (left-side) characters are trimmed. Trailing characters +# matching the trim set are preserved. 
+LTRIM_DIRECTIONALITY_TESTS: list[LtrimTest] = [ + LtrimTest( + "dir_trailing_spaces", + input="hello ", + expected="hello ", + msg="$ltrim should preserve trailing spaces", + ), + LtrimTest( + "dir_leading_trimmed_trailing_kept", + input=" hello ", + expected="hello ", + msg="$ltrim should trim leading spaces while preserving trailing spaces", + ), + LtrimTest( + "dir_custom_trailing_kept", + input="aaahelloaaa", + chars="a", + expected="helloaaa", + msg="$ltrim should trim leading custom chars while preserving trailing ones", + ), + LtrimTest( + "dir_trailing_tab_kept", + input="\thello\t", + expected="hello\t", + msg="$ltrim should trim leading tab while preserving trailing tab", + ), + # First character not in trim set stops all trimming. + LtrimTest( + "dir_first_char_not_in_set", + input="hxexlxlxo", + chars="x", + expected="hxexlxlxo", + msg="$ltrim should not trim when first character is not in the trim set", + ), +] + +# Property [Whitespace Subset Chars]: when chars is a subset of whitespace characters, only +# those specific whitespace characters are trimmed; other whitespace is preserved. 
+LTRIM_WHITESPACE_SUBSET_TESTS: list[LtrimTest] = [ + LtrimTest( + "subset_tab_only", + input="\t hello \t", + chars="\t", + expected=" hello \t", + msg="$ltrim with chars=tab should trim only leading tabs, preserving spaces", + ), + LtrimTest( + "subset_space_tab", + input=" \t\nhello\n\t ", + chars=" \t", + expected="\nhello\n\t ", + msg="$ltrim with chars=space+tab should preserve leading newlines", + ), + LtrimTest( + "subset_newline_only", + input="\n\n hello", + chars="\n", + expected=" hello", + msg="$ltrim with chars=newline should trim only leading newlines, preserving spaces", + ), +] + +LTRIM_CUSTOM_CHARS_ALL_TESTS = ( + LTRIM_CUSTOM_CHARS_TESTS + LTRIM_DIRECTIONALITY_TESTS + LTRIM_WHITESPACE_SUBSET_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(LTRIM_CUSTOM_CHARS_ALL_TESTS)) +def test_ltrim_custom_chars(collection, test_case: LtrimTest): + """Test $ltrim custom chars and directionality cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_default_trim.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_default_trim.py new file mode 100644 index 00000000..31183758 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_default_trim.py @@ -0,0 +1,298 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( + LtrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Default Trimming]: when chars is omitted, 
leading whitespace is trimmed. The default +# set includes ASCII whitespace (space, tab, newline, carriage return, form feed, vertical tab), +# the null byte (U+0000), non-breaking space (U+00A0), and the "Zs" spaces U+1680 and U+2000-U+200A. +LTRIM_DEFAULT_TRIM_TESTS: list[LtrimTest] = [ + # ASCII whitespace characters. + LtrimTest( + "default_space", + input=" hello", + expected="hello", + msg="$ltrim should trim leading space by default", + ), + LtrimTest( + "default_tab", + input="\thello", + expected="hello", + msg="$ltrim should trim leading tab by default", + ), + LtrimTest( + "default_newline", + input="\nhello", + expected="hello", + msg="$ltrim should trim leading newline by default", + ), + LtrimTest( + "default_cr", + input="\rhello", + expected="hello", + msg="$ltrim should trim leading carriage return by default", + ), + LtrimTest( + "default_form_feed", + input="\fhello", + expected="hello", + msg="$ltrim should trim leading form feed by default", + ), + LtrimTest( + "default_vertical_tab", + input="\x0bhello", + expected="hello", + msg="$ltrim should trim leading vertical tab by default", + ), + # Null byte (U+0000). + LtrimTest( + "default_null_byte", + input="\x00hello", + expected="hello", + msg="$ltrim should trim leading null byte by default", + ), + # Non-breaking space (U+00A0). + LtrimTest( + "default_nbsp", + input="\u00a0hello", + expected="hello", + msg="$ltrim should trim leading non-breaking space by default", + ), + # Unicode "Zs" category spaces that are in the default set. 
+ LtrimTest( + "default_en_space", + input="\u2000hello", + expected="hello", + msg="$ltrim should trim leading en space (U+2000) by default", + ), + LtrimTest( + "default_em_space", + input="\u2003hello", + expected="hello", + msg="$ltrim should trim leading em space (U+2003) by default", + ), + LtrimTest( + "default_thin_space", + input="\u2009hello", + expected="hello", + msg="$ltrim should trim leading thin space (U+2009) by default", + ), + LtrimTest( + "default_hair_space", + input="\u200ahello", + expected="hello", + msg="$ltrim should trim leading hair space (U+200A) by default", + ), + LtrimTest( + "default_ogham_space", + input="\u1680hello", + expected="hello", + msg="$ltrim should trim leading ogham space (U+1680) by default", + ), + LtrimTest( + "default_em_quad", + input="\u2001hello", + expected="hello", + msg="$ltrim should trim leading em quad (U+2001) by default", + ), + LtrimTest( + "default_en_space_2002", + input="\u2002hello", + expected="hello", + msg="$ltrim should trim leading en space (U+2002) by default", + ), + LtrimTest( + "default_three_per_em", + input="\u2004hello", + expected="hello", + msg="$ltrim should trim leading three-per-em space (U+2004) by default", + ), + LtrimTest( + "default_four_per_em", + input="\u2005hello", + expected="hello", + msg="$ltrim should trim leading four-per-em space (U+2005) by default", + ), + LtrimTest( + "default_six_per_em", + input="\u2006hello", + expected="hello", + msg="$ltrim should trim leading six-per-em space (U+2006) by default", + ), + LtrimTest( + "default_figure_space", + input="\u2007hello", + expected="hello", + msg="$ltrim should trim leading figure space (U+2007) by default", + ), + LtrimTest( + "default_punctuation_space", + input="\u2008hello", + expected="hello", + msg="$ltrim should trim leading punctuation space (U+2008) by default", + ), + # Multiple mixed whitespace characters. 
+ LtrimTest( + "default_mixed_ascii_whitespace", + input=" \t\n\r\f\x0bhello", + expected="hello", + msg="$ltrim should trim all mixed ASCII whitespace from leading edge", + ), + LtrimTest( + "default_mixed_unicode_whitespace", + input="\u00a0\u2000\u2003hello", + expected="hello", + msg="$ltrim should trim mixed Unicode whitespace from leading edge", + ), + LtrimTest( + "default_mixed_ascii_and_unicode", + input=" \t\u00a0\u2000hello", + expected="hello", + msg="$ltrim should trim mixed ASCII and Unicode whitespace from leading edge", + ), + # All 20 default whitespace code points as a leading prefix. + LtrimTest( + "default_all_20_mixed", + input=( + "\x00\t\n\x0b\f\r \u00a0\u1680\u2000\u2001\u2002" + "\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200ahello" + ), + expected="hello", + msg="$ltrim should trim all 20 default whitespace code points from leading edge", + ), +] + +# Property [Default Trimming - exclusions]: characters not in the default whitespace set are +# preserved as leading characters. +LTRIM_DEFAULT_TRIM_EXCLUSION_TESTS: list[LtrimTest] = [ + # Zero-width space (U+200B). + LtrimTest( + "default_excl_zwsp", + input="\u200bhello", + expected="\u200bhello", + msg="$ltrim should not trim zero-width space (U+200B) by default", + ), + # Line separator (U+2028). + LtrimTest( + "default_excl_line_separator", + input="\u2028hello", + expected="\u2028hello", + msg="$ltrim should not trim line separator (U+2028) by default", + ), + # Paragraph separator (U+2029). + LtrimTest( + "default_excl_paragraph_separator", + input="\u2029hello", + expected="\u2029hello", + msg="$ltrim should not trim paragraph separator (U+2029) by default", + ), + # Next line (U+0085). + LtrimTest( + "default_excl_next_line", + input="\u0085hello", + expected="\u0085hello", + msg="$ltrim should not trim next line (U+0085) by default", + ), + # Ideographic space (U+3000). 
+ LtrimTest( + "default_excl_ideographic_space", + input="\u3000hello", + expected="\u3000hello", + msg="$ltrim should not trim ideographic space (U+3000) by default", + ), + # Narrow no-break space (U+202F). + LtrimTest( + "default_excl_narrow_nbsp", + input="\u202fhello", + expected="\u202fhello", + msg="$ltrim should not trim narrow no-break space (U+202F) by default", + ), + # Medium mathematical space (U+205F). + LtrimTest( + "default_excl_medium_math_space", + input="\u205fhello", + expected="\u205fhello", + msg="$ltrim should not trim medium mathematical space (U+205F) by default", + ), + # BOM / zero-width no-break space (U+FEFF). + LtrimTest( + "default_excl_bom", + input="\ufeffhello", + expected="\ufeffhello", + msg="$ltrim should not trim BOM / zero-width no-break space (U+FEFF) by default", + ), +] + + +# Property [Edge Cases]: the operator produces correct results at input extremes: empty strings, +# fully trimmable strings, etc. +LTRIM_EDGE_TESTS: list[LtrimTest] = [ + LtrimTest( + "edge_empty_default", + input="", + expected="", + msg="$ltrim should return empty string for empty input with default chars", + ), + LtrimTest( + "edge_empty_custom", + input="", + chars="abc", + expected="", + msg="$ltrim should return empty string for empty input with custom chars", + ), + LtrimTest( + "edge_all_whitespace", + input=" ", + expected="", + msg="$ltrim should return empty string when input is all whitespace", + ), + LtrimTest( + "edge_all_in_chars", + input="aaabbb", + chars="ab", + expected="", + msg="$ltrim should return empty string when all characters are in trim set", + ), + # Null byte in custom chars is not treated as a C-style string terminator. + LtrimTest( + "edge_null_byte_custom", + input="\x00\x00hello", + chars="\x00", + expected="hello", + msg="$ltrim should trim null bytes without treating them as C-string terminators", + ), + # Control character stops default trimming even if followed by whitespace. 
+ LtrimTest( + "edge_control_char_stops_trim", + input="\x01 hello", + expected="\x01 hello", + msg="$ltrim should not trim control character U+0001 by default", + ), + # Leading space trimmed, control character stops further trimming. + LtrimTest( + "edge_space_then_control_char", + input=" \x01hello", + expected="\x01hello", + msg="$ltrim should trim leading space but stop at control character", + ), +] + +LTRIM_DEFAULT_TRIM_ALL_TESTS = ( + LTRIM_DEFAULT_TRIM_TESTS + LTRIM_DEFAULT_TRIM_EXCLUSION_TESTS + LTRIM_EDGE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(LTRIM_DEFAULT_TRIM_ALL_TESTS)) +def test_ltrim_default_trim(collection, test_case: LtrimTest): + """Test $ltrim default trimming and edge cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_encoding.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_encoding.py new file mode 100644 index 00000000..001d5af8 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_encoding.py @@ -0,0 +1,173 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( + LtrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Encoding and Character Handling]: trimming operates on whole Unicode code points, +# not bytes or substrings. +LTRIM_ENCODING_TESTS: list[LtrimTest] = [ + # 2-byte UTF-8 character (U+00E9, é). 
+ LtrimTest( + "enc_2byte_trim", + input="\u00e9hello", + chars="\u00e9", + expected="hello", + msg="$ltrim should trim 2-byte UTF-8 character é (U+00E9)", + ), + # 3-byte UTF-8 character (U+2603, ☃). + LtrimTest( + "enc_3byte_trim", + input="\u2603hello", + chars="\u2603", + expected="hello", + msg="$ltrim should trim 3-byte UTF-8 character ☃ (U+2603)", + ), + # 4-byte UTF-8 character (U+1F600, 😀). + LtrimTest( + "enc_4byte_trim", + input="\U0001f600hello", + chars="\U0001f600", + expected="hello", + msg="$ltrim should trim 4-byte UTF-8 character 😀 (U+1F600)", + ), + # Mixed multi-byte characters in chars, each treated individually. + LtrimTest( + "enc_mixed_multibyte", + input="\u00e9\u2603\U0001f600hello", + chars="\U0001f600\u00e9\u2603", + expected="hello", + msg="$ltrim should trim mixed multi-byte characters individually", + ), + # Partial bytes do not match. Trimming "é" (U+00E9) should not affect "e" (U+0065). + LtrimTest( + "enc_partial_no_match", + input="ehello", + chars="\u00e9", + expected="ehello", + msg="$ltrim should not match partial byte sequences (é vs e)", + ), + # Regex-special characters treated as literals. + LtrimTest( + "enc_regex_special", + input=".*\\hello", + chars="\\*.", + expected="hello", + msg="$ltrim should treat regex-special characters as literals", + ), + LtrimTest( + "enc_regex_plus", + input="+++hello", + chars="+", + expected="hello", + msg="$ltrim should treat + as a literal character", + ), + LtrimTest( + "enc_regex_question", + input="???hello", + chars="?", + expected="hello", + msg="$ltrim should treat ? as a literal character", + ), + LtrimTest( + "enc_regex_brackets", + input="(([hello", + chars="([", + expected="hello", + msg="$ltrim should treat ( and [ as literal characters", + ), + # Trimming decomposed form: "e" in chars matches the base "e" independently of the + # following combining mark. 
+ LtrimTest( + "enc_base_char_strips_from_decomposed", + input="e\u0301hello", + chars="e", + expected="\u0301hello", + msg="$ltrim should trim base char independently from decomposed sequence", + ), + # Combining mark (U+0301) is a valid code point and can be trimmed. + LtrimTest( + "enc_combining_mark_trim", + input="\u0301hello", + chars="\u0301", + expected="hello", + msg="$ltrim should trim standalone combining mark (U+0301)", + ), + # Precomposed é in input, decomposed chars "e" + combining acute. Each char in chars is + # individual, so neither "e" nor U+0301 matches U+00E9. + LtrimTest( + "enc_decomposed_chars_precomposed_input", + input="\u00e9hello", + chars="e\u0301", + expected="\u00e9hello", + msg="$ltrim should not match precomposed é with decomposed chars e+combining accent", + ), + # Case sensitivity: uppercase and lowercase are distinct code points. + LtrimTest( + "enc_case_lower_no_match", + input="ABChello", + chars="abc", + expected="ABChello", + msg="$ltrim should not trim uppercase when chars contains lowercase", + ), + LtrimTest( + "enc_case_upper_no_match", + input="abchello", + chars="ABC", + expected="abchello", + msg="$ltrim should not trim lowercase when chars contains uppercase", + ), + LtrimTest( + "enc_case_exact_match", + input="AbChello", + chars="AbC", + expected="hello", + msg="$ltrim should trim when case matches exactly", + ), + # Greek case sensitivity. + LtrimTest( + "enc_case_greek", + input="\u03c3hello", + chars="\u03a3", + expected="\u03c3hello", + msg="$ltrim should not fold Greek lowercase σ to uppercase Σ", + ), + # German sharp s (U+00DF) trimmed as single code point. + LtrimTest( + "enc_sharp_s", + input="\u00df\u00dfhello", + chars="\u00df", + expected="hello", + msg="$ltrim should trim ß (U+00DF) as a single code point", + ), + # Surrogate-adjacent code points handled correctly. 
+ LtrimTest( + "enc_surrogate_adj_d7ff", + input="\ud7ffhello", + chars="\ud7ff", + expected="hello", + msg="$ltrim should handle surrogate-adjacent code point U+D7FF", + ), + LtrimTest( + "enc_surrogate_adj_e000", + input="\ue000hello", + chars="\ue000", + expected="hello", + msg="$ltrim should handle surrogate-adjacent code point U+E000", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(LTRIM_ENCODING_TESTS)) +def test_ltrim_encoding(collection, test_case: LtrimTest): + """Test $ltrim encoding and character handling cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_input_forms.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_input_forms.py new file mode 100644 index 00000000..cbca9d28 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_input_forms.py @@ -0,0 +1,104 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( + LtrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.expression_test_case import ExpressionTestCase +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_expression_with_insert, +) + +# Property [Expression Arguments]: input and chars accept any expression that resolves to a +# string. Nested $ltrim expressions are also accepted. +LTRIM_EXPR_TESTS: list[LtrimTest] = [ + # input is an expression. 
+ LtrimTest( + "expr_input_concat", + input={"$concat": [" ", "hello"]}, + expected="hello", + msg="$ltrim should accept $concat expression as input", + ), + # chars is an expression. + LtrimTest( + "expr_chars_concat", + input="aaahello", + chars={"$concat": ["a"]}, + expected="hello", + msg="$ltrim should accept $concat expression as chars", + ), + # Both input and chars are expressions. + LtrimTest( + "expr_both", + input={"$concat": ["xx", "hello"]}, + chars={"$concat": ["x"]}, + expected="hello", + msg="$ltrim should accept expressions for both input and chars", + ), + # Nested $ltrim as input to another $ltrim. + LtrimTest( + "expr_nested_ltrim", + input={"$ltrim": {"input": " aahello"}}, + chars="a", + expected="hello", + msg="$ltrim should accept nested $ltrim as input expression", + ), + # $literal for dollar-prefixed strings. + LtrimTest( + "expr_literal_dollar", + input={"$literal": "$$$hello"}, + chars={"$literal": "$"}, + expected="hello", + msg="$ltrim should accept $literal for dollar-prefixed strings", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(LTRIM_EXPR_TESTS)) +def test_ltrim_input_forms(collection, test_case: LtrimTest): + """Test $ltrim expression argument cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) + + +# Property [Document Field References]: $ltrim works with field references +# from inserted documents, not just inline literals. +LTRIM_FIELD_REF_TESTS: list[ExpressionTestCase] = [ + # Object expression: both input and chars from simple field paths. + ExpressionTestCase( + "field_object", + expression={"$ltrim": {"input": "$s", "chars": "$c"}}, + doc={"s": "aaahello", "c": "a"}, + expected="hello", + msg="$ltrim should accept input and chars from document field paths", + ), + # Composite array: both from $arrayElemAt on a projected array-of-objects field. 
+ ExpressionTestCase( + "field_composite_array", + expression={ + "$ltrim": { + "input": {"$arrayElemAt": ["$a.b", 0]}, + "chars": {"$arrayElemAt": ["$a.b", 1]}, + } + }, + doc={"a": [{"b": "aaahello"}, {"b": "a"}]}, + expected="hello", + msg="$ltrim should accept input and chars from composite array field paths", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(LTRIM_FIELD_REF_TESTS)) +def test_ltrim_field_refs(collection, test_case: ExpressionTestCase): + """Test $ltrim with document field references.""" + result = execute_expression_with_insert(collection, test_case.expression, test_case.doc) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invalid_args.py new file mode 100644 index 00000000..dedbe58e --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invalid_args.py @@ -0,0 +1,205 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + FAILED_TO_PARSE_ERROR, + INVALID_DOLLAR_FIELD_PATH, + TRIM_MISSING_INPUT_ERROR, + TRIM_UNKNOWN_FIELD_ERROR, + TRIM_WRONG_TYPE_ERROR, +) +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF +from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( + LtrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Syntax Validation]: 
invalid $ltrim object shapes produce errors. +LTRIM_SYNTAX_ERROR_TESTS: list[LtrimTest] = [ + # Non-document arguments produce TRIM_WRONG_TYPE_ERROR. + LtrimTest( + "syntax_string", + expr={"$ltrim": "hello"}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject string as argument", + ), + LtrimTest( + "syntax_array", + expr={"$ltrim": ["hello"]}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject array as argument", + ), + LtrimTest( + "syntax_null", + expr={"$ltrim": None}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject null as argument", + ), + LtrimTest( + "syntax_int", + expr={"$ltrim": 42}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject int as argument", + ), + LtrimTest( + "syntax_bool", + expr={"$ltrim": True}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject boolean as argument", + ), + LtrimTest( + "syntax_float", + expr={"$ltrim": 3.14}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject float as argument", + ), + LtrimTest( + "syntax_long", + expr={"$ltrim": Int64(42)}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject Int64 as argument", + ), + LtrimTest( + "syntax_binary", + expr={"$ltrim": Binary(b"data")}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject Binary as argument", + ), + LtrimTest( + "syntax_binary_uuid", + expr={"$ltrim": Binary(b"data", 4)}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject Binary UUID as argument", + ), + LtrimTest( + "syntax_date", + expr={"$ltrim": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject datetime as argument", + ), + LtrimTest( + "syntax_decimal128", + expr={"$ltrim": DECIMAL128_ONE_AND_HALF}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject Decimal128 as argument", + ), + LtrimTest( + "syntax_maxkey", + expr={"$ltrim": MaxKey()}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject MaxKey as 
argument", + ), + LtrimTest( + "syntax_minkey", + expr={"$ltrim": MinKey()}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject MinKey as argument", + ), + LtrimTest( + "syntax_objectid", + expr={"$ltrim": ObjectId("507f1f77bcf86cd799439011")}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject ObjectId as argument", + ), + LtrimTest( + "syntax_regex", + expr={"$ltrim": Regex("pattern")}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject Regex as argument", + ), + LtrimTest( + "syntax_timestamp", + expr={"$ltrim": Timestamp(1, 1)}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject Timestamp as argument", + ), + LtrimTest( + "syntax_code", + expr={"$ltrim": Code("function() {}")}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject Code as argument", + ), + LtrimTest( + "syntax_code_scope", + expr={"$ltrim": Code("function() {}", {"x": 1})}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$ltrim should reject Code with scope as argument", + ), + # Omitting input entirely. + LtrimTest( + "syntax_no_input", + expr={"$ltrim": {"chars": "a"}}, + error_code=TRIM_MISSING_INPUT_ERROR, + msg="$ltrim should require the input field", + ), + # Empty object. + LtrimTest( + "syntax_empty_object", + expr={"$ltrim": {}}, + error_code=TRIM_MISSING_INPUT_ERROR, + msg="$ltrim should reject empty object", + ), + # Unknown extra field. + LtrimTest( + "syntax_unknown_field", + expr={"$ltrim": {"input": "hello", "unknown": 1}}, + error_code=TRIM_UNKNOWN_FIELD_ERROR, + msg="$ltrim should reject unknown fields in the argument object", + ), + # Case-sensitive field names. + LtrimTest( + "syntax_case_sensitive_field", + expr={"$ltrim": {"Input": "hello"}}, + error_code=TRIM_UNKNOWN_FIELD_ERROR, + msg="$ltrim should reject case-mismatched field name 'Input'", + ), +] + +# Property [Dollar Sign Error]: a bare "$" is interpreted as a field path and "$$" is interpreted +# as an empty variable name. 
+LTRIM_DOLLAR_SIGN_ERROR_TESTS: list[LtrimTest] = [ + LtrimTest( + "dollar_bare_input", + input="$", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$ltrim should reject bare '$' as input field path", + ), + LtrimTest( + "dollar_bare_chars", + input="hello", + chars="$", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$ltrim should reject bare '$' as chars field path", + ), + LtrimTest( + "dollar_double_input", + input="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$ltrim should reject '$$' as empty variable name in input", + ), + LtrimTest( + "dollar_double_chars", + input="hello", + chars="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$ltrim should reject '$$' as empty variable name in chars", + ), +] + +LTRIM_INVALID_ARGS_ALL_TESTS = LTRIM_SYNTAX_ERROR_TESTS + LTRIM_DOLLAR_SIGN_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(LTRIM_INVALID_ARGS_ALL_TESTS)) +def test_ltrim_invalid_args(collection, test_case: LtrimTest): + """Test $ltrim syntax validation and dollar sign error cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invariants.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invariants.py new file mode 100644 index 00000000..b14ea30d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invariants.py @@ -0,0 +1,252 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult, assertSuccess +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( + _OMIT, + LtrimTest, + _expr, +) +from 
documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_project, +) + +# Property [Identity]: empty string chars is the identity element. The result equals input +# unchanged. +LTRIM_IDENTITY_TESTS: list[LtrimTest] = [ + LtrimTest( + "identity_plain", + input="hello", + chars="", + expected="hello", + msg="$ltrim should return input unchanged when chars is empty", + ), + LtrimTest( + "identity_leading_spaces", + input=" hello", + chars="", + expected=" hello", + msg="$ltrim should preserve leading spaces when chars is empty", + ), + LtrimTest( + "identity_empty_input", + input="", + chars="", + expected="", + msg="$ltrim should return empty string when both input and chars are empty", + ), + LtrimTest( + "identity_repeated_leading", + input="aaahello", + chars="", + expected="aaahello", + msg="$ltrim should preserve leading chars when chars is empty", + ), + LtrimTest( + "identity_unicode", + input="日本語", + chars="", + expected="日本語", + msg="$ltrim should return Unicode input unchanged when chars is empty", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(LTRIM_IDENTITY_TESTS)) +def test_ltrim_identity(collection, test_case: LtrimTest): + """Test $ltrim identity cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) + + +# Property [Idempotency]: applying $ltrim twice with the same chars yields the same result as +# applying it once. 
+LTRIM_IDEMPOTENCY_TESTS: list[LtrimTest] = [
+    LtrimTest(
+        "idempotent_default",
+        input=" hello",
+        expected="hello",
+        msg="$ltrim should be idempotent with default whitespace trimming",
+    ),
+    LtrimTest(
+        "idempotent_custom",
+        input="aaahello",
+        chars="a",
+        expected="hello",
+        msg="$ltrim should be idempotent with custom chars",
+    ),
+    LtrimTest(
+        "idempotent_mixed_whitespace",
+        input=" \t\nhello",
+        expected="hello",
+        msg="$ltrim should be idempotent with mixed whitespace",
+    ),
+    LtrimTest(
+        "idempotent_no_trim",
+        input="hello",
+        expected="hello",
+        msg="$ltrim should be idempotent when no trimming occurs",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(LTRIM_IDEMPOTENCY_TESTS))
+def test_ltrim_idempotency(collection, test_case: LtrimTest):
+    """Test that feeding $ltrim its own output leaves the result unchanged."""
+    single = _expr(test_case)
+    outer_args = {"input": single}
+    if test_case.chars is not _OMIT:
+        outer_args["chars"] = test_case.chars  # outer call reuses the case's trim set
+    projection = {"once": single, "twice": {"$ltrim": outer_args}}
+    want = test_case.expected
+    result = execute_project(collection, projection)
+    assertSuccess(result, [{"once": want, "twice": want}], msg=test_case.msg)
+
+
+# Property [Suffix Invariant]: the result is always a suffix of the original input string.
+LTRIM_SUFFIX_INVARIANT_TESTS: list[LtrimTest] = [
+    LtrimTest(
+        "suffix_default_trim",
+        input=" hello",
+        msg="$ltrim result should be a suffix of input after default trimming",
+    ),
+    LtrimTest(
+        "suffix_custom_chars",
+        input="aaahello",
+        chars="a",
+        msg="$ltrim result should be a suffix of input after custom char trimming",
+    ),
+    LtrimTest(
+        "suffix_no_trim_needed",
+        input="hello",
+        msg="$ltrim result should be a suffix of input when no trimming needed",
+    ),
+    LtrimTest(
+        "suffix_all_trimmed",
+        input=" ",
+        msg="$ltrim result should be a suffix of input when all chars trimmed",
+    ),
+    LtrimTest(
+        "suffix_mixed_whitespace",
+        input="\t\nhello world",
+        msg="$ltrim result should be a suffix of input with mixed whitespace",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(LTRIM_SUFFIX_INVARIANT_TESTS))
+def test_ltrim_suffix_invariant(collection, test_case: LtrimTest):
+    """Test that the $ltrim result is a suffix of the original input.
+
+    The check runs server-side: the trailing code points of the input, as many as the trimmed
+    result contains, must equal the trimmed result.
+    """
+    trimmed = _expr(test_case)
+    trimmed_len = {"$strLenCP": trimmed}
+    # Offset at which the expected suffix starts: len(input) - len(trimmed).
+    start = {"$subtract": [{"$strLenCP": test_case.input}, trimmed_len]}
+    tail = {"$substrCP": [test_case.input, start, trimmed_len]}
+    is_suffix = {"$eq": [trimmed, tail]}
+    result = execute_expression(collection, is_suffix)
+    assertSuccess(result, [{"result": True}], msg=test_case.msg)
+
+
+# Property [First Char Invariant]: the first character of a non-empty result is not a member of
+# the trim character set. Only tested with custom chars where membership can be checked
+# server-side via $indexOfCP.
+LTRIM_FIRST_CHAR_INVARIANT_TESTS: list[LtrimTest] = [
+    LtrimTest(
+        "first_char_single",
+        input="aaahello",
+        chars="a",
+        msg="$ltrim result's first char should not be in single-char trim set",
+    ),
+    LtrimTest(
+        "first_char_multi",
+        input="abcdef",
+        chars="abc",
+        msg="$ltrim result's first char should not be in multi-char trim set",
+    ),
+    LtrimTest(
+        "first_char_all_leading",
+        input="xyzabc",
+        chars="xyz",
+        msg="$ltrim result's first char should not be in trim set after full leading trim",
+    ),
+    LtrimTest(
+        "first_char_no_trim",
+        input="hello",
+        chars="xyz",
+        msg="$ltrim result's first char should not be in trim set when no trimming occurs",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(LTRIM_FIRST_CHAR_INVARIANT_TESTS))
+def test_ltrim_first_char_invariant(collection, test_case: LtrimTest):
+    """Test that a non-empty $ltrim result does not start with a trim character.
+
+    Membership is probed with $indexOfCP on chars; an empty result takes the "else" branch.
+    """
+    trimmed = _expr(test_case)
+    head = {"$substrCP": [trimmed, 0, 1]}
+    head_not_in_chars = {"$eq": [{"$indexOfCP": [test_case.chars, head]}, -1]}
+    has_content = {"$gt": [{"$strLenCP": trimmed}, 0]}
+    check = {
+        "$cond": {
+            "if": has_content,
+            "then": head_not_in_chars,
+            "else": True,
+        }
+    }
+    result = execute_project(collection, {"firstCharNotInChars": check})
+    assertSuccess(result, [{"firstCharNotInChars": True}], msg=test_case.msg)
+
+
+# Property [Return Type]: the result is always a string when the expression succeeds and no null
+# propagation occurs.
+LTRIM_RETURN_TYPE_TESTS: list[LtrimTest] = [ + LtrimTest( + "return_type_default_trim", + input=" hello", + msg="$ltrim should return string type after default trimming", + ), + LtrimTest( + "return_type_custom_no_match", + input="hello", + chars="x", + msg="$ltrim should return string type when custom chars don't match", + ), + LtrimTest( + "return_type_custom_trim", + input="aaahello", + chars="a", + msg="$ltrim should return string type after custom char trimming", + ), + LtrimTest( + "return_type_empty", input="", msg="$ltrim should return string type for empty input" + ), + LtrimTest( + "return_type_all_whitespace", + input=" ", + msg="$ltrim should return string type when all whitespace is trimmed", + ), + LtrimTest( + "return_type_unicode", + input="日本語", + chars="日", + msg="$ltrim should return string type after Unicode char trimming", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(LTRIM_RETURN_TYPE_TESTS)) +def test_ltrim_return_type(collection, test_case: LtrimTest): + """Test $ltrim result is always type string.""" + result = execute_expression(collection, {"$type": _expr(test_case)}) + assertSuccess(result, [{"result": "string"}], msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_null.py new file mode 100644 index 00000000..5c460c0f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_null.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import MISSING +from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( + _OMIT, + LtrimTest, + _expr, +) +from 
documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Argument shapes for null/missing tests. _PLACEHOLDER is replaced with None or MISSING. +_PLACEHOLDER = object() +_NULL_PATTERNS = [ + (_PLACEHOLDER, _OMIT, "input_default_chars", "input is {kind} with default chars"), + (_PLACEHOLDER, "abc", "input_custom_chars", "input is {kind} with custom chars"), + ("hello", _PLACEHOLDER, "chars_valid_input", "chars is {kind} with valid string input"), + (_PLACEHOLDER, _PLACEHOLDER, "both", "both input and chars are {kind}"), + # Null/missing input takes precedence over non-string chars (no error raised). + (_PLACEHOLDER, 123, "precedence_chars_int", "input is {kind} even with non-string chars"), +] + + +def _build_null_tests(null_value, prefix) -> list[LtrimTest]: + return [ + LtrimTest( + f"{prefix}_{suffix}", + input=null_value if _input is _PLACEHOLDER else _input, + chars=null_value if _chars is _PLACEHOLDER else _chars, + expected=None, + msg=f"$ltrim should return null when {msg_tmpl.format(kind=prefix)}", + ) + for _input, _chars, suffix, msg_tmpl in _NULL_PATTERNS + ] + + +# Property [Null Propagation]: when either input or chars is null, the result is null. +LTRIM_NULL_TESTS = _build_null_tests(None, "null") + +# Property [Null Propagation - missing]: missing fields are treated as null. +LTRIM_MISSING_TESTS = _build_null_tests(MISSING, "missing") + +# Property [Null Propagation - mixed null and missing]: combining null and missing across +# positions still produces null. 
+LTRIM_MIXED_NULL_TESTS: list[LtrimTest] = [
+    LtrimTest(
+        "mixed_null_input_missing_chars",
+        input=None,
+        chars=MISSING,
+        expected=None,
+        msg="$ltrim should return null when input is null and chars is missing",
+    ),
+    LtrimTest(
+        "mixed_missing_input_null_chars",
+        input=MISSING,
+        chars=None,
+        expected=None,
+        msg="$ltrim should return null when input is missing and chars is null",
+    ),
+]
+
+LTRIM_NULL_ALL_TESTS = LTRIM_NULL_TESTS + LTRIM_MISSING_TESTS + LTRIM_MIXED_NULL_TESTS
+
+
+@pytest.mark.parametrize("test_case", pytest_params(LTRIM_NULL_ALL_TESTS))
+def test_ltrim_null(collection, test_case: LtrimTest):
+    """Test that $ltrim propagates null for every null/missing argument combination."""
+    expression = _expr(test_case)
+    result = execute_expression(collection, expression)
+    outcome = {"expected": test_case.expected, "error_code": test_case.error_code}
+    assertResult(result, msg=test_case.msg, **outcome)
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_size_limit.py
new file mode 100644
index 00000000..bc17e2eb
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_size_limit.py
@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.error_codes import STRING_SIZE_LIMIT_ERROR
+from documentdb_tests.framework.test_case import pytest_params
+from documentdb_tests.framework.test_constants import STRING_SIZE_LIMIT_BYTES
+from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import (
+    LtrimTest,
+    _expr,
+)
+from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression
+
+# Property [String Size Limit - Success]: input one byte under the limit is accepted.
+LTRIM_SIZE_LIMIT_SUCCESS_TESTS: list[LtrimTest] = [
+    LtrimTest(
+        "size_one_under",
+        input="a" * (STRING_SIZE_LIMIT_BYTES - 1),
+        expected="a" * (STRING_SIZE_LIMIT_BYTES - 1),
+        msg="$ltrim should accept input one byte under the size limit",
+    ),
+    # 2-byte chars plus a 1-byte "a": one byte under the limit (assumes the limit is even).
+    LtrimTest(
+        "size_one_under_2byte",
+        input="\u00e9" * ((STRING_SIZE_LIMIT_BYTES - 1) // 2) + "a",
+        expected="\u00e9" * ((STRING_SIZE_LIMIT_BYTES - 1) // 2) + "a",
+        msg="$ltrim should accept 2-byte character input one byte under the size limit",
+    ),
+    # Large input with many leading trim characters: (limit - 6) + 5 bytes, one under the limit.
+    LtrimTest(
+        "size_trim_leading",
+        input="a" * (STRING_SIZE_LIMIT_BYTES - 6) + "hello",
+        chars="a",
+        expected="hello",
+        msg="$ltrim should trim many leading characters near the size limit",
+    ),
+]
+
+# Property [String Size Limit - Error]: input at the BSON string byte limit produces an error.
+# NOTE(review): the cases below pass the string as a literal, not via $concat - confirm intent.
+LTRIM_SIZE_LIMIT_ERROR_TESTS: list[LtrimTest] = [ + LtrimTest( + "size_at_limit", + input="a" * STRING_SIZE_LIMIT_BYTES, + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$ltrim should reject input at the BSON string byte limit", + ), + LtrimTest( + "size_at_limit_2byte", + input="\u00e9" * (STRING_SIZE_LIMIT_BYTES // 2), + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$ltrim should reject 2-byte character input at the BSON string byte limit", + ), +] + +LTRIM_SIZE_LIMIT_ALL_TESTS = LTRIM_SIZE_LIMIT_SUCCESS_TESTS + LTRIM_SIZE_LIMIT_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(LTRIM_SIZE_LIMIT_ALL_TESTS)) +def test_ltrim_size_limit(collection, test_case: LtrimTest): + """Test $ltrim string size limit cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_type_errors.py new file mode 100644 index 00000000..364eea77 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_type_errors.py @@ -0,0 +1,279 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import TRIM_CHARS_TYPE_ERROR, TRIM_INPUT_TYPE_ERROR +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF, MISSING +from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( + LtrimTest, + _expr, +) +from 
documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Type Strictness - input]: non-string, non-null input produces TRIM_INPUT_TYPE_ERROR. +LTRIM_INPUT_TYPE_ERROR_TESTS: list[LtrimTest] = [ + LtrimTest( + "type_input_array", + input=["a"], + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject array as input", + ), + LtrimTest( + "type_input_binary", + input=Binary(b"data"), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject Binary as input", + ), + LtrimTest( + "type_input_bool", + input=True, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject boolean as input", + ), + LtrimTest( + "type_input_date", + input=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject datetime as input", + ), + LtrimTest( + "type_input_decimal128", + input=DECIMAL128_ONE_AND_HALF, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject Decimal128 as input", + ), + LtrimTest( + "type_input_float", + input=3.14, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject float as input", + ), + LtrimTest( + "type_input_int", + input=42, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject int as input", + ), + LtrimTest( + "type_input_long", + input=Int64(42), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject Int64 as input", + ), + LtrimTest( + "type_input_maxkey", + input=MaxKey(), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject MaxKey as input", + ), + LtrimTest( + "type_input_minkey", + input=MinKey(), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject MinKey as input", + ), + LtrimTest( + "type_input_object", + input={"a": 1}, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject object as input", + ), + LtrimTest( + "type_input_objectid", + input=ObjectId("507f1f77bcf86cd799439011"), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject ObjectId as 
input", + ), + LtrimTest( + "type_input_regex", + input=Regex("pattern"), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject Regex as input", + ), + LtrimTest( + "type_input_timestamp", + input=Timestamp(1, 1), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject Timestamp as input", + ), + LtrimTest( + "type_input_code", + input=Code("function() {}"), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject Code as input", + ), + LtrimTest( + "type_input_code_scope", + input=Code("function() {}", {"x": 1}), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject Code with scope as input", + ), +] + +# Property [Type Strictness - chars]: non-string, non-null chars produces TRIM_CHARS_TYPE_ERROR. +LTRIM_CHARS_TYPE_ERROR_TESTS: list[LtrimTest] = [ + LtrimTest( + "type_chars_array", + input="hello", + chars=["a"], + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject array as chars", + ), + LtrimTest( + "type_chars_binary", + input="hello", + chars=Binary(b"data"), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject Binary as chars", + ), + LtrimTest( + "type_chars_bool", + input="hello", + chars=True, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject boolean as chars", + ), + LtrimTest( + "type_chars_date", + input="hello", + chars=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject datetime as chars", + ), + LtrimTest( + "type_chars_decimal128", + input="hello", + chars=DECIMAL128_ONE_AND_HALF, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject Decimal128 as chars", + ), + LtrimTest( + "type_chars_float", + input="hello", + chars=3.14, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject float as chars", + ), + LtrimTest( + "type_chars_int", + input="hello", + chars=42, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject int as chars", + ), + LtrimTest( + "type_chars_long", + input="hello", + 
chars=Int64(42), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject Int64 as chars", + ), + LtrimTest( + "type_chars_maxkey", + input="hello", + chars=MaxKey(), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject MaxKey as chars", + ), + LtrimTest( + "type_chars_minkey", + input="hello", + chars=MinKey(), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject MinKey as chars", + ), + LtrimTest( + "type_chars_object", + input="hello", + chars={"a": 1}, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject object as chars", + ), + LtrimTest( + "type_chars_objectid", + input="hello", + chars=ObjectId("507f1f77bcf86cd799439011"), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject ObjectId as chars", + ), + LtrimTest( + "type_chars_regex", + input="hello", + chars=Regex("pattern"), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject Regex as chars", + ), + LtrimTest( + "type_chars_timestamp", + input="hello", + chars=Timestamp(1, 1), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject Timestamp as chars", + ), + LtrimTest( + "type_chars_code", + input="hello", + chars=Code("function() {}"), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject Code as chars", + ), + LtrimTest( + "type_chars_code_scope", + input="hello", + chars=Code("function() {}", {"x": 1}), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$ltrim should reject Code with scope as chars", + ), +] + +# Property [Type Strictness - precedence]: when both input and chars are non-string, the input +# type error takes precedence. +LTRIM_TYPE_PRECEDENCE_TESTS: list[LtrimTest] = [ + LtrimTest( + "type_precedence_both_int", + input=123, + chars=456, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should report input type error before chars type error when both are invalid", + ), +] + +# Property [Type Strictness - input with null chars]: non-string input errors even when chars is +# null or missing. 
+LTRIM_INPUT_TYPE_NULL_CHARS_TESTS: list[LtrimTest] = [ + LtrimTest( + "type_input_int_chars_null", + input=123, + chars=None, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject non-string input even when chars is null", + ), + LtrimTest( + "type_input_int_chars_missing", + input=123, + chars=MISSING, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$ltrim should reject non-string input even when chars is missing", + ), +] + +LTRIM_TYPE_ERROR_ALL_TESTS = ( + LTRIM_INPUT_TYPE_ERROR_TESTS + + LTRIM_CHARS_TYPE_ERROR_TESTS + + LTRIM_TYPE_PRECEDENCE_TESTS + + LTRIM_INPUT_TYPE_NULL_CHARS_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(LTRIM_TYPE_ERROR_ALL_TESTS)) +def test_ltrim_type_errors(collection, test_case: LtrimTest): + """Test $ltrim type strictness cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/utils/ltrim_common.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/utils/ltrim_common.py new file mode 100644 index 00000000..303f8937 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/utils/ltrim_common.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, cast + +from documentdb_tests.framework.test_case import BaseTestCase + +# Sentinel for "omit this parameter from the expression." Distinct from None (which means pass +# null) and MISSING (which means reference a missing field). 
+_OMIT = object()
+
+
+@dataclass(frozen=True)
+class LtrimTest(BaseTestCase):
+    """One $ltrim case: ``input``/``chars`` arguments, or a raw ``expr`` used instead."""
+
+    input: Any = None
+    chars: Any = _OMIT
+    expr: Any = None  # When not None, _expr() returns this as-is (for syntax tests)
+
+
+def _expr(test_case: LtrimTest) -> dict[str, Any]:
+    """Return the $ltrim expression for a test case, or its raw ``expr`` override if set."""
+    if test_case.expr is not None:
+        return cast(dict[str, Any], test_case.expr)
+    # _OMIT leaves "chars" out of the arguments; None is forwarded as an explicit null.
+    chars = {} if test_case.chars is _OMIT else {"chars": test_case.chars}
+    return {"$ltrim": {"input": test_case.input, **chars}}
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_custom_chars.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_custom_chars.py
new file mode 100644
index 00000000..4d041343
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_custom_chars.py
@@ -0,0 +1,177 @@
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.test_case import pytest_params
+from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import (
+    RtrimTest,
+    _expr,
+)
+from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression
+
+# Property [Custom Chars]: when chars is provided, only those characters are trimmed from the
+# trailing edge. Each character in chars is treated individually (not as a substring), and the
+# order of characters in chars does not affect the result.
+RTRIM_CUSTOM_CHARS_TESTS: list[RtrimTest] = [ + RtrimTest( + "custom_single_char", + input="helloaaa", + chars="a", + expected="hello", + msg="$rtrim should trim trailing occurrences of a single custom char", + ), + RtrimTest( + "custom_repeated_single", + input="helloxxx", + chars="x", + expected="hello", + msg="$rtrim should trim repeated trailing custom char", + ), + # Custom chars completely replaces the default whitespace set. + RtrimTest( + "custom_spaces_not_trimmed", + input="helloxxx ", + chars="x", + expected="helloxxx ", + msg="$rtrim should not trim spaces when custom chars replaces default set", + ), + RtrimTest( + "custom_tabs_not_trimmed", + input="helloxxx\t\t", + chars="x", + expected="helloxxx\t\t", + msg="$rtrim should not trim tabs when custom chars replaces default set", + ), + # Duplicate characters in chars have no additional effect. + RtrimTest( + "custom_duplicate_chars", + input="helloaaa", + chars="aab", + expected="hello", + msg="$rtrim should ignore duplicate characters in chars", + ), + # Each character in chars is treated individually, not as a substring pattern. + RtrimTest( + "custom_individual_any_order", + input="hellocba", + chars="abc", + expected="hello", + msg="$rtrim should treat each char in chars individually, not as a substring", + ), + RtrimTest( + "custom_individual_mixed_repeats", + input="helloaabbcc", + chars="abc", + expected="hello", + msg="$rtrim should trim mixed repeats of individual chars", + ), + # Characters not in chars are preserved. + RtrimTest( + "custom_no_match", + input="hellox", + chars="a", + expected="hellox", + msg="$rtrim should preserve trailing chars not in the trim set", + ), + # Order of characters in chars does not affect the result. 
+ RtrimTest( + "custom_order_abc", + input="hellobcaa", + chars="abc", + expected="hello", + msg="$rtrim should produce same result regardless of chars order (abc)", + ), + RtrimTest( + "custom_order_cba", + input="hellobcaa", + chars="cba", + expected="hello", + msg="$rtrim should produce same result regardless of chars order (cba)", + ), + RtrimTest( + "custom_order_bac", + input="hellobcaa", + chars="bac", + expected="hello", + msg="$rtrim should produce same result regardless of chars order (bac)", + ), +] + + +# Property [Directionality]: only trailing (right-side) characters are trimmed. Leading +# characters matching the trim set are preserved. +RTRIM_DIRECTIONALITY_TESTS: list[RtrimTest] = [ + RtrimTest( + "dir_leading_spaces", + input=" hello", + expected=" hello", + msg="$rtrim should preserve leading spaces", + ), + RtrimTest( + "dir_trailing_trimmed_leading_kept", + input=" hello ", + expected=" hello", + msg="$rtrim should trim trailing spaces while preserving leading spaces", + ), + RtrimTest( + "dir_custom_leading_kept", + input="aaahelloaaa", + chars="a", + expected="aaahello", + msg="$rtrim should trim trailing custom chars while preserving leading ones", + ), + RtrimTest( + "dir_leading_tab_kept", + input="\thello\t", + expected="\thello", + msg="$rtrim should trim trailing tab while preserving leading tab", + ), + # Last character not in trim set stops all trimming. + RtrimTest( + "dir_last_char_not_in_set", + input="hxexlxlxo", + chars="x", + expected="hxexlxlxo", + msg="$rtrim should not trim when last character is not in the trim set", + ), +] + +# Property [Whitespace Subset Chars]: when chars is a subset of whitespace characters, only +# those specific whitespace characters are trimmed; other whitespace is preserved. 
+RTRIM_WHITESPACE_SUBSET_TESTS: list[RtrimTest] = [ + RtrimTest( + "subset_tab_only", + input="\t hello \t", + chars="\t", + expected="\t hello ", + msg="$rtrim with chars=tab should trim only trailing tabs, preserving spaces", + ), + RtrimTest( + "subset_space_tab", + input=" \t\nhello\n\t ", + chars=" \t", + expected=" \t\nhello\n", + msg="$rtrim with chars=space+tab should preserve trailing newlines", + ), + RtrimTest( + "subset_newline_only", + input="hello \n\n", + chars="\n", + expected="hello ", + msg="$rtrim with chars=newline should trim only trailing newlines, preserving spaces", + ), +] + +RTRIM_CUSTOM_CHARS_ALL_TESTS = ( + RTRIM_CUSTOM_CHARS_TESTS + RTRIM_DIRECTIONALITY_TESTS + RTRIM_WHITESPACE_SUBSET_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_CUSTOM_CHARS_ALL_TESTS)) +def test_rtrim_custom_chars(collection, test_case: RtrimTest): + """Test $rtrim custom chars and directionality.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_default_trim.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_default_trim.py new file mode 100644 index 00000000..ed8219f2 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_default_trim.py @@ -0,0 +1,298 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( + RtrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Default Trimming]: when chars is omitted, 
trailing whitespace is trimmed. The default +# set includes ASCII whitespace (space, tab, newline, carriage return, form feed, vertical tab), +# the null byte (U+0000), non-breaking space (U+00A0), and the "Zs" spaces U+1680, U+2000-U+200A. +RTRIM_DEFAULT_TRIM_TESTS: list[RtrimTest] = [ + # ASCII whitespace characters. + RtrimTest( + "default_space", + input="hello ", + expected="hello", + msg="$rtrim should trim trailing space by default", + ), + RtrimTest( + "default_tab", + input="hello\t", + expected="hello", + msg="$rtrim should trim trailing tab by default", + ), + RtrimTest( + "default_newline", + input="hello\n", + expected="hello", + msg="$rtrim should trim trailing newline by default", + ), + RtrimTest( + "default_cr", + input="hello\r", + expected="hello", + msg="$rtrim should trim trailing carriage return by default", + ), + RtrimTest( + "default_form_feed", + input="hello\f", + expected="hello", + msg="$rtrim should trim trailing form feed by default", + ), + RtrimTest( + "default_vertical_tab", + input="hello\x0b", + expected="hello", + msg="$rtrim should trim trailing vertical tab by default", + ), + # Null byte (U+0000). + RtrimTest( + "default_null_byte", + input="hello\x00", + expected="hello", + msg="$rtrim should trim trailing null byte by default", + ), + # Non-breaking space (U+00A0). + RtrimTest( + "default_nbsp", + input="hello\u00a0", + expected="hello", + msg="$rtrim should trim trailing non-breaking space by default", + ), + # Unicode "Zs" category spaces. 
+ RtrimTest( + "default_en_space", + input="hello\u2000", + expected="hello", + msg="$rtrim should trim trailing en space (U+2000) by default", + ), + RtrimTest( + "default_em_space", + input="hello\u2003", + expected="hello", + msg="$rtrim should trim trailing em space (U+2003) by default", + ), + RtrimTest( + "default_thin_space", + input="hello\u2009", + expected="hello", + msg="$rtrim should trim trailing thin space (U+2009) by default", + ), + RtrimTest( + "default_hair_space", + input="hello\u200a", + expected="hello", + msg="$rtrim should trim trailing hair space (U+200A) by default", + ), + RtrimTest( + "default_ogham_space", + input="hello\u1680", + expected="hello", + msg="$rtrim should trim trailing ogham space (U+1680) by default", + ), + RtrimTest( + "default_em_quad", + input="hello\u2001", + expected="hello", + msg="$rtrim should trim trailing em quad (U+2001) by default", + ), + RtrimTest( + "default_en_space_2002", + input="hello\u2002", + expected="hello", + msg="$rtrim should trim trailing en space (U+2002) by default", + ), + RtrimTest( + "default_three_per_em", + input="hello\u2004", + expected="hello", + msg="$rtrim should trim trailing three-per-em space (U+2004) by default", + ), + RtrimTest( + "default_four_per_em", + input="hello\u2005", + expected="hello", + msg="$rtrim should trim trailing four-per-em space (U+2005) by default", + ), + RtrimTest( + "default_six_per_em", + input="hello\u2006", + expected="hello", + msg="$rtrim should trim trailing six-per-em space (U+2006) by default", + ), + RtrimTest( + "default_figure_space", + input="hello\u2007", + expected="hello", + msg="$rtrim should trim trailing figure space (U+2007) by default", + ), + RtrimTest( + "default_punctuation_space", + input="hello\u2008", + expected="hello", + msg="$rtrim should trim trailing punctuation space (U+2008) by default", + ), + # Multiple mixed whitespace characters. 
+ RtrimTest( + "default_mixed_ascii_whitespace", + input="hello \t\n\r\f\x0b", + expected="hello", + msg="$rtrim should trim all mixed ASCII whitespace from trailing edge", + ), + RtrimTest( + "default_mixed_unicode_whitespace", + input="hello\u00a0\u2000\u2003", + expected="hello", + msg="$rtrim should trim mixed Unicode whitespace from trailing edge", + ), + RtrimTest( + "default_mixed_ascii_and_unicode", + input="hello \t\u00a0\u2000", + expected="hello", + msg="$rtrim should trim mixed ASCII and Unicode whitespace from trailing edge", + ), + # All 20 default whitespace code points as a trailing suffix. + RtrimTest( + "default_all_20_mixed", + input=( + "hello\x00\t\n\x0b\f\r \u00a0\u1680\u2000\u2001\u2002" + "\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a" + ), + expected="hello", + msg="$rtrim should trim all 20 default whitespace code points from trailing edge", + ), +] + +# Property [Default Trimming - exclusions]: characters not in the default whitespace set are +# preserved as trailing characters. +RTRIM_DEFAULT_TRIM_EXCLUSION_TESTS: list[RtrimTest] = [ + # Zero-width space (U+200B). + RtrimTest( + "default_excl_zwsp", + input="hello\u200b", + expected="hello\u200b", + msg="$rtrim should not trim zero-width space (U+200B) by default", + ), + # Line separator (U+2028). + RtrimTest( + "default_excl_line_separator", + input="hello\u2028", + expected="hello\u2028", + msg="$rtrim should not trim line separator (U+2028) by default", + ), + # Paragraph separator (U+2029). + RtrimTest( + "default_excl_paragraph_separator", + input="hello\u2029", + expected="hello\u2029", + msg="$rtrim should not trim paragraph separator (U+2029) by default", + ), + # Next line (U+0085). + RtrimTest( + "default_excl_next_line", + input="hello\u0085", + expected="hello\u0085", + msg="$rtrim should not trim next line (U+0085) by default", + ), + # Ideographic space (U+3000). 
+ RtrimTest( + "default_excl_ideographic_space", + input="hello\u3000", + expected="hello\u3000", + msg="$rtrim should not trim ideographic space (U+3000) by default", + ), + # Narrow no-break space (U+202F). + RtrimTest( + "default_excl_narrow_nbsp", + input="hello\u202f", + expected="hello\u202f", + msg="$rtrim should not trim narrow no-break space (U+202F) by default", + ), + # Medium mathematical space (U+205F). + RtrimTest( + "default_excl_medium_math_space", + input="hello\u205f", + expected="hello\u205f", + msg="$rtrim should not trim medium mathematical space (U+205F) by default", + ), + # BOM / zero-width no-break space (U+FEFF). + RtrimTest( + "default_excl_bom", + input="hello\ufeff", + expected="hello\ufeff", + msg="$rtrim should not trim BOM / zero-width no-break space (U+FEFF) by default", + ), +] + + +# Property [Edge Cases]: the operator produces correct results at input extremes: empty strings, +# fully trimmable strings, and null-byte or control-character boundaries. +RTRIM_EDGE_TESTS: list[RtrimTest] = [ + RtrimTest( + "edge_empty_default", + input="", + expected="", + msg="$rtrim should return empty string for empty input with default chars", + ), + RtrimTest( + "edge_empty_custom", + input="", + chars="abc", + expected="", + msg="$rtrim should return empty string for empty input with custom chars", + ), + RtrimTest( + "edge_all_whitespace", + input=" ", + expected="", + msg="$rtrim should return empty string when input is all whitespace", + ), + RtrimTest( + "edge_all_in_chars", + input="aaabbb", + chars="ab", + expected="", + msg="$rtrim should return empty string when all characters are in trim set", + ), + # Null byte in custom chars is not treated as a C-style string terminator. + RtrimTest( + "edge_null_byte_custom", + input="hello\x00\x00", + chars="\x00", + expected="hello", + msg="$rtrim should trim null bytes without treating them as C-string terminators", + ), + # Control character stops default trimming even if preceded by whitespace. 
+ RtrimTest( + "edge_control_char_stops_trim", + input="hello \x01", + expected="hello \x01", + msg="$rtrim should not trim control character U+0001 by default", + ), + # Trailing space trimmed, control character stops further trimming. + RtrimTest( + "edge_control_char_then_space", + input="hello\x01 ", + expected="hello\x01", + msg="$rtrim should trim trailing space but stop at control character", + ), +] + +RTRIM_DEFAULT_TRIM_ALL_TESTS = ( + RTRIM_DEFAULT_TRIM_TESTS + RTRIM_DEFAULT_TRIM_EXCLUSION_TESTS + RTRIM_EDGE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_DEFAULT_TRIM_ALL_TESTS)) +def test_rtrim_default_trim(collection, test_case: RtrimTest): + """Test $rtrim default trimming, exclusions, and edge cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_encoding.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_encoding.py new file mode 100644 index 00000000..764f2e9b --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_encoding.py @@ -0,0 +1,173 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( + RtrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Encoding and Character Handling]: trimming operates on whole Unicode code points, +# not bytes or substrings. +RTRIM_ENCODING_TESTS: list[RtrimTest] = [ + # 2-byte UTF-8 character (U+00E9, é). 
+ RtrimTest( + "enc_2byte_trim", + input="hello\u00e9", + chars="\u00e9", + expected="hello", + msg="$rtrim should trim 2-byte UTF-8 character é (U+00E9)", + ), + # 3-byte UTF-8 character (U+2603, ☃). + RtrimTest( + "enc_3byte_trim", + input="hello\u2603", + chars="\u2603", + expected="hello", + msg="$rtrim should trim 3-byte UTF-8 character ☃ (U+2603)", + ), + # 4-byte UTF-8 character (U+1F600, 😀). + RtrimTest( + "enc_4byte_trim", + input="hello\U0001f600", + chars="\U0001f600", + expected="hello", + msg="$rtrim should trim 4-byte UTF-8 character 😀 (U+1F600)", + ), + # Mixed multi-byte characters in chars, each treated individually. + RtrimTest( + "enc_mixed_multibyte", + input="hello\u00e9\u2603\U0001f600", + chars="\U0001f600\u00e9\u2603", + expected="hello", + msg="$rtrim should trim mixed multi-byte characters individually", + ), + # Partial bytes do not match. Trimming "é" (U+00E9) should not affect "e" (U+0065). + RtrimTest( + "enc_partial_no_match", + input="helloe", + chars="\u00e9", + expected="helloe", + msg="$rtrim should not match partial byte sequences (é vs e)", + ), + # Regex-special characters treated as literals. + RtrimTest( + "enc_regex_special", + input="hello.*\\", + chars="\\*.", + expected="hello", + msg="$rtrim should treat regex-special characters as literals", + ), + RtrimTest( + "enc_regex_plus", + input="hello+++", + chars="+", + expected="hello", + msg="$rtrim should treat + as a literal character", + ), + RtrimTest( + "enc_regex_question", + input="hello???", + chars="?", + expected="hello", + msg="$rtrim should treat ? as a literal character", + ), + RtrimTest( + "enc_regex_brackets", + input="hello(([", + chars="([", + expected="hello", + msg="$rtrim should treat ( and [ as literal characters", + ), + # Trimming decomposed form from the right: combining mark in chars matches the trailing + # combining mark independently of the preceding base character. 
+ RtrimTest( + "enc_combining_mark_strips_from_decomposed", + input="helloe\u0301", + chars="\u0301", + expected="helloe", + msg="$rtrim should trim combining mark independently from decomposed sequence", + ), + # Combining mark (U+0301) is a valid code point and can be trimmed. + RtrimTest( + "enc_combining_mark_trim", + input="hello\u0301", + chars="\u0301", + expected="hello", + msg="$rtrim should trim standalone combining mark (U+0301)", + ), + # Precomposed é in input, decomposed chars "e" + combining acute. Each char in chars is + # individual, so neither "e" nor U+0301 matches U+00E9. + RtrimTest( + "enc_decomposed_chars_precomposed_input", + input="hello\u00e9", + chars="e\u0301", + expected="hello\u00e9", + msg="$rtrim should not match precomposed é with decomposed chars e+combining accent", + ), + # Case sensitivity: uppercase and lowercase are distinct code points. + RtrimTest( + "enc_case_lower_no_match", + input="helloABC", + chars="abc", + expected="helloABC", + msg="$rtrim should not trim uppercase when chars contains lowercase", + ), + RtrimTest( + "enc_case_upper_no_match", + input="helloabc", + chars="ABC", + expected="helloabc", + msg="$rtrim should not trim lowercase when chars contains uppercase", + ), + RtrimTest( + "enc_case_exact_match", + input="helloAbC", + chars="AbC", + expected="hello", + msg="$rtrim should trim when case matches exactly", + ), + # Greek case sensitivity. + RtrimTest( + "enc_case_greek", + input="hello\u03c3", + chars="\u03a3", + expected="hello\u03c3", + msg="$rtrim should not fold Greek lowercase σ to uppercase Σ", + ), + # German sharp s (U+00DF) trimmed as single code point. + RtrimTest( + "enc_sharp_s", + input="hello\u00df\u00df", + chars="\u00df", + expected="hello", + msg="$rtrim should trim ß (U+00DF) as a single code point", + ), + # Surrogate-adjacent code points handled correctly. 
+ RtrimTest( + "enc_surrogate_adj_d7ff", + input="hello\ud7ff", + chars="\ud7ff", + expected="hello", + msg="$rtrim should handle surrogate-adjacent code point U+D7FF", + ), + RtrimTest( + "enc_surrogate_adj_e000", + input="hello\ue000", + chars="\ue000", + expected="hello", + msg="$rtrim should handle surrogate-adjacent code point U+E000", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_ENCODING_TESTS)) +def test_rtrim_encoding(collection, test_case: RtrimTest): + """Test $rtrim encoding and character handling.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_input_forms.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_input_forms.py new file mode 100644 index 00000000..3d6a2fdc --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_input_forms.py @@ -0,0 +1,104 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( + RtrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.expression_test_case import ExpressionTestCase +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_expression_with_insert, +) + +# Property [Expression Arguments]: input and chars accept any expression that resolves to a +# string. Nested $rtrim expressions are also accepted. +RTRIM_EXPR_TESTS: list[RtrimTest] = [ + # input is an expression. 
+ RtrimTest( + "expr_input_concat", + input={"$concat": ["hello", " "]}, + expected="hello", + msg="$rtrim should accept $concat expression as input", + ), + # chars is an expression. + RtrimTest( + "expr_chars_concat", + input="helloaaa", + chars={"$concat": ["a"]}, + expected="hello", + msg="$rtrim should accept $concat expression as chars", + ), + # Both input and chars are expressions. + RtrimTest( + "expr_both", + input={"$concat": ["hello", "xx"]}, + chars={"$concat": ["x"]}, + expected="hello", + msg="$rtrim should accept expressions for both input and chars", + ), + # Nested $rtrim as input to another $rtrim. + RtrimTest( + "expr_nested_rtrim", + input={"$rtrim": {"input": "helloaa "}}, + chars="a", + expected="hello", + msg="$rtrim should accept nested $rtrim as input expression", + ), + # $literal for dollar-prefixed strings. + RtrimTest( + "expr_literal_dollar", + input={"$literal": "hello$$$"}, + chars={"$literal": "$"}, + expected="hello", + msg="$rtrim should accept $literal for dollar-prefixed strings", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_EXPR_TESTS)) +def test_rtrim_input_forms(collection, test_case: RtrimTest): + """Test $rtrim expression arguments.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) + + +# Property [Document Field References]: $rtrim works with field references +# from inserted documents, not just inline literals. +RTRIM_FIELD_REF_TESTS: list[ExpressionTestCase] = [ + # Object expression: both input and chars from simple field paths. + ExpressionTestCase( + "field_object", + expression={"$rtrim": {"input": "$s", "chars": "$c"}}, + doc={"s": "helloaaa", "c": "a"}, + expected="hello", + msg="$rtrim should accept input and chars from document field paths", + ), + # Composite array: both from $arrayElemAt on a projected array-of-objects field. 
+ ExpressionTestCase( + "field_composite_array", + expression={ + "$rtrim": { + "input": {"$arrayElemAt": ["$a.b", 0]}, + "chars": {"$arrayElemAt": ["$a.b", 1]}, + } + }, + doc={"a": [{"b": "helloaaa"}, {"b": "a"}]}, + expected="hello", + msg="$rtrim should accept input and chars from composite array field paths", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_FIELD_REF_TESTS)) +def test_rtrim_field_refs(collection, test_case: ExpressionTestCase): + """Test $rtrim with document field references.""" + result = execute_expression_with_insert(collection, test_case.expression, test_case.doc) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invalid_args.py new file mode 100644 index 00000000..f1a39336 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invalid_args.py @@ -0,0 +1,205 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + FAILED_TO_PARSE_ERROR, + INVALID_DOLLAR_FIELD_PATH, + TRIM_MISSING_INPUT_ERROR, + TRIM_UNKNOWN_FIELD_ERROR, + TRIM_WRONG_TYPE_ERROR, +) +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF +from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( + RtrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Syntax Validation]: 
invalid $rtrim object shapes produce errors. +RTRIM_SYNTAX_ERROR_TESTS: list[RtrimTest] = [ + # Non-document arguments produce TRIM_WRONG_TYPE_ERROR. + RtrimTest( + "syntax_string", + expr={"$rtrim": "hello"}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject string as argument", + ), + RtrimTest( + "syntax_array", + expr={"$rtrim": ["hello"]}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject array as argument", + ), + RtrimTest( + "syntax_null", + expr={"$rtrim": None}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject null as argument", + ), + RtrimTest( + "syntax_int", + expr={"$rtrim": 42}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject int as argument", + ), + RtrimTest( + "syntax_bool", + expr={"$rtrim": True}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject boolean as argument", + ), + RtrimTest( + "syntax_float", + expr={"$rtrim": 3.14}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject float as argument", + ), + RtrimTest( + "syntax_long", + expr={"$rtrim": Int64(42)}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject Int64 as argument", + ), + RtrimTest( + "syntax_binary", + expr={"$rtrim": Binary(b"data")}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject Binary as argument", + ), + RtrimTest( + "syntax_binary_uuid", + expr={"$rtrim": Binary(b"data", 4)}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject Binary UUID as argument", + ), + RtrimTest( + "syntax_date", + expr={"$rtrim": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject datetime as argument", + ), + RtrimTest( + "syntax_decimal128", + expr={"$rtrim": DECIMAL128_ONE_AND_HALF}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject Decimal128 as argument", + ), + RtrimTest( + "syntax_maxkey", + expr={"$rtrim": MaxKey()}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject MaxKey as 
argument", + ), + RtrimTest( + "syntax_minkey", + expr={"$rtrim": MinKey()}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject MinKey as argument", + ), + RtrimTest( + "syntax_objectid", + expr={"$rtrim": ObjectId("507f1f77bcf86cd799439011")}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject ObjectId as argument", + ), + RtrimTest( + "syntax_regex", + expr={"$rtrim": Regex("pattern")}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject Regex as argument", + ), + RtrimTest( + "syntax_timestamp", + expr={"$rtrim": Timestamp(1, 1)}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject Timestamp as argument", + ), + RtrimTest( + "syntax_code", + expr={"$rtrim": Code("function() {}")}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject Code as argument", + ), + RtrimTest( + "syntax_code_scope", + expr={"$rtrim": Code("function() {}", {"x": 1})}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$rtrim should reject Code with scope as argument", + ), + # Omitting input entirely. + RtrimTest( + "syntax_no_input", + expr={"$rtrim": {"chars": "a"}}, + error_code=TRIM_MISSING_INPUT_ERROR, + msg="$rtrim should require the input field", + ), + # Empty object. + RtrimTest( + "syntax_empty_object", + expr={"$rtrim": {}}, + error_code=TRIM_MISSING_INPUT_ERROR, + msg="$rtrim should reject empty object", + ), + # Unknown extra field. + RtrimTest( + "syntax_unknown_field", + expr={"$rtrim": {"input": "hello", "unknown": 1}}, + error_code=TRIM_UNKNOWN_FIELD_ERROR, + msg="$rtrim should reject unknown fields in the argument object", + ), + # Case-sensitive field names. + RtrimTest( + "syntax_case_sensitive_field", + expr={"$rtrim": {"Input": "hello"}}, + error_code=TRIM_UNKNOWN_FIELD_ERROR, + msg="$rtrim should reject case-mismatched field name 'Input'", + ), +] + +# Property [Dollar Sign Error]: a bare "$" is interpreted as a field path and "$$" is interpreted +# as an empty variable name. 
+RTRIM_DOLLAR_SIGN_ERROR_TESTS: list[RtrimTest] = [ + RtrimTest( + "dollar_bare_input", + input="$", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$rtrim should reject bare '$' as input field path", + ), + RtrimTest( + "dollar_bare_chars", + input="hello", + chars="$", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$rtrim should reject bare '$' as chars field path", + ), + RtrimTest( + "dollar_double_input", + input="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$rtrim should reject '$$' as empty variable name in input", + ), + RtrimTest( + "dollar_double_chars", + input="hello", + chars="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$rtrim should reject '$$' as empty variable name in chars", + ), +] + +RTRIM_INVALID_ARGS_ALL_TESTS = RTRIM_SYNTAX_ERROR_TESTS + RTRIM_DOLLAR_SIGN_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_INVALID_ARGS_ALL_TESTS)) +def test_rtrim_invalid_args(collection, test_case: RtrimTest): + """Test $rtrim syntax validation and dollar sign errors.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invariants.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invariants.py new file mode 100644 index 00000000..c17666f2 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invariants.py @@ -0,0 +1,245 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult, assertSuccess +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( + _OMIT, + RtrimTest, + _expr, +) +from 
documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_project, +) + +# Property [Identity]: empty string chars is the identity element. The result equals input +# unchanged. +RTRIM_IDENTITY_TESTS: list[RtrimTest] = [ + RtrimTest( + "identity_plain", + input="hello", + chars="", + expected="hello", + msg="$rtrim should return input unchanged when chars is empty", + ), + RtrimTest( + "identity_trailing_spaces", + input="hello ", + chars="", + expected="hello ", + msg="$rtrim should preserve trailing spaces when chars is empty", + ), + RtrimTest( + "identity_empty_input", + input="", + chars="", + expected="", + msg="$rtrim should return empty string when both input and chars are empty", + ), + RtrimTest( + "identity_repeated_trailing", + input="helloaaa", + chars="", + expected="helloaaa", + msg="$rtrim should preserve trailing chars when chars is empty", + ), + RtrimTest( + "identity_unicode", + input="日本語", + chars="", + expected="日本語", + msg="$rtrim should return Unicode input unchanged when chars is empty", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_IDENTITY_TESTS)) +def test_rtrim_identity(collection, test_case: RtrimTest): + """Test $rtrim identity (empty chars).""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) + + +# Property [Idempotency]: applying $rtrim twice with the same chars yields the same result as +# applying it once. 
+RTRIM_IDEMPOTENCY_TESTS: list[RtrimTest] = [ + RtrimTest( + "idempotent_default", + input="hello ", + expected="hello", + msg="$rtrim should be idempotent with default whitespace trimming", + ), + RtrimTest( + "idempotent_custom", + input="helloaaa", + chars="a", + expected="hello", + msg="$rtrim should be idempotent with custom chars", + ), + RtrimTest( + "idempotent_mixed_whitespace", + input="hello \t\n", + expected="hello", + msg="$rtrim should be idempotent with mixed whitespace", + ), + RtrimTest( + "idempotent_no_trim", + input="hello", + expected="hello", + msg="$rtrim should be idempotent when no trimming occurs", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_IDEMPOTENCY_TESTS)) +def test_rtrim_idempotency(collection, test_case: RtrimTest): + """Test $rtrim idempotency.""" + once = _expr(test_case) + twice = {"$rtrim": {"input": once}} + if test_case.chars is not _OMIT: + twice["$rtrim"]["chars"] = test_case.chars + result = execute_project(collection, {"once": once, "twice": twice}) + assertSuccess( + result, [{"once": test_case.expected, "twice": test_case.expected}], msg=test_case.msg + ) + + +# Property [Prefix Invariant]: the result is always a prefix of the original input string. 
+RTRIM_PREFIX_INVARIANT_TESTS: list[RtrimTest] = [ + RtrimTest( + "prefix_default_trim", + input="hello ", + msg="$rtrim result should be a prefix of input after default trimming", + ), + RtrimTest( + "prefix_custom_chars", + input="helloaaa", + chars="a", + msg="$rtrim result should be a prefix of input after custom char trimming", + ), + RtrimTest( + "prefix_no_trim_needed", + input="hello", + msg="$rtrim result should be a prefix of input when no trimming needed", + ), + RtrimTest( + "prefix_all_trimmed", + input=" ", + msg="$rtrim result should be a prefix of input when all chars trimmed", + ), + RtrimTest( + "prefix_mixed_whitespace", + input="hello world\t\n", + msg="$rtrim result should be a prefix of input with mixed whitespace", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_PREFIX_INVARIANT_TESTS)) +def test_rtrim_prefix_invariant(collection, test_case: RtrimTest): + """Test $rtrim result is always a prefix of the original input.""" + rtrim_result = _expr(test_case) + result_len = {"$strLenCP": rtrim_result} + prefix = {"$substrCP": [test_case.input, 0, result_len]} + result = execute_expression(collection, {"$eq": [rtrim_result, prefix]}) + assertSuccess(result, [{"result": True}], msg=test_case.msg) + + +# Property [Last Char Invariant]: the last character of a non-empty result is not a member of +# the trim character set. Only tested with custom chars where membership can be checked +# server-side via $indexOfCP. 
+RTRIM_LAST_CHAR_INVARIANT_TESTS: list[RtrimTest] = [ + RtrimTest( + "last_char_single", + input="helloaaa", + chars="a", + msg="$rtrim result's last char should not be in single-char trim set", + ), + RtrimTest( + "last_char_multi", + input="defabc", + chars="abc", + msg="$rtrim result's last char should not be in multi-char trim set", + ), + RtrimTest( + "last_char_all_trailing", + input="abcxyz", + chars="xyz", + msg="$rtrim result's last char should not be in trim set after full trailing trim", + ), + RtrimTest( + "last_char_no_trim", + input="hello", + chars="xyz", + msg="$rtrim result's last char should not be in trim set when no trimming occurs", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_LAST_CHAR_INVARIANT_TESTS)) +def test_rtrim_last_char_invariant(collection, test_case: RtrimTest): + """Test $rtrim result's last character is not in the trim set.""" + rtrim_result = _expr(test_case) + result_len = {"$strLenCP": rtrim_result} + last_char = {"$substrCP": [rtrim_result, {"$subtract": [result_len, 1]}, 1]} + result = execute_project( + collection, + { + "lastCharNotInChars": { + "$cond": { + "if": {"$gt": [result_len, 0]}, + "then": {"$eq": [{"$indexOfCP": [test_case.chars, last_char]}, -1]}, + "else": True, + } + }, + }, + ) + assertSuccess(result, [{"lastCharNotInChars": True}], msg=test_case.msg) + + +# Property [Return Type]: the result is always a string when the expression succeeds and no null +# propagation occurs. 
+RTRIM_RETURN_TYPE_TESTS: list[RtrimTest] = [ + RtrimTest( + "return_type_default_trim", + input="hello ", + msg="$rtrim should return string type after default trimming", + ), + RtrimTest( + "return_type_custom_no_match", + input="hello", + chars="x", + msg="$rtrim should return string type when custom chars don't match", + ), + RtrimTest( + "return_type_custom_trim", + input="helloaaa", + chars="a", + msg="$rtrim should return string type after custom char trimming", + ), + RtrimTest( + "return_type_empty", input="", msg="$rtrim should return string type for empty input" + ), + RtrimTest( + "return_type_all_whitespace", + input=" ", + msg="$rtrim should return string type when all whitespace is trimmed", + ), + RtrimTest( + "return_type_unicode", + input="日本語", + chars="語", + msg="$rtrim should return string type after Unicode char trimming", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_RETURN_TYPE_TESTS)) +def test_rtrim_return_type(collection, test_case: RtrimTest): + """Test $rtrim result is always type string.""" + result = execute_expression(collection, {"$type": _expr(test_case)}) + assertSuccess(result, [{"result": "string"}], msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_null.py new file mode 100644 index 00000000..b6f3139f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_null.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import MISSING +from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( + _OMIT, + RtrimTest, + _expr, +) +from 
documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Argument shapes for null/missing tests. _PLACEHOLDER is replaced with None or MISSING. +_PLACEHOLDER = object() +_NULL_PATTERNS = [ + (_PLACEHOLDER, _OMIT, "input_default_chars", "input is {kind} with default chars"), + (_PLACEHOLDER, "abc", "input_custom_chars", "input is {kind} with custom chars"), + ("hello", _PLACEHOLDER, "chars_valid_input", "chars is {kind} with valid string input"), + (_PLACEHOLDER, _PLACEHOLDER, "both", "both input and chars are {kind}"), + # Null/missing input takes precedence over non-string chars (no error raised). + (_PLACEHOLDER, 123, "precedence_chars_int", "input is {kind} even with non-string chars"), +] + + +def _build_null_tests(null_value, prefix) -> list[RtrimTest]: + return [ + RtrimTest( + f"{prefix}_{suffix}", + input=null_value if _input is _PLACEHOLDER else _input, + chars=null_value if _chars is _PLACEHOLDER else _chars, + expected=None, + msg=f"$rtrim should return null when {msg_tmpl.format(kind=prefix)}", + ) + for _input, _chars, suffix, msg_tmpl in _NULL_PATTERNS + ] + + +# Property [Null Propagation]: when either input or chars is null, the result is null. +RTRIM_NULL_TESTS = _build_null_tests(None, "null") + +# Property [Null Propagation - missing]: missing fields are treated as null. +RTRIM_MISSING_TESTS = _build_null_tests(MISSING, "missing") + +# Property [Null Propagation - mixed null and missing]: combining null and missing across +# positions still produces null. 
+RTRIM_MIXED_NULL_TESTS: list[RtrimTest] = [ + RtrimTest( + "mixed_null_input_missing_chars", + input=None, + chars=MISSING, + expected=None, + msg="$rtrim should return null when input is null and chars is missing", + ), + RtrimTest( + "mixed_missing_input_null_chars", + input=MISSING, + chars=None, + expected=None, + msg="$rtrim should return null when input is missing and chars is null", + ), +] + +RTRIM_NULL_ALL_TESTS = RTRIM_NULL_TESTS + RTRIM_MISSING_TESTS + RTRIM_MIXED_NULL_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_NULL_ALL_TESTS)) +def test_rtrim_null(collection, test_case: RtrimTest): + """Test $rtrim null propagation.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_size_limit.py new file mode 100644 index 00000000..5b69743f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_size_limit.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import STRING_SIZE_LIMIT_ERROR +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import STRING_SIZE_LIMIT_BYTES +from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( + RtrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [String Size Limit - Success]: input one byte under the limit is accepted. 
+RTRIM_SIZE_LIMIT_SUCCESS_TESTS: list[RtrimTest] = [ + RtrimTest( + "size_one_under", + input="a" * (STRING_SIZE_LIMIT_BYTES - 1), + expected="a" * (STRING_SIZE_LIMIT_BYTES - 1), + msg="$rtrim should accept input one byte under the size limit", + ), + # 2-byte chars: one byte under the limit. + RtrimTest( + "size_one_under_2byte", + input="\u00e9" * ((STRING_SIZE_LIMIT_BYTES - 1) // 2) + "a", + expected="\u00e9" * ((STRING_SIZE_LIMIT_BYTES - 1) // 2) + "a", + msg="$rtrim should accept 2-byte character input one byte under the size limit", + ), + # Large input with many trailing trim characters, just under the limit. + RtrimTest( + "size_trim_trailing", + input="hello" + "a" * (STRING_SIZE_LIMIT_BYTES - 6), + chars="a", + expected="hello", + msg="$rtrim should trim many trailing characters near the size limit", + ), +] + + +# Property [String Size Limit - Error]: input at the BSON string byte limit produces an error. +RTRIM_SIZE_LIMIT_ERROR_TESTS: list[RtrimTest] = [ + RtrimTest( + "size_at_limit", + input="a" * STRING_SIZE_LIMIT_BYTES, + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$rtrim should reject input at the BSON string byte limit", + ), + RtrimTest( + "size_at_limit_2byte", + input="\u00e9" * (STRING_SIZE_LIMIT_BYTES // 2), + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$rtrim should reject 2-byte character input at the BSON string byte limit", + ), +] + +RTRIM_SIZE_LIMIT_ALL_TESTS = RTRIM_SIZE_LIMIT_SUCCESS_TESTS + RTRIM_SIZE_LIMIT_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_SIZE_LIMIT_ALL_TESTS)) +def test_rtrim_size_limit(collection, test_case: RtrimTest): + """Test $rtrim string size limit.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_type_errors.py 
b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_type_errors.py new file mode 100644 index 00000000..99ab19d9 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_type_errors.py @@ -0,0 +1,279 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import TRIM_CHARS_TYPE_ERROR, TRIM_INPUT_TYPE_ERROR +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF, MISSING +from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( + RtrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Type Strictness - input]: non-string, non-null input produces TRIM_INPUT_TYPE_ERROR. 
+RTRIM_INPUT_TYPE_ERROR_TESTS: list[RtrimTest] = [ + RtrimTest( + "type_input_array", + input=["a"], + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject array as input", + ), + RtrimTest( + "type_input_binary", + input=Binary(b"data"), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject Binary as input", + ), + RtrimTest( + "type_input_bool", + input=True, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject boolean as input", + ), + RtrimTest( + "type_input_date", + input=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject datetime as input", + ), + RtrimTest( + "type_input_decimal128", + input=DECIMAL128_ONE_AND_HALF, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject Decimal128 as input", + ), + RtrimTest( + "type_input_float", + input=3.14, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject float as input", + ), + RtrimTest( + "type_input_int", + input=42, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject int as input", + ), + RtrimTest( + "type_input_long", + input=Int64(42), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject Int64 as input", + ), + RtrimTest( + "type_input_maxkey", + input=MaxKey(), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject MaxKey as input", + ), + RtrimTest( + "type_input_minkey", + input=MinKey(), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject MinKey as input", + ), + RtrimTest( + "type_input_object", + input={"a": 1}, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject object as input", + ), + RtrimTest( + "type_input_objectid", + input=ObjectId("507f1f77bcf86cd799439011"), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject ObjectId as input", + ), + RtrimTest( + "type_input_regex", + input=Regex("pattern"), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject Regex as input", + ), + RtrimTest( + "type_input_timestamp", + 
input=Timestamp(1, 1), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject Timestamp as input", + ), + RtrimTest( + "type_input_code", + input=Code("function() {}"), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject Code as input", + ), + RtrimTest( + "type_input_code_scope", + input=Code("function() {}", {"x": 1}), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject Code with scope as input", + ), +] + +# Property [Type Strictness - chars]: non-string, non-null chars produces TRIM_CHARS_TYPE_ERROR. +RTRIM_CHARS_TYPE_ERROR_TESTS: list[RtrimTest] = [ + RtrimTest( + "type_chars_array", + input="hello", + chars=["a"], + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject array as chars", + ), + RtrimTest( + "type_chars_binary", + input="hello", + chars=Binary(b"data"), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject Binary as chars", + ), + RtrimTest( + "type_chars_bool", + input="hello", + chars=True, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject boolean as chars", + ), + RtrimTest( + "type_chars_date", + input="hello", + chars=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject datetime as chars", + ), + RtrimTest( + "type_chars_decimal128", + input="hello", + chars=DECIMAL128_ONE_AND_HALF, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject Decimal128 as chars", + ), + RtrimTest( + "type_chars_float", + input="hello", + chars=3.14, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject float as chars", + ), + RtrimTest( + "type_chars_int", + input="hello", + chars=42, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject int as chars", + ), + RtrimTest( + "type_chars_long", + input="hello", + chars=Int64(42), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject Int64 as chars", + ), + RtrimTest( + "type_chars_maxkey", + input="hello", + chars=MaxKey(), + error_code=TRIM_CHARS_TYPE_ERROR, + 
msg="$rtrim should reject MaxKey as chars", + ), + RtrimTest( + "type_chars_minkey", + input="hello", + chars=MinKey(), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject MinKey as chars", + ), + RtrimTest( + "type_chars_object", + input="hello", + chars={"a": 1}, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject object as chars", + ), + RtrimTest( + "type_chars_objectid", + input="hello", + chars=ObjectId("507f1f77bcf86cd799439011"), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject ObjectId as chars", + ), + RtrimTest( + "type_chars_regex", + input="hello", + chars=Regex("pattern"), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject Regex as chars", + ), + RtrimTest( + "type_chars_timestamp", + input="hello", + chars=Timestamp(1, 1), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject Timestamp as chars", + ), + RtrimTest( + "type_chars_code", + input="hello", + chars=Code("function() {}"), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject Code as chars", + ), + RtrimTest( + "type_chars_code_scope", + input="hello", + chars=Code("function() {}", {"x": 1}), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$rtrim should reject Code with scope as chars", + ), +] + +# Property [Type Strictness - precedence]: when both input and chars are non-string, the input +# type error takes precedence. +RTRIM_TYPE_PRECEDENCE_TESTS: list[RtrimTest] = [ + RtrimTest( + "type_precedence_both_int", + input=123, + chars=456, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should report input type error before chars type error when both are invalid", + ), +] + +# Property [Type Strictness - input with null chars]: non-string input errors even when chars is +# null or missing. 
+RTRIM_INPUT_TYPE_NULL_CHARS_TESTS: list[RtrimTest] = [ + RtrimTest( + "type_input_int_chars_null", + input=123, + chars=None, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject non-string input even when chars is null", + ), + RtrimTest( + "type_input_int_chars_missing", + input=123, + chars=MISSING, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$rtrim should reject non-string input even when chars is missing", + ), +] + +RTRIM_TYPE_ERROR_ALL_TESTS = ( + RTRIM_INPUT_TYPE_ERROR_TESTS + + RTRIM_CHARS_TYPE_ERROR_TESTS + + RTRIM_TYPE_PRECEDENCE_TESTS + + RTRIM_INPUT_TYPE_NULL_CHARS_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(RTRIM_TYPE_ERROR_ALL_TESTS)) +def test_rtrim_type_errors(collection, test_case: RtrimTest): + """Test $rtrim type strictness.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/utils/rtrim_common.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/utils/rtrim_common.py new file mode 100644 index 00000000..cbe8424e --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/utils/rtrim_common.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, cast + +from documentdb_tests.framework.test_case import BaseTestCase + +# Sentinel for "omit this parameter from the expression." Distinct from None (which means pass +# null) and MISSING (which means reference a missing field). 
+_OMIT = object() + + +@dataclass(frozen=True) +class RtrimTest(BaseTestCase): + """Test case for $rtrim operator.""" + + input: Any = None + chars: Any = _OMIT + expr: Any = None # Raw expression override for syntax tests + + +def _expr(test_case: RtrimTest) -> dict[str, Any]: + if test_case.expr is not None: + return cast(dict[str, Any], test_case.expr) + params: dict[str, Any] = {"input": test_case.input} + if test_case.chars is not _OMIT: + params["chars"] = test_case.chars + return {"$rtrim": params} diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_custom_chars.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_custom_chars.py new file mode 100644 index 00000000..42cb75ed --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_custom_chars.py @@ -0,0 +1,137 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + TrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Custom Chars]: when chars is provided, only those characters are trimmed from both +# ends. Each character in chars is treated individually (not as a substring), and the order of +# characters in chars does not affect the result. 
+TRIM_CUSTOM_CHARS_TESTS: list[TrimTest] = [ + TrimTest( + "custom_single_char", + input="aaahelloaaa", + chars="a", + expected="hello", + msg="$trim should trim occurrences of a single custom char from both ends", + ), + TrimTest( + "custom_repeated_single", + input="xxxhelloxxx", + chars="x", + expected="hello", + msg="$trim should trim repeated custom char from both ends", + ), + # Custom chars completely replaces the default whitespace set. + TrimTest( + "custom_spaces_not_trimmed", + input=" xxxhelloxxx ", + chars="x", + expected=" xxxhelloxxx ", + msg="$trim should not trim spaces when custom chars replaces default set", + ), + TrimTest( + "custom_tabs_not_trimmed", + input="\t\txxxhelloxxx\t\t", + chars="x", + expected="\t\txxxhelloxxx\t\t", + msg="$trim should not trim tabs when custom chars replaces default set", + ), + # Duplicate characters in chars have no additional effect. + TrimTest( + "custom_duplicate_chars", + input="aaahelloaaa", + chars="aab", + expected="hello", + msg="$trim should ignore duplicate characters in chars", + ), + # Each character in chars is treated individually, not as a substring pattern. + TrimTest( + "custom_individual_any_order", + input="cbahelloabc", + chars="abc", + expected="hello", + msg="$trim should treat each char in chars individually, not as a substring", + ), + TrimTest( + "custom_individual_mixed_repeats", + input="aabbcchelloccbbaa", + chars="abc", + expected="hello", + msg="$trim should trim mixed repeats of individual chars from both ends", + ), + # Characters not in chars are preserved. + TrimTest( + "custom_no_match", + input="xhellox", + chars="a", + expected="xhellox", + msg="$trim should preserve chars not in the trim set", + ), + # Order of characters in chars does not affect the result. 
+ TrimTest( + "custom_order_abc", + input="bcaahelloaabc", + chars="abc", + expected="hello", + msg="$trim should produce same result regardless of chars order (abc)", + ), + TrimTest( + "custom_order_cba", + input="bcaahelloaabc", + chars="cba", + expected="hello", + msg="$trim should produce same result regardless of chars order (cba)", + ), + TrimTest( + "custom_order_bac", + input="bcaahelloaabc", + chars="bac", + expected="hello", + msg="$trim should produce same result regardless of chars order (bac)", + ), +] + + +# Property [Whitespace Subset Chars]: when chars is a subset of whitespace characters, only +# those specific whitespace characters are trimmed; other whitespace is preserved. +TRIM_WHITESPACE_SUBSET_TESTS: list[TrimTest] = [ + TrimTest( + "subset_tab_only", + input="\t hello \t", + chars="\t", + expected=" hello ", + msg="$trim with chars=tab should trim only tabs, preserving spaces", + ), + TrimTest( + "subset_space_tab", + input=" \t\nhello\n\t ", + chars=" \t", + expected="\nhello\n", + msg="$trim with chars=space+tab should preserve newlines", + ), + TrimTest( + "subset_newline_only", + input="\n\n hello \n\n", + chars="\n", + expected=" hello ", + msg="$trim with chars=newline should trim only newlines, preserving spaces", + ), +] + +TRIM_CUSTOM_CHARS_ALL_TESTS = TRIM_CUSTOM_CHARS_TESTS + TRIM_WHITESPACE_SUBSET_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(TRIM_CUSTOM_CHARS_ALL_TESTS)) +def test_trim_custom_chars(collection, test_case: TrimTest): + """Test $trim custom chars trimming.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_default_trim.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_default_trim.py new file mode 100644 index 00000000..0d58a92d --- /dev/null 
+++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_default_trim.py @@ -0,0 +1,323 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + TrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Default Trimming]: when chars is omitted, whitespace is trimmed from both ends. The +# default set includes ASCII whitespace (space, tab, newline, carriage return, form feed, +# vertical tab), the null byte (U+0000), non-breaking space (U+00A0), and a subset of Unicode +# "Zs" category spaces (U+1680 and U+2000 through U+200A). +TRIM_DEFAULT_TRIM_TESTS: list[TrimTest] = [ + # ASCII whitespace characters. + TrimTest( + "default_space", + input=" hello ", + expected="hello", + msg="$trim should trim surrounding spaces by default", + ), + TrimTest( + "default_tab", + input="\thello\t", + expected="hello", + msg="$trim should trim surrounding tabs by default", + ), + TrimTest( + "default_newline", + input="\nhello\n", + expected="hello", + msg="$trim should trim surrounding newlines by default", + ), + TrimTest( + "default_cr", + input="\rhello\r", + expected="hello", + msg="$trim should trim surrounding carriage returns by default", + ), + TrimTest( + "default_form_feed", + input="\fhello\f", + expected="hello", + msg="$trim should trim surrounding form feeds by default", + ), + TrimTest( + "default_vertical_tab", + input="\x0bhello\x0b", + expected="hello", + msg="$trim should trim surrounding vertical tabs by default", + ), + # Null byte (U+0000). + TrimTest( + "default_null_byte", + input="\x00hello\x00", + expected="hello", + msg="$trim should trim surrounding null bytes by default", + ), + # Non-breaking space (U+00A0).
+ TrimTest( + "default_nbsp", + input="\u00a0hello\u00a0", + expected="hello", + msg="$trim should trim surrounding non-breaking spaces by default", + ), + # Unicode "Zs" category spaces. + TrimTest( + "default_en_space", + input="\u2000hello\u2000", + expected="hello", + msg="$trim should trim surrounding en space (U+2000) by default", + ), + TrimTest( + "default_em_space", + input="\u2003hello\u2003", + expected="hello", + msg="$trim should trim surrounding em space (U+2003) by default", + ), + TrimTest( + "default_thin_space", + input="\u2009hello\u2009", + expected="hello", + msg="$trim should trim surrounding thin space (U+2009) by default", + ), + TrimTest( + "default_hair_space", + input="\u200ahello\u200a", + expected="hello", + msg="$trim should trim surrounding hair space (U+200A) by default", + ), + TrimTest( + "default_ogham_space", + input="\u1680hello\u1680", + expected="hello", + msg="$trim should trim surrounding ogham space (U+1680) by default", + ), + TrimTest( + "default_em_quad", + input="\u2001hello\u2001", + expected="hello", + msg="$trim should trim surrounding em quad (U+2001) by default", + ), + TrimTest( + "default_en_space_2002", + input="\u2002hello\u2002", + expected="hello", + msg="$trim should trim surrounding en space (U+2002) by default", + ), + TrimTest( + "default_three_per_em", + input="\u2004hello\u2004", + expected="hello", + msg="$trim should trim surrounding three-per-em space (U+2004) by default", + ), + TrimTest( + "default_four_per_em", + input="\u2005hello\u2005", + expected="hello", + msg="$trim should trim surrounding four-per-em space (U+2005) by default", + ), + TrimTest( + "default_six_per_em", + input="\u2006hello\u2006", + expected="hello", + msg="$trim should trim surrounding six-per-em space (U+2006) by default", + ), + TrimTest( + "default_figure_space", + input="\u2007hello\u2007", + expected="hello", + msg="$trim should trim surrounding figure space (U+2007) by default", + ), + TrimTest( + 
"default_punctuation_space", + input="\u2008hello\u2008", + expected="hello", + msg="$trim should trim surrounding punctuation space (U+2008) by default", + ), + # Multiple mixed whitespace characters. + TrimTest( + "default_mixed_ascii_whitespace", + input=" \t\n\r\f\x0bhello\x0b\f\r\n\t ", + expected="hello", + msg="$trim should trim mixed ASCII whitespace from both ends", + ), + TrimTest( + "default_mixed_unicode_whitespace", + input="\u00a0\u2000\u2003hello\u2003\u2000\u00a0", + expected="hello", + msg="$trim should trim mixed Unicode whitespace from both ends", + ), + TrimTest( + "default_mixed_ascii_and_unicode", + input=" \t\u00a0\u2000hello\u2000\u00a0\t ", + expected="hello", + msg="$trim should trim mixed ASCII and Unicode whitespace from both ends", + ), + # Only leading whitespace. + TrimTest( + "default_leading_only", + input=" hello", + expected="hello", + msg="$trim should trim leading-only whitespace", + ), + # Only trailing whitespace. + TrimTest( + "default_trailing_only", + input="hello ", + expected="hello", + msg="$trim should trim trailing-only whitespace", + ), + # Interior whitespace is preserved. + TrimTest( + "default_interior_preserved", + input=" hello world ", + expected="hello world", + msg="$trim should preserve interior whitespace", + ), + # All 20 default whitespace code points on both sides. + TrimTest( + "default_all_20_mixed", + input=( + "\x00\t\n\x0b\f\r \u00a0\u1680\u2000\u2001\u2002" + "\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200ahello" + "\u200a\u2009\u2008\u2007\u2006\u2005\u2004\u2003" + "\u2002\u2001\u2000\u1680\u00a0 \r\f\x0b\n\t\x00" + ), + expected="hello", + msg="$trim should trim all 20 default whitespace code points from both ends", + ), +] + + +# Property [Default Trimming - exclusions]: characters not in the default whitespace set are +# preserved at both ends. +TRIM_DEFAULT_TRIM_EXCLUSION_TESTS: list[TrimTest] = [ + # Zero-width space (U+200B). 
+ TrimTest( + "default_excl_zwsp", + input="\u200bhello\u200b", + expected="\u200bhello\u200b", + msg="$trim should not trim zero-width space (U+200B) by default", + ), + # Line separator (U+2028). + TrimTest( + "default_excl_line_separator", + input="\u2028hello\u2028", + expected="\u2028hello\u2028", + msg="$trim should not trim line separator (U+2028) by default", + ), + # Paragraph separator (U+2029). + TrimTest( + "default_excl_paragraph_separator", + input="\u2029hello\u2029", + expected="\u2029hello\u2029", + msg="$trim should not trim paragraph separator (U+2029) by default", + ), + # Next line (U+0085). + TrimTest( + "default_excl_next_line", + input="\u0085hello\u0085", + expected="\u0085hello\u0085", + msg="$trim should not trim next line (U+0085) by default", + ), + # Ideographic space (U+3000). + TrimTest( + "default_excl_ideographic_space", + input="\u3000hello\u3000", + expected="\u3000hello\u3000", + msg="$trim should not trim ideographic space (U+3000) by default", + ), + # Narrow no-break space (U+202F). + TrimTest( + "default_excl_narrow_nbsp", + input="\u202fhello\u202f", + expected="\u202fhello\u202f", + msg="$trim should not trim narrow no-break space (U+202F) by default", + ), + # Medium mathematical space (U+205F). + TrimTest( + "default_excl_medium_math_space", + input="\u205fhello\u205f", + expected="\u205fhello\u205f", + msg="$trim should not trim medium mathematical space (U+205F) by default", + ), + # BOM / zero-width no-break space (U+FEFF). + TrimTest( + "default_excl_bom", + input="\ufeffhello\ufeff", + expected="\ufeffhello\ufeff", + msg="$trim should not trim BOM / zero-width no-break space (U+FEFF) by default", + ), +] + + +# Property [Edge Cases]: the operator produces correct results at input extremes: empty strings, +# fully trimmable strings, and null-byte or control-character boundaries.
+TRIM_EDGE_TESTS: list[TrimTest] = [ + TrimTest( + "edge_empty_default", + input="", + expected="", + msg="$trim should return empty string for empty input with default chars", + ), + TrimTest( + "edge_empty_custom", + input="", + chars="abc", + expected="", + msg="$trim should return empty string for empty input with custom chars", + ), + TrimTest( + "edge_all_whitespace", + input=" ", + expected="", + msg="$trim should return empty string when input is all whitespace", + ), + TrimTest( + "edge_all_in_chars", + input="aaabbb", + chars="ab", + expected="", + msg="$trim should return empty string when all characters are in trim set", + ), + # Null byte in custom chars is not treated as a C-style string terminator. + TrimTest( + "edge_null_byte_custom", + input="\x00\x00hello\x00\x00", + chars="\x00", + expected="hello", + msg="$trim should trim null bytes without treating them as C-string terminators", + ), + # Control characters stop default trimming. + TrimTest( + "edge_control_char_stops_trim", + input="\x01 hello \x01", + expected="\x01 hello \x01", + msg="$trim should not trim control character U+0001 by default", + ), + # Whitespace outside the control chars is trimmed; trimming stops at the control char.
+ TrimTest( + "edge_space_around_control_char", + input=" \x01hello\x01 ", + expected="\x01hello\x01", + msg="$trim should trim surrounding spaces but stop at control characters", + ), +] + +TRIM_DEFAULT_TRIM_ALL_TESTS = ( + TRIM_DEFAULT_TRIM_TESTS + TRIM_DEFAULT_TRIM_EXCLUSION_TESTS + TRIM_EDGE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(TRIM_DEFAULT_TRIM_ALL_TESTS)) +def test_trim_default_trim(collection, test_case: TrimTest): + """Test $trim default trimming and edge cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_encoding.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_encoding.py new file mode 100644 index 00000000..3ba5ac0f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_encoding.py @@ -0,0 +1,174 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + TrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Encoding and Character Handling]: trimming operates on whole Unicode code points, +# not bytes or substrings. +TRIM_ENCODING_TESTS: list[TrimTest] = [ + # 2-byte UTF-8 character (U+00E9, é). + TrimTest( + "enc_2byte_trim", + input="\u00e9hello\u00e9", + chars="\u00e9", + expected="hello", + msg="$trim should trim 2-byte UTF-8 character é (U+00E9) from both ends", + ), + # 3-byte UTF-8 character (U+2603, ☃). 
+ TrimTest( + "enc_3byte_trim", + input="\u2603hello\u2603", + chars="\u2603", + expected="hello", + msg="$trim should trim 3-byte UTF-8 character ☃ (U+2603) from both ends", + ), + # 4-byte UTF-8 character (U+1F600, 😀). + TrimTest( + "enc_4byte_trim", + input="\U0001f600hello\U0001f600", + chars="\U0001f600", + expected="hello", + msg="$trim should trim 4-byte UTF-8 character 😀 (U+1F600) from both ends", + ), + # Mixed multi-byte characters in chars, each treated individually. + TrimTest( + "enc_mixed_multibyte", + input="\u00e9\u2603\U0001f600hello\U0001f600\u2603\u00e9", + chars="\U0001f600\u00e9\u2603", + expected="hello", + msg="$trim should trim mixed multi-byte characters individually from both ends", + ), + # Partial bytes do not match. Trimming "é" (U+00E9) should not affect "e" (U+0065). + TrimTest( + "enc_partial_no_match", + input="ehelloe", + chars="\u00e9", + expected="ehelloe", + msg="$trim should not match partial byte sequences (é vs e)", + ), + # Regex-special characters treated as literals. + TrimTest( + "enc_regex_special", + input=".*\\hello\\*.", + chars="\\*.", + expected="hello", + msg="$trim should treat regex-special characters as literals", + ), + TrimTest( + "enc_regex_plus", + input="+++hello+++", + chars="+", + expected="hello", + msg="$trim should treat + as a literal character", + ), + TrimTest( + "enc_regex_question", + input="???hello???", + chars="?", + expected="hello", + msg="$trim should treat ? as a literal character", + ), + TrimTest( + "enc_regex_brackets", + input="(([hello(([", + chars="([", + expected="hello", + msg="$trim should treat ( and [ as literal characters", + ), + # Trimming decomposed form: "e" in chars matches the base "e" independently of the + # following combining mark. Both the leading "e" and trailing "e" are trimmed. 
+ TrimTest( + "enc_base_char_strips_from_decomposed", + input="e\u0301helloe", + chars="e", + expected="\u0301hello", + msg="$trim should trim base char independently from decomposed sequence", + ), + # Combining mark (U+0301) is a valid code point and can be trimmed independently of its + # base character. + TrimTest( + "enc_combining_mark_trim", + input="e\u0301helloe\u0301", + chars="\u0301", + expected="e\u0301helloe", + msg="$trim should trim combining mark (U+0301) only where it appears independently", + ), + # Precomposed é in input, decomposed chars "e" + combining acute. Each char in chars is + # individual, so neither "e" nor U+0301 matches U+00E9. + TrimTest( + "enc_decomposed_chars_precomposed_input", + input="\u00e9hello\u00e9", + chars="e\u0301", + expected="\u00e9hello\u00e9", + msg="$trim should not match precomposed é with decomposed chars e+combining accent", + ), + # Case sensitivity: uppercase and lowercase are distinct code points. + TrimTest( + "enc_case_lower_no_match", + input="ABChelloABC", + chars="abc", + expected="ABChelloABC", + msg="$trim should not trim uppercase when chars contains lowercase", + ), + TrimTest( + "enc_case_upper_no_match", + input="abchelloabc", + chars="ABC", + expected="abchelloabc", + msg="$trim should not trim lowercase when chars contains uppercase", + ), + TrimTest( + "enc_case_exact_match", + input="AbChelloAbC", + chars="AbC", + expected="hello", + msg="$trim should trim when case matches exactly", + ), + # Greek case sensitivity. + TrimTest( + "enc_case_greek", + input="\u03c3hello\u03c3", + chars="\u03a3", + expected="\u03c3hello\u03c3", + msg="$trim should not fold Greek lowercase σ to uppercase Σ", + ), + # German sharp s (U+00DF) trimmed as single code point. + TrimTest( + "enc_sharp_s", + input="\u00df\u00dfhello\u00df\u00df", + chars="\u00df", + expected="hello", + msg="$trim should trim ß (U+00DF) as a single code point from both ends", + ), + # Surrogate-adjacent code points handled correctly. 
+ TrimTest( + "enc_surrogate_adj_d7ff", + input="\ud7ffhello\ud7ff", + chars="\ud7ff", + expected="hello", + msg="$trim should handle surrogate-adjacent code point U+D7FF", + ), + TrimTest( + "enc_surrogate_adj_e000", + input="\ue000hello\ue000", + chars="\ue000", + expected="hello", + msg="$trim should handle surrogate-adjacent code point U+E000", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TRIM_ENCODING_TESTS)) +def test_trim_encoding(collection, test_case: TrimTest): + """Test $trim encoding and character handling.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_input_forms.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_input_forms.py new file mode 100644 index 00000000..9f842881 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_input_forms.py @@ -0,0 +1,104 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + TrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.expression_test_case import ExpressionTestCase +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_expression_with_insert, +) + +# Property [Expression Arguments]: input and chars accept any expression that resolves to a +# string. Nested $trim expressions are also accepted. +TRIM_EXPR_TESTS: list[TrimTest] = [ + # input is an expression. 
+ TrimTest( + "expr_input_concat", + input={"$concat": [" ", "hello", " "]}, + expected="hello", + msg="$trim should accept $concat expression as input", + ), + # chars is an expression. + TrimTest( + "expr_chars_concat", + input="aaahelloaaa", + chars={"$concat": ["a"]}, + expected="hello", + msg="$trim should accept $concat expression as chars", + ), + # Both input and chars are expressions. + TrimTest( + "expr_both", + input={"$concat": ["xx", "hello", "xx"]}, + chars={"$concat": ["x"]}, + expected="hello", + msg="$trim should accept expressions for both input and chars", + ), + # Nested $trim as input to another $trim. + TrimTest( + "expr_nested_trim", + input={"$trim": {"input": " aahelloaa "}}, + chars="a", + expected="hello", + msg="$trim should accept nested $trim as input expression", + ), + # $literal for dollar-prefixed strings. + TrimTest( + "expr_literal_dollar", + input={"$literal": "$$$hello$$$"}, + chars={"$literal": "$"}, + expected="hello", + msg="$trim should accept $literal for dollar-prefixed strings", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TRIM_EXPR_TESTS)) +def test_trim_input_forms(collection, test_case: TrimTest): + """Test $trim input forms.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) + + +# Property [Document Field References]: $trim works with field references +# from inserted documents, not just inline literals. +TRIM_FIELD_REF_TESTS: list[ExpressionTestCase] = [ + # Object expression: both input and chars from simple field paths. + ExpressionTestCase( + "field_object", + expression={"$trim": {"input": "$s", "chars": "$c"}}, + doc={"s": "aaahelloaaa", "c": "a"}, + expected="hello", + msg="$trim should accept input and chars from document field paths", + ), + # Composite array: both from $arrayElemAt on a projected array-of-objects field. 
+ ExpressionTestCase( + "field_composite_array", + expression={ + "$trim": { + "input": {"$arrayElemAt": ["$a.b", 0]}, + "chars": {"$arrayElemAt": ["$a.b", 1]}, + } + }, + doc={"a": [{"b": "aaahelloaaa"}, {"b": "a"}]}, + expected="hello", + msg="$trim should accept input and chars from composite array field paths", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TRIM_FIELD_REF_TESTS)) +def test_trim_field_refs(collection, test_case: ExpressionTestCase): + """Test $trim with document field references.""" + result = execute_expression_with_insert(collection, test_case.expression, test_case.doc) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invalid_args.py new file mode 100644 index 00000000..32437b17 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invalid_args.py @@ -0,0 +1,205 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + FAILED_TO_PARSE_ERROR, + INVALID_DOLLAR_FIELD_PATH, + TRIM_MISSING_INPUT_ERROR, + TRIM_UNKNOWN_FIELD_ERROR, + TRIM_WRONG_TYPE_ERROR, +) +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF +from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + TrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Syntax Validation]: invalid $trim 
object shapes produce errors. +TRIM_SYNTAX_ERROR_TESTS: list[TrimTest] = [ + # Non-document arguments produce TRIM_WRONG_TYPE_ERROR. + TrimTest( + "syntax_string", + expr={"$trim": "hello"}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject string as argument", + ), + TrimTest( + "syntax_array", + expr={"$trim": ["hello"]}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject array as argument", + ), + TrimTest( + "syntax_null", + expr={"$trim": None}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject null as argument", + ), + TrimTest( + "syntax_int", + expr={"$trim": 42}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject int as argument", + ), + TrimTest( + "syntax_bool", + expr={"$trim": True}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject boolean as argument", + ), + TrimTest( + "syntax_float", + expr={"$trim": 3.14}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject float as argument", + ), + TrimTest( + "syntax_long", + expr={"$trim": Int64(42)}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject Int64 as argument", + ), + TrimTest( + "syntax_binary", + expr={"$trim": Binary(b"data")}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject Binary as argument", + ), + TrimTest( + "syntax_binary_uuid", + expr={"$trim": Binary(b"data", 4)}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject Binary UUID as argument", + ), + TrimTest( + "syntax_date", + expr={"$trim": datetime(2024, 1, 1, tzinfo=timezone.utc)}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject datetime as argument", + ), + TrimTest( + "syntax_decimal128", + expr={"$trim": DECIMAL128_ONE_AND_HALF}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject Decimal128 as argument", + ), + TrimTest( + "syntax_maxkey", + expr={"$trim": MaxKey()}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject MaxKey as argument", + ), + TrimTest( + "syntax_minkey", + 
expr={"$trim": MinKey()}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject MinKey as argument", + ), + TrimTest( + "syntax_objectid", + expr={"$trim": ObjectId("507f1f77bcf86cd799439011")}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject ObjectId as argument", + ), + TrimTest( + "syntax_regex", + expr={"$trim": Regex("pattern")}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject Regex as argument", + ), + TrimTest( + "syntax_timestamp", + expr={"$trim": Timestamp(1, 1)}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject Timestamp as argument", + ), + TrimTest( + "syntax_code", + expr={"$trim": Code("function() {}")}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject Code as argument", + ), + TrimTest( + "syntax_code_scope", + expr={"$trim": Code("function() {}", {"x": 1})}, + error_code=TRIM_WRONG_TYPE_ERROR, + msg="$trim should reject Code with scope as argument", + ), + # Omitting input entirely. + TrimTest( + "syntax_no_input", + expr={"$trim": {"chars": "a"}}, + error_code=TRIM_MISSING_INPUT_ERROR, + msg="$trim should require the input field", + ), + # Empty object. + TrimTest( + "syntax_empty_object", + expr={"$trim": {}}, + error_code=TRIM_MISSING_INPUT_ERROR, + msg="$trim should reject empty object", + ), + # Unknown extra field. + TrimTest( + "syntax_unknown_field", + expr={"$trim": {"input": "hello", "unknown": 1}}, + error_code=TRIM_UNKNOWN_FIELD_ERROR, + msg="$trim should reject unknown fields in the argument object", + ), + # Case-sensitive field names. + TrimTest( + "syntax_case_sensitive_field", + expr={"$trim": {"Input": "hello"}}, + error_code=TRIM_UNKNOWN_FIELD_ERROR, + msg="$trim should reject case-mismatched field name 'Input'", + ), +] + +# Property [Dollar Sign Error]: a bare "$" is interpreted as a field path and "$$" is interpreted +# as an empty variable name. 
+TRIM_DOLLAR_SIGN_ERROR_TESTS: list[TrimTest] = [ + TrimTest( + "dollar_bare_input", + input="$", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$trim should reject bare '$' as input field path", + ), + TrimTest( + "dollar_bare_chars", + input="hello", + chars="$", + error_code=INVALID_DOLLAR_FIELD_PATH, + msg="$trim should reject bare '$' as chars field path", + ), + TrimTest( + "dollar_double_input", + input="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$trim should reject '$$' as empty variable name in input", + ), + TrimTest( + "dollar_double_chars", + input="hello", + chars="$$", + error_code=FAILED_TO_PARSE_ERROR, + msg="$trim should reject '$$' as empty variable name in chars", + ), +] + +TRIM_INVALID_ARGS_ALL_TESTS = TRIM_SYNTAX_ERROR_TESTS + TRIM_DOLLAR_SIGN_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(TRIM_INVALID_ARGS_ALL_TESTS)) +def test_trim_invalid_args(collection, test_case: TrimTest): + """Test $trim invalid arguments.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invariants.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invariants.py new file mode 100644 index 00000000..39a3bccc --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invariants.py @@ -0,0 +1,305 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult, assertSuccess +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + _OMIT, + TrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + 
execute_project, +) + +# Property [Identity]: empty string chars is the identity element. The result equals input +# unchanged. +TRIM_IDENTITY_TESTS: list[TrimTest] = [ + TrimTest( + "identity_plain", + input="hello", + chars="", + expected="hello", + msg="$trim should return input unchanged when chars is empty", + ), + TrimTest( + "identity_surrounding_spaces", + input=" hello ", + chars="", + expected=" hello ", + msg="$trim should preserve surrounding spaces when chars is empty", + ), + TrimTest( + "identity_empty_input", + input="", + chars="", + expected="", + msg="$trim should return empty string when both input and chars are empty", + ), + TrimTest( + "identity_repeated", + input="aaahelloaaa", + chars="", + expected="aaahelloaaa", + msg="$trim should preserve surrounding chars when chars is empty", + ), + TrimTest( + "identity_unicode", + input="日本語", + chars="", + expected="日本語", + msg="$trim should return Unicode input unchanged when chars is empty", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TRIM_IDENTITY_TESTS)) +def test_trim_identity(collection, test_case: TrimTest): + """Test $trim identity cases.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) + + +# Property [Idempotency]: applying $trim twice with the same chars yields the same result as +# applying it once. 
TRIM_IDEMPOTENCY_TESTS: list[TrimTest] = [
    # No chars argument is supplied for this case.
    TrimTest(
        "idempotent_default",
        input=" hello ",
        expected="hello",
        msg="$trim should be idempotent with default whitespace trimming",
    ),
    # Explicit single-character trim set.
    TrimTest(
        "idempotent_custom",
        input="aaahelloaaa",
        chars="a",
        expected="hello",
        msg="$trim should be idempotent with custom chars",
    ),
    # Spaces, tabs and newlines mixed on both sides; no chars argument.
    TrimTest(
        "idempotent_mixed_whitespace",
        input=" \t\nhello\n\t ",
        expected="hello",
        msg="$trim should be idempotent with mixed whitespace",
    ),
    # Input that already equals the expected output.
    TrimTest(
        "idempotent_no_trim",
        input="hello",
        expected="hello",
        msg="$trim should be idempotent when no trimming occurs",
    ),
]


@pytest.mark.parametrize("test_case", pytest_params(TRIM_IDEMPOTENCY_TESTS))
def test_trim_idempotency(collection, test_case: TrimTest):
    """Check that trimming an already-trimmed value yields the same value again.

    One projection evaluates both the single application ("once") and the
    nested application ("twice"); both fields must equal the case's expected
    string.
    """
    single_pass = _expr(test_case)

    # Arguments of the outer $trim: chars is forwarded only when the case sets it, so a
    # case without chars stays without chars at the outer level too.
    outer_args = {"input": single_pass}
    if test_case.chars is not _OMIT:
        outer_args["chars"] = test_case.chars

    projection = {"once": single_pass, "twice": {"$trim": outer_args}}
    wanted_row = {"once": test_case.expected, "twice": test_case.expected}

    result = execute_project(collection, projection)
    assertSuccess(result, [wanted_row], msg=test_case.msg)


# Property [Substring Invariant]: the result is always a contiguous substring of the original
# input string.
TRIM_SUBSTRING_INVARIANT_TESTS: list[TrimTest] = [
    # No chars argument is supplied for this case.
    TrimTest(
        "substring_default_trim",
        input=" hello ",
        msg="$trim result should be a contiguous substring of input after default trimming",
    ),
    # Explicit single-character trim set.
    TrimTest(
        "substring_custom_chars",
        input="aaahelloaaa",
        chars="a",
        msg="$trim result should be a contiguous substring of input after custom char trimming",
    ),
    # Input with nothing to strip on either side.
    TrimTest(
        "substring_no_trim_needed",
        input="hello",
        msg="$trim result should be a contiguous substring of input when no trimming needed",
    ),
    # Whitespace-only input; exercises the empty-result branch of the check below.
    TrimTest(
        "substring_all_trimmed",
        input=" ",
        msg="$trim result should be a contiguous substring of input when all chars trimmed",
    ),
    # Tabs and newlines outside, a space kept inside the payload.
    TrimTest(
        "substring_mixed_whitespace",
        input="\t\nhello world\n\t",
        msg="$trim result should be a contiguous substring of input with mixed whitespace",
    ),
]


@pytest.mark.parametrize("test_case", pytest_params(TRIM_SUBSTRING_INVARIANT_TESTS))
def test_trim_substring_invariant(collection, test_case: TrimTest):
    """Check that whatever $trim returns can be located inside the original input.

    The check is evaluated inside the projection itself: an empty result is
    accepted outright, otherwise $indexOfCP of the result within the input
    must be zero or greater.
    """
    trimmed = _expr(test_case)

    result_is_empty = {"$eq": [{"$strLenCP": trimmed}, 0]}
    found_in_input = {"$gte": [{"$indexOfCP": [test_case.input, trimmed]}, 0]}
    substring_check = {
        "$cond": {
            "if": result_is_empty,
            "then": True,
            "else": found_in_input,
        }
    }

    result = execute_project(collection, {"isSubstring": substring_check})
    assertSuccess(result, [{"isSubstring": True}], msg=test_case.msg)


# Property [First Char Invariant]: the first character of a non-empty result is not a member of
# the trim character set. Only tested with custom chars where membership can be checked
# server-side via $indexOfCP.
TRIM_FIRST_CHAR_INVARIANT_TESTS: list[TrimTest] = [
    # One-character trim set.
    TrimTest(
        "first_char_single",
        input="aaahelloaaa",
        chars="a",
        msg="$trim result's first char should not be in single-char trim set",
    ),
    # Three-character trim set; the input starts and ends with all three.
    TrimTest(
        "first_char_multi",
        input="abcdefabc",
        chars="abc",
        msg="$trim result's first char should not be in multi-char trim set",
    ),
    # The whole three-character prefix belongs to the trim set.
    TrimTest(
        "first_char_all_leading",
        input="xyzabcxyz",
        chars="xyz",
        msg="$trim result's first char should not be in trim set after full trim",
    ),
    # Trim set shares no character with the input.
    TrimTest(
        "first_char_no_trim",
        input="hello",
        chars="xyz",
        msg="$trim result's first char should not be in trim set when no trimming occurs",
    ),
]


@pytest.mark.parametrize("test_case", pytest_params(TRIM_FIRST_CHAR_INVARIANT_TESTS))
def test_trim_first_char_invariant(collection, test_case: TrimTest):
    """Check that a non-empty $trim result never starts with a character from chars.

    The leading code point of the result is looked up in the chars string with
    $indexOfCP and the lookup must yield -1. An empty result passes trivially.
    """
    trimmed = _expr(test_case)
    length = {"$strLenCP": trimmed}
    leading_char = {"$substrCP": [trimmed, 0, 1]}

    absent_from_chars = {"$eq": [{"$indexOfCP": [test_case.chars, leading_char]}, -1]}
    guard = {"$cond": {"if": {"$gt": [length, 0]}, "then": absent_from_chars, "else": True}}

    result = execute_project(collection, {"firstCharNotInChars": guard})
    assertSuccess(result, [{"firstCharNotInChars": True}], msg=test_case.msg)


# Property [Last Char Invariant]: the last character of a non-empty result is not a member of
# the trim character set. Only tested with custom chars where membership can be checked
# server-side via $indexOfCP.
TRIM_LAST_CHAR_INVARIANT_TESTS: list[TrimTest] = [
    # One-character trim set.
    TrimTest(
        "last_char_single",
        input="aaahelloaaa",
        chars="a",
        msg="$trim result's last char should not be in single-char trim set",
    ),
    # Three-character trim set; the input starts and ends with all three.
    TrimTest(
        "last_char_multi",
        input="abcdefabc",
        chars="abc",
        msg="$trim result's last char should not be in multi-char trim set",
    ),
    # The whole three-character suffix belongs to the trim set.
    TrimTest(
        "last_char_all_trailing",
        input="xyzabcxyz",
        chars="xyz",
        msg="$trim result's last char should not be in trim set after full trim",
    ),
    # Trim set shares no character with the input.
    TrimTest(
        "last_char_no_trim",
        input="hello",
        chars="xyz",
        msg="$trim result's last char should not be in trim set when no trimming occurs",
    ),
]


@pytest.mark.parametrize("test_case", pytest_params(TRIM_LAST_CHAR_INVARIANT_TESTS))
def test_trim_last_char_invariant(collection, test_case: TrimTest):
    """Check that a non-empty $trim result never ends with a character from chars.

    The final code point of the result (at index length - 1) is looked up in
    the chars string with $indexOfCP and the lookup must yield -1. An empty
    result passes trivially.
    """
    trimmed = _expr(test_case)
    length = {"$strLenCP": trimmed}
    final_index = {"$subtract": [length, 1]}
    trailing_char = {"$substrCP": [trimmed, final_index, 1]}

    absent_from_chars = {"$eq": [{"$indexOfCP": [test_case.chars, trailing_char]}, -1]}
    guard = {"$cond": {"if": {"$gt": [length, 0]}, "then": absent_from_chars, "else": True}}

    result = execute_project(collection, {"lastCharNotInChars": guard})
    assertSuccess(result, [{"lastCharNotInChars": True}], msg=test_case.msg)


# Property [Return Type]: the result is always a string when the expression succeeds and no null
# propagation occurs.
+TRIM_RETURN_TYPE_TESTS: list[TrimTest] = [ + TrimTest( + "return_type_default_trim", + input=" hello ", + msg="$trim should return string type after default trimming", + ), + TrimTest( + "return_type_custom_no_match", + input="hello", + chars="x", + msg="$trim should return string type when custom chars don't match", + ), + TrimTest( + "return_type_custom_trim", + input="aaahelloaaa", + chars="a", + msg="$trim should return string type after custom char trimming", + ), + TrimTest("return_type_empty", input="", msg="$trim should return string type for empty input"), + TrimTest( + "return_type_all_whitespace", + input=" ", + msg="$trim should return string type when all whitespace is trimmed", + ), + TrimTest( + "return_type_unicode", + input="日本語", + chars="日", + msg="$trim should return string type after Unicode char trimming", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(TRIM_RETURN_TYPE_TESTS)) +def test_trim_return_type(collection, test_case: TrimTest): + """Test $trim result is always type string.""" + result = execute_expression(collection, {"$type": _expr(test_case)}) + assertSuccess(result, [{"result": "string"}], msg=test_case.msg) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_null.py new file mode 100644 index 00000000..fac8731d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_null.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import MISSING +from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + _OMIT, + TrimTest, + _expr, +) +from 
documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Argument shapes for null/missing tests. _PLACEHOLDER is replaced with None or MISSING. +_PLACEHOLDER = object() +_NULL_PATTERNS = [ + (_PLACEHOLDER, _OMIT, "input_default_chars", "input is {kind} with default chars"), + (_PLACEHOLDER, "abc", "input_custom_chars", "input is {kind} with custom chars"), + ("hello", _PLACEHOLDER, "chars_valid_input", "chars is {kind} with valid string input"), + (_PLACEHOLDER, _PLACEHOLDER, "both", "both input and chars are {kind}"), + # Null/missing input takes precedence over non-string chars (no error raised). + (_PLACEHOLDER, 123, "precedence_chars_int", "input is {kind} even with non-string chars"), +] + + +def _build_null_tests(null_value, prefix) -> list[TrimTest]: + return [ + TrimTest( + f"{prefix}_{suffix}", + input=null_value if _input is _PLACEHOLDER else _input, + chars=null_value if _chars is _PLACEHOLDER else _chars, + expected=None, + msg=f"$trim should return null when {msg_tmpl.format(kind=prefix)}", + ) + for _input, _chars, suffix, msg_tmpl in _NULL_PATTERNS + ] + + +# Property [Null Propagation]: when either input or chars is null, the result is null. +TRIM_NULL_TESTS = _build_null_tests(None, "null") + +# Property [Null Propagation - missing]: missing fields are treated as null. +TRIM_MISSING_TESTS = _build_null_tests(MISSING, "missing") + +# Property [Null Propagation - mixed null and missing]: combining null and missing across +# positions still produces null. 
+TRIM_MIXED_NULL_TESTS: list[TrimTest] = [ + TrimTest( + "mixed_null_input_missing_chars", + input=None, + chars=MISSING, + expected=None, + msg="$trim should return null when input is null and chars is missing", + ), + TrimTest( + "mixed_missing_input_null_chars", + input=MISSING, + chars=None, + expected=None, + msg="$trim should return null when input is missing and chars is null", + ), +] + +TRIM_NULL_ALL_TESTS = TRIM_NULL_TESTS + TRIM_MISSING_TESTS + TRIM_MIXED_NULL_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(TRIM_NULL_ALL_TESTS)) +def test_trim_null(collection, test_case: TrimTest): + """Test $trim null propagation.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_size_limit.py new file mode 100644 index 00000000..b370007a --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_size_limit.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import pytest + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import STRING_SIZE_LIMIT_ERROR +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import STRING_SIZE_LIMIT_BYTES +from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + TrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [String Size Limit - Success]: input one byte under the limit is accepted. 
+TRIM_SIZE_LIMIT_SUCCESS_TESTS: list[TrimTest] = [ + TrimTest( + "size_one_under", + input="a" * (STRING_SIZE_LIMIT_BYTES - 1), + expected="a" * (STRING_SIZE_LIMIT_BYTES - 1), + msg="$trim should accept input one byte under the size limit", + ), + TrimTest( + "size_one_under_2byte", + input="\u00e9" * ((STRING_SIZE_LIMIT_BYTES - 1) // 2) + "a", + expected="\u00e9" * ((STRING_SIZE_LIMIT_BYTES - 1) // 2) + "a", + msg="$trim should accept 2-byte character input one byte under the size limit", + ), + # Large input with many leading and trailing trim characters, just under the limit. + TrimTest( + "size_trim_both_sides", + input="a" * ((STRING_SIZE_LIMIT_BYTES - 6) // 2) + + "hello" + + "a" * ((STRING_SIZE_LIMIT_BYTES - 6) // 2), + chars="a", + expected="hello", + msg="$trim should trim many characters from both sides near the size limit", + ), +] + + +# Property [String Size Limit - Error]: input at the BSON string byte limit produces an error. +TRIM_SIZE_LIMIT_ERROR_TESTS: list[TrimTest] = [ + TrimTest( + "size_at_limit", + input="a" * STRING_SIZE_LIMIT_BYTES, + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$trim should reject input at the BSON string byte limit", + ), + TrimTest( + "size_at_limit_2byte", + input="\u00e9" * (STRING_SIZE_LIMIT_BYTES // 2), + error_code=STRING_SIZE_LIMIT_ERROR, + msg="$trim should reject 2-byte character input at the BSON string byte limit", + ), +] + +TRIM_SIZE_LIMIT_ALL_TESTS = TRIM_SIZE_LIMIT_SUCCESS_TESTS + TRIM_SIZE_LIMIT_ERROR_TESTS + + +@pytest.mark.parametrize("test_case", pytest_params(TRIM_SIZE_LIMIT_ALL_TESTS)) +def test_trim_size_limit(collection, test_case: TrimTest): + """Test $trim string size limit.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_type_errors.py 
b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_type_errors.py new file mode 100644 index 00000000..bd9c81ff --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_type_errors.py @@ -0,0 +1,279 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp + +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import TRIM_CHARS_TYPE_ERROR, TRIM_INPUT_TYPE_ERROR +from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF, MISSING +from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + TrimTest, + _expr, +) +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression + +# Property [Type Strictness - input]: non-string, non-null input produces TRIM_INPUT_TYPE_ERROR. 
+TRIM_INPUT_TYPE_ERROR_TESTS: list[TrimTest] = [ + TrimTest( + "type_input_array", + input=["a"], + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject array as input", + ), + TrimTest( + "type_input_binary", + input=Binary(b"data"), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject Binary as input", + ), + TrimTest( + "type_input_bool", + input=True, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject boolean as input", + ), + TrimTest( + "type_input_date", + input=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject datetime as input", + ), + TrimTest( + "type_input_decimal128", + input=DECIMAL128_ONE_AND_HALF, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject Decimal128 as input", + ), + TrimTest( + "type_input_float", + input=3.14, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject float as input", + ), + TrimTest( + "type_input_int", + input=42, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject int as input", + ), + TrimTest( + "type_input_long", + input=Int64(42), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject Int64 as input", + ), + TrimTest( + "type_input_maxkey", + input=MaxKey(), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject MaxKey as input", + ), + TrimTest( + "type_input_minkey", + input=MinKey(), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject MinKey as input", + ), + TrimTest( + "type_input_object", + input={"a": 1}, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject object as input", + ), + TrimTest( + "type_input_objectid", + input=ObjectId("507f1f77bcf86cd799439011"), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject ObjectId as input", + ), + TrimTest( + "type_input_regex", + input=Regex("pattern"), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject Regex as input", + ), + TrimTest( + "type_input_timestamp", + input=Timestamp(1, 1), + 
error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject Timestamp as input", + ), + TrimTest( + "type_input_code", + input=Code("function() {}"), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject Code as input", + ), + TrimTest( + "type_input_code_scope", + input=Code("function() {}", {"x": 1}), + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject Code with scope as input", + ), +] + +# Property [Type Strictness - chars]: non-string, non-null chars produces TRIM_CHARS_TYPE_ERROR. +TRIM_CHARS_TYPE_ERROR_TESTS: list[TrimTest] = [ + TrimTest( + "type_chars_array", + input="hello", + chars=["a"], + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject array as chars", + ), + TrimTest( + "type_chars_binary", + input="hello", + chars=Binary(b"data"), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject Binary as chars", + ), + TrimTest( + "type_chars_bool", + input="hello", + chars=True, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject boolean as chars", + ), + TrimTest( + "type_chars_date", + input="hello", + chars=datetime(2024, 1, 1, tzinfo=timezone.utc), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject datetime as chars", + ), + TrimTest( + "type_chars_decimal128", + input="hello", + chars=DECIMAL128_ONE_AND_HALF, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject Decimal128 as chars", + ), + TrimTest( + "type_chars_float", + input="hello", + chars=3.14, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject float as chars", + ), + TrimTest( + "type_chars_int", + input="hello", + chars=42, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject int as chars", + ), + TrimTest( + "type_chars_long", + input="hello", + chars=Int64(42), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject Int64 as chars", + ), + TrimTest( + "type_chars_maxkey", + input="hello", + chars=MaxKey(), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject MaxKey as chars", + ), 
+ TrimTest( + "type_chars_minkey", + input="hello", + chars=MinKey(), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject MinKey as chars", + ), + TrimTest( + "type_chars_object", + input="hello", + chars={"a": 1}, + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject object as chars", + ), + TrimTest( + "type_chars_objectid", + input="hello", + chars=ObjectId("507f1f77bcf86cd799439011"), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject ObjectId as chars", + ), + TrimTest( + "type_chars_regex", + input="hello", + chars=Regex("pattern"), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject Regex as chars", + ), + TrimTest( + "type_chars_timestamp", + input="hello", + chars=Timestamp(1, 1), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject Timestamp as chars", + ), + TrimTest( + "type_chars_code", + input="hello", + chars=Code("function() {}"), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject Code as chars", + ), + TrimTest( + "type_chars_code_scope", + input="hello", + chars=Code("function() {}", {"x": 1}), + error_code=TRIM_CHARS_TYPE_ERROR, + msg="$trim should reject Code with scope as chars", + ), +] + +# Property [Type Strictness - precedence]: when both input and chars are non-string, the input +# type error takes precedence. +TRIM_TYPE_PRECEDENCE_TESTS: list[TrimTest] = [ + TrimTest( + "type_precedence_both_int", + input=123, + chars=456, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should report input type error before chars type error when both are invalid", + ), +] + +# Property [Type Strictness - input with null chars]: non-string input errors even when chars is +# null or missing. 
+TRIM_INPUT_TYPE_NULL_CHARS_TESTS: list[TrimTest] = [ + TrimTest( + "type_input_int_chars_null", + input=123, + chars=None, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject non-string input even when chars is null", + ), + TrimTest( + "type_input_int_chars_missing", + input=123, + chars=MISSING, + error_code=TRIM_INPUT_TYPE_ERROR, + msg="$trim should reject non-string input even when chars is missing", + ), +] + +TRIM_TYPE_ERROR_ALL_TESTS = ( + TRIM_INPUT_TYPE_ERROR_TESTS + + TRIM_CHARS_TYPE_ERROR_TESTS + + TRIM_TYPE_PRECEDENCE_TESTS + + TRIM_INPUT_TYPE_NULL_CHARS_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(TRIM_TYPE_ERROR_ALL_TESTS)) +def test_trim_type_errors(collection, test_case: TrimTest): + """Test $trim type errors.""" + result = execute_expression(collection, _expr(test_case)) + assertResult( + result, expected=test_case.expected, error_code=test_case.error_code, msg=test_case.msg + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/utils/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/utils/trim_common.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/utils/trim_common.py new file mode 100644 index 00000000..70358942 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/utils/trim_common.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, cast + +from documentdb_tests.framework.test_case import BaseTestCase + +# Sentinel for "omit this parameter from the expression." Distinct from None (which means pass +# null) and MISSING (which means reference a missing field). 
+_OMIT = object() + + +@dataclass(frozen=True) +class TrimTest(BaseTestCase): + """Test case for $trim operator.""" + + input: Any = None + chars: Any = _OMIT + expr: Any = None # Raw expression override for syntax tests + + +def _expr(test_case: TrimTest) -> dict[str, Any]: + if test_case.expr is not None: + return cast(dict[str, Any], test_case.expr) + params: dict[str, Any] = {"input": test_case.input} + if test_case.chars is not _OMIT: + params["chars"] = test_case.chars + return {"$trim": params} From e77ad9a2ae1db09fdda0e03d9932d2614a785652 Mon Sep 17 00:00:00 2001 From: Yunxuan Shi Date: Thu, 9 Apr 2026 16:46:48 -0700 Subject: [PATCH 2/2] Add missing dependencies for trim variant tests - Add __init__.py for package resolution - Add ExpressionTestCase to utils/ - Add TRIM_*, FAILED_TO_PARSE_ERROR, INVALID_DOLLAR_FIELD_PATH, BSON_TO_STRING_CONVERSION_ERROR, STRING_SIZE_LIMIT_ERROR to error_codes.py - Add STRING_SIZE_LIMIT_BYTES to test_constants.py - Fix pytest_params import (parametrize module) - Use relative imports for operator common utils - Pin CI MongoDB to 8.2.4 - Run isort/black formatting Signed-off-by: Yunxuan Shi --- .github/workflows/pr-tests.yml | 2 +- .../compatibility/tests/core/__init__.py | 0 .../tests/core/operator/__init__.py | 0 .../tests/core/operator/expressions/__init__.py | 0 .../core/operator/expressions/string/__init__.py | 0 .../expressions/string/ltrim/__init__.py | 0 .../string/ltrim/test_ltrim_custom_chars.py | 9 ++++++--- .../string/ltrim/test_ltrim_default_trim.py | 9 ++++++--- .../string/ltrim/test_ltrim_encoding.py | 9 ++++++--- .../string/ltrim/test_ltrim_input_forms.py | 15 ++++++++------- .../string/ltrim/test_ltrim_invalid_args.py | 9 ++++++--- .../string/ltrim/test_ltrim_invariants.py | 13 +++++++------ .../expressions/string/ltrim/test_ltrim_null.py | 9 ++++++--- .../string/ltrim/test_ltrim_size_limit.py | 9 ++++++--- .../string/ltrim/test_ltrim_type_errors.py | 9 ++++++--- .../expressions/string/rtrim/__init__.py | 0 
.../string/rtrim/test_rtrim_custom_chars.py | 9 ++++++--- .../string/rtrim/test_rtrim_default_trim.py | 9 ++++++--- .../string/rtrim/test_rtrim_encoding.py | 9 ++++++--- .../string/rtrim/test_rtrim_input_forms.py | 15 ++++++++------- .../string/rtrim/test_rtrim_invalid_args.py | 9 ++++++--- .../string/rtrim/test_rtrim_invariants.py | 13 +++++++------ .../expressions/string/rtrim/test_rtrim_null.py | 9 ++++++--- .../string/rtrim/test_rtrim_size_limit.py | 9 ++++++--- .../string/rtrim/test_rtrim_type_errors.py | 9 ++++++--- .../operator/expressions/string/trim/__init__.py | 0 .../string/trim/test_trim_custom_chars.py | 9 ++++++--- .../string/trim/test_trim_default_trim.py | 9 ++++++--- .../string/trim/test_trim_encoding.py | 9 ++++++--- .../string/trim/test_trim_input_forms.py | 15 ++++++++------- .../string/trim/test_trim_invalid_args.py | 9 ++++++--- .../string/trim/test_trim_invariants.py | 13 +++++++------ .../expressions/string/trim/test_trim_null.py | 9 ++++++--- .../string/trim/test_trim_size_limit.py | 9 ++++++--- .../string/trim/test_trim_type_errors.py | 9 ++++++--- .../expressions/utils/expression_test_case.py | 16 ++++++++++++++++ documentdb_tests/framework/error_codes.py | 8 ++++++++ documentdb_tests/framework/test_constants.py | 1 + 38 files changed, 197 insertions(+), 103 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/__init__.py create 
mode 100644 documentdb_tests/compatibility/tests/core/operator/expressions/utils/expression_test_case.py diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index ccbf21ac..37026f58 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -11,7 +11,7 @@ jobs: services: mongodb: - image: mongo:8.2 + image: mongo:8.2.4 ports: - 27017:27017 options: >- diff --git a/documentdb_tests/compatibility/tests/core/__init__.py b/documentdb_tests/compatibility/tests/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/__init__.py b/documentdb_tests/compatibility/tests/core/operator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_custom_chars.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_custom_chars.py index 0200612f..a8e21fda 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_custom_chars.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_custom_chars.py @@ -2,13 +2,16 @@ import pytest +from 
documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.ltrim_common import ( LtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Custom Chars]: when chars is provided, only those characters are trimmed from the # leading edge. Each character in chars is treated individually (not as a substring), and the diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_default_trim.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_default_trim.py index 31183758..0b848e45 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_default_trim.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_default_trim.py @@ -2,13 +2,16 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.ltrim_common import ( LtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Default Trimming]: when chars is omitted, leading whitespace is trimmed. 
The default # set includes ASCII whitespace (space, tab, newline, carriage return, form feed, vertical tab), diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_encoding.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_encoding.py index 001d5af8..72a14b80 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_encoding.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_encoding.py @@ -2,13 +2,16 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.ltrim_common import ( LtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Encoding and Character Handling]: trimming operates on whole Unicode code points, # not bytes or substrings. 
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_input_forms.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_input_forms.py index cbca9d28..689c4733 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_input_forms.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_input_forms.py @@ -2,17 +2,18 @@ import pytest -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( - LtrimTest, - _expr, -) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.expression_test_case import ExpressionTestCase from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( execute_expression, execute_expression_with_insert, ) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.parametrize import pytest_params + +from ...utils.expression_test_case import ExpressionTestCase +from .utils.ltrim_common import ( + LtrimTest, + _expr, +) # Property [Expression Arguments]: input and chars accept any expression that resolves to a # string. Nested $ltrim expressions are also accepted. 
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invalid_args.py index dedbe58e..db073c7f 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invalid_args.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invalid_args.py @@ -5,6 +5,9 @@ import pytest from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.error_codes import ( FAILED_TO_PARSE_ERROR, @@ -13,13 +16,13 @@ TRIM_UNKNOWN_FIELD_ERROR, TRIM_WRONG_TYPE_ERROR, ) -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF -from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( + +from .utils.ltrim_common import ( LtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Syntax Validation]: invalid $ltrim object shapes produce errors. 
LTRIM_SYNTAX_ERROR_TESTS: list[LtrimTest] = [ diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invariants.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invariants.py index b14ea30d..4691c8b6 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invariants.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_invariants.py @@ -2,17 +2,18 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_project, +) from documentdb_tests.framework.assertions import assertResult, assertSuccess -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.ltrim_common import ( _OMIT, LtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( - execute_expression, - execute_project, -) # Property [Identity]: empty string chars is the identity element. The result equals input # unchanged. 
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_null.py index 5c460c0f..e9a49fd2 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_null.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_null.py @@ -2,15 +2,18 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import MISSING -from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( + +from .utils.ltrim_common import ( _OMIT, LtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Argument shapes for null/missing tests. _PLACEHOLDER is replaced with None or MISSING. 
_PLACEHOLDER = object() diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_size_limit.py index bc17e2eb..65df7fef 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_size_limit.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_size_limit.py @@ -2,15 +2,18 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.error_codes import STRING_SIZE_LIMIT_ERROR -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import STRING_SIZE_LIMIT_BYTES -from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( + +from .utils.ltrim_common import ( LtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [String Size Limit - Success]: input one byte under the limit is accepted. 
LTRIM_SIZE_LIMIT_SUCCESS_TESTS: list[LtrimTest] = [ diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_type_errors.py index 364eea77..b0912df7 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_type_errors.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/ltrim/test_ltrim_type_errors.py @@ -5,15 +5,18 @@ import pytest from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.error_codes import TRIM_CHARS_TYPE_ERROR, TRIM_INPUT_TYPE_ERROR -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF, MISSING -from documentdb_tests.compatibility.tests.core.operator.expressions.string.ltrim.utils.ltrim_common import ( + +from .utils.ltrim_common import ( LtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Type Strictness - input]: non-string, non-null input produces TRIM_INPUT_TYPE_ERROR. 
LTRIM_INPUT_TYPE_ERROR_TESTS: list[LtrimTest] = [ diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_custom_chars.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_custom_chars.py index 4d041343..a82a5154 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_custom_chars.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_custom_chars.py @@ -2,13 +2,16 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.rtrim_common import ( RtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Custom Chars]: when chars is provided, only those characters are trimmed from the # trailing edge. 
Each character in chars is treated individually (not as a substring), and the diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_default_trim.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_default_trim.py index ed8219f2..955d376a 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_default_trim.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_default_trim.py @@ -2,13 +2,16 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.rtrim_common import ( RtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Default Trimming]: when chars is omitted, trailing whitespace is trimmed. 
The default # set includes ASCII whitespace (space, tab, newline, carriage return, form feed, vertical tab), diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_encoding.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_encoding.py index 764f2e9b..b5315ca6 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_encoding.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_encoding.py @@ -2,13 +2,16 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.rtrim_common import ( RtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Encoding and Character Handling]: trimming operates on whole Unicode code points, # not bytes or substrings. 
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_input_forms.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_input_forms.py index 3d6a2fdc..1f07a404 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_input_forms.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_input_forms.py @@ -2,17 +2,18 @@ import pytest -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( - RtrimTest, - _expr, -) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.expression_test_case import ExpressionTestCase from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( execute_expression, execute_expression_with_insert, ) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.parametrize import pytest_params + +from ...utils.expression_test_case import ExpressionTestCase +from .utils.rtrim_common import ( + RtrimTest, + _expr, +) # Property [Expression Arguments]: input and chars accept any expression that resolves to a # string. Nested $rtrim expressions are also accepted. 
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invalid_args.py index f1a39336..c3c59da7 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invalid_args.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invalid_args.py @@ -5,6 +5,9 @@ import pytest from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.error_codes import ( FAILED_TO_PARSE_ERROR, @@ -13,13 +16,13 @@ TRIM_UNKNOWN_FIELD_ERROR, TRIM_WRONG_TYPE_ERROR, ) -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF -from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( + +from .utils.rtrim_common import ( RtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Syntax Validation]: invalid $rtrim object shapes produce errors. 
RTRIM_SYNTAX_ERROR_TESTS: list[RtrimTest] = [ diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invariants.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invariants.py index c17666f2..275a6a92 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invariants.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_invariants.py @@ -2,17 +2,18 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_project, +) from documentdb_tests.framework.assertions import assertResult, assertSuccess -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.rtrim_common import ( _OMIT, RtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( - execute_expression, - execute_project, -) # Property [Identity]: empty string chars is the identity element. The result equals input # unchanged. 
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_null.py index b6f3139f..5b713e98 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_null.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_null.py @@ -2,15 +2,18 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import MISSING -from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( + +from .utils.rtrim_common import ( _OMIT, RtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Argument shapes for null/missing tests. _PLACEHOLDER is replaced with None or MISSING. 
_PLACEHOLDER = object() diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_size_limit.py index 5b69743f..dee55232 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_size_limit.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_size_limit.py @@ -2,15 +2,18 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.error_codes import STRING_SIZE_LIMIT_ERROR -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import STRING_SIZE_LIMIT_BYTES -from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( + +from .utils.rtrim_common import ( RtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [String Size Limit - Success]: input one byte under the limit is accepted. 
RTRIM_SIZE_LIMIT_SUCCESS_TESTS: list[RtrimTest] = [ diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_type_errors.py index 99ab19d9..edf0bcf5 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_type_errors.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/rtrim/test_rtrim_type_errors.py @@ -5,15 +5,18 @@ import pytest from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.error_codes import TRIM_CHARS_TYPE_ERROR, TRIM_INPUT_TYPE_ERROR -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF, MISSING -from documentdb_tests.compatibility.tests.core.operator.expressions.string.rtrim.utils.rtrim_common import ( + +from .utils.rtrim_common import ( RtrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Type Strictness - input]: non-string, non-null input produces TRIM_INPUT_TYPE_ERROR. 
RTRIM_INPUT_TYPE_ERROR_TESTS: list[RtrimTest] = [ diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/__init__.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_custom_chars.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_custom_chars.py index 42cb75ed..f920d78c 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_custom_chars.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_custom_chars.py @@ -2,13 +2,16 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.trim_common import ( TrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Custom Chars]: when chars is provided, only those characters are trimmed from both # ends. 
Each character in chars is treated individually (not as a substring), and the order of diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_default_trim.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_default_trim.py index 0d58a92d..96bbf510 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_default_trim.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_default_trim.py @@ -2,13 +2,16 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.trim_common import ( TrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Default Trimming]: when chars is omitted, whitespace is trimmed from both ends. 
The # default set includes ASCII whitespace (space, tab, newline, carriage return, form feed, diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_encoding.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_encoding.py index 3ba5ac0f..d09ac63a 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_encoding.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_encoding.py @@ -2,13 +2,16 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.trim_common import ( TrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Encoding and Character Handling]: trimming operates on whole Unicode code points, # not bytes or substrings. 
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_input_forms.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_input_forms.py index 9f842881..495c94e3 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_input_forms.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_input_forms.py @@ -2,17 +2,18 @@ import pytest -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( - TrimTest, - _expr, -) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.expression_test_case import ExpressionTestCase from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( execute_expression, execute_expression_with_insert, ) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.parametrize import pytest_params + +from ...utils.expression_test_case import ExpressionTestCase +from .utils.trim_common import ( + TrimTest, + _expr, +) # Property [Expression Arguments]: input and chars accept any expression that resolves to a # string. Nested $trim expressions are also accepted. 
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invalid_args.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invalid_args.py index 32437b17..58a74f7e 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invalid_args.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invalid_args.py @@ -5,6 +5,9 @@ import pytest from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.error_codes import ( FAILED_TO_PARSE_ERROR, @@ -13,13 +16,13 @@ TRIM_UNKNOWN_FIELD_ERROR, TRIM_WRONG_TYPE_ERROR, ) -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF -from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + +from .utils.trim_common import ( TrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Syntax Validation]: invalid $trim object shapes produce errors. 
TRIM_SYNTAX_ERROR_TESTS: list[TrimTest] = [ diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invariants.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invariants.py index 39a3bccc..842a5e78 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invariants.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_invariants.py @@ -2,17 +2,18 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_project, +) from documentdb_tests.framework.assertions import assertResult, assertSuccess -from documentdb_tests.framework.test_case import pytest_params -from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( +from documentdb_tests.framework.parametrize import pytest_params + +from .utils.trim_common import ( _OMIT, TrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( - execute_expression, - execute_project, -) # Property [Identity]: empty string chars is the identity element. The result equals input # unchanged. 
diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_null.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_null.py index fac8731d..23d397d6 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_null.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_null.py @@ -2,15 +2,18 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import MISSING -from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + +from .utils.trim_common import ( _OMIT, TrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Argument shapes for null/missing tests. _PLACEHOLDER is replaced with None or MISSING. 
_PLACEHOLDER = object() diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_size_limit.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_size_limit.py index b370007a..ba693beb 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_size_limit.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_size_limit.py @@ -2,15 +2,18 @@ import pytest +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.error_codes import STRING_SIZE_LIMIT_ERROR -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import STRING_SIZE_LIMIT_BYTES -from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + +from .utils.trim_common import ( TrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [String Size Limit - Success]: input one byte under the limit is accepted. 
TRIM_SIZE_LIMIT_SUCCESS_TESTS: list[TrimTest] = [ diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_type_errors.py b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_type_errors.py index bd9c81ff..1568cd92 100644 --- a/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_type_errors.py +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/string/trim/test_trim_type_errors.py @@ -5,15 +5,18 @@ import pytest from bson import Binary, Code, Int64, MaxKey, MinKey, ObjectId, Regex, Timestamp +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, +) from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.error_codes import TRIM_CHARS_TYPE_ERROR, TRIM_INPUT_TYPE_ERROR -from documentdb_tests.framework.test_case import pytest_params +from documentdb_tests.framework.parametrize import pytest_params from documentdb_tests.framework.test_constants import DECIMAL128_ONE_AND_HALF, MISSING -from documentdb_tests.compatibility.tests.core.operator.expressions.string.trim.utils.trim_common import ( + +from .utils.trim_common import ( TrimTest, _expr, ) -from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import execute_expression # Property [Type Strictness - input]: non-string, non-null input produces TRIM_INPUT_TYPE_ERROR. TRIM_INPUT_TYPE_ERROR_TESTS: list[TrimTest] = [ diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/utils/expression_test_case.py b/documentdb_tests/compatibility/tests/core/operator/expressions/utils/expression_test_case.py new file mode 100644 index 00000000..bbdcef82 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/utils/expression_test_case.py @@ -0,0 +1,16 @@ +""" +Shared test case for expression/field path tests. 
+""" + +from dataclasses import dataclass +from typing import Any, Optional + +from documentdb_tests.framework.test_case import BaseTestCase + + +@dataclass(frozen=True) +class ExpressionTestCase(BaseTestCase): + """Test case for expression and field path tests.""" + + expression: Any = None + doc: Optional[dict] = None diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index c285b5aa..bd9d9a61 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -4,11 +4,14 @@ """ DIVIDE_BY_ZERO_ERROR = 2 +FAILED_TO_PARSE_ERROR = 9 TYPE_MISMATCH_ERROR = 14 EXPRESSION_TYPE_MISMATCH_ERROR = 16020 +STRING_SIZE_LIMIT_ERROR = 16493 MODULO_ZERO_REMAINDER_ERROR = 16610 MODULO_NON_NUMERIC_ERROR = 16611 MORE_THAN_ONE_DATE_ERROR = 16612 +INVALID_DOLLAR_FIELD_PATH = 16872 ABS_OVERFLOW_ERROR = 28680 LOG_NON_NUMERIC_VALUE_ERROR = 28756 LOG_NON_NUMERIC_BASE_ERROR = 28757 @@ -20,4 +23,9 @@ POW_BASE_ZERO_EXP_NEGATIVE_ERROR = 28764 NON_NUMERIC_TYPE_MISMATCH_ERROR = 28765 LN_NON_POSITIVE_INPUT_ERROR = 28766 +TRIM_UNKNOWN_FIELD_ERROR = 50694 +TRIM_MISSING_INPUT_ERROR = 50695 +TRIM_WRONG_TYPE_ERROR = 50696 +TRIM_INPUT_TYPE_ERROR = 50699 +TRIM_CHARS_TYPE_ERROR = 50700 MODULO_DECIMAL128_ZERO_REMAINDER_ERROR = 5733415 diff --git a/documentdb_tests/framework/test_constants.py b/documentdb_tests/framework/test_constants.py index 9e3088ed..d0aa0200 100644 --- a/documentdb_tests/framework/test_constants.py +++ b/documentdb_tests/framework/test_constants.py @@ -64,6 +64,7 @@ # Other constant values MISSING = "$missing" +STRING_SIZE_LIMIT_BYTES = 16 * 1024 * 1024 # Int32 lists NUMERIC_INT32_NEGATIVE = [INT32_UNDERFLOW, INT32_MIN]