Skip to content

Commit 5a182b3

Browse files
BUG: fix empty suffix and prefix handling in pyarrow string methods (pandas-dev#63395)
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
1 parent 0e978b6 commit 5a182b3

File tree

2 files changed

+16
-2
lines changed

2 files changed

+16
-2
lines changed

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -203,12 +203,16 @@ def _str_swapcase(self) -> Self:
203203
return self._from_pyarrow_array(pc.utf8_swapcase(self._pa_array))
204204

205205
def _str_removeprefix(self, prefix: str):
    """Strip a leading ``prefix`` from each element that starts with it.

    Elements that do not start with ``prefix`` are passed through unchanged.
    The result is wrapped with ``self._from_pyarrow_array``.
    """
    values = self._pa_array
    # An empty prefix removes nothing, so return early without calling
    # the pyarrow compute kernels.
    if prefix == "":
        return self._from_pyarrow_array(values)
    has_prefix = pc.starts_with(values, pattern=prefix)
    # Drop the first ``len(prefix)`` units from every element; if_else then
    # keeps the sliced value only where the prefix actually matched.
    without_prefix = pc.utf8_slice_codeunits(values, len(prefix))
    return self._from_pyarrow_array(pc.if_else(has_prefix, without_prefix, values))
210212

211213
def _str_removesuffix(self, suffix: str):
214+
if suffix == "":
215+
return self._from_pyarrow_array(self._pa_array)
212216
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
213217
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
214218
result = pc.if_else(ends_with, removed, self._pa_array)

pandas/tests/strings/test_strings.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -544,7 +544,12 @@ def test_strip_lstrip_rstrip_args(any_string_dtype, method, exp):
544544

545545

546546
@pytest.mark.parametrize(
547-
"prefix, expected", [("a", ["b", " b c", "bc"]), ("ab", ["", "a b c", "bc"])]
547+
"prefix, expected",
548+
[
549+
("a", ["b", " b c", "bc"]),
550+
("ab", ["", "a b c", "bc"]),
551+
("", ["ab", "a b c", "bc"]),
552+
],
548553
)
549554
def test_removeprefix(any_string_dtype, prefix, expected):
550555
ser = Series(["ab", "a b c", "bc"], dtype=any_string_dtype)
@@ -554,7 +559,12 @@ def test_removeprefix(any_string_dtype, prefix, expected):
554559

555560

556561
@pytest.mark.parametrize(
557-
"suffix, expected", [("c", ["ab", "a b ", "b"]), ("bc", ["ab", "a b c", ""])]
562+
"suffix, expected",
563+
[
564+
("c", ["ab", "a b ", "b"]),
565+
("bc", ["ab", "a b c", ""]),
566+
("", ["ab", "a b c", "bc"]),
567+
],
558568
)
559569
def test_removesuffix(any_string_dtype, suffix, expected):
560570
ser = Series(["ab", "a b c", "bc"], dtype=any_string_dtype)

0 commit comments

Comments
 (0)