Skip to content

Commit 5c84183

Browse files
fix unhashable UserDict in JSONArray.duplicated
1 parent f4ccbe3 commit 5c84183

File tree

1 file changed

+17
-1
lines changed

1 file changed

+17
-1
lines changed

pandas/tests/extension/json/array.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from typing import (
2626
TYPE_CHECKING,
2727
Any,
28+
Literal,
2829
)
2930

3031
import numpy as np
@@ -41,6 +42,7 @@
4142
ExtensionArray,
4243
ExtensionDtype,
4344
)
45+
from pandas.core.algorithms import duplicated
4446
from pandas.core.indexers import (
4547
getitem_returns_view,
4648
unpack_tuple_and_ellipses,
@@ -49,7 +51,10 @@
4951
if TYPE_CHECKING:
5052
from collections.abc import Mapping
5153

52-
from pandas._typing import type_t
54+
from pandas._typing import (
55+
npt,
56+
type_t,
57+
)
5358

5459

5560
class JSONDtype(ExtensionDtype):
@@ -264,6 +269,17 @@ def _pad_or_backfill(self, *, method, limit=None, copy=True):
264269
# GH#56616 - test EA method without limit_area argument
265270
return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
266271

272+
def duplicated(
    self, keep: Literal["first", "last", False] = "first"
) -> npt.NDArray[np.bool_]:
    """
    Return a boolean mask flagging the duplicated elements of this array.

    Parameters
    ----------
    keep : {"first", "last", False}, default "first"
        Forwarded unchanged to ``pandas.core.algorithms.duplicated``.

    Returns
    -------
    np.ndarray[bool]
        Whatever ``pandas.core.algorithms.duplicated`` returns for the
        converted values and the missing-value mask.
    """
    # The hash table behind pd.core.algorithms.duplicated cannot handle
    # UserDict values, so each element is converted to a plain dict first.
    # Plain dict values are always hashed as 0 for backwards compatibility,
    # see GH 57052
    na_mask = self.isna()
    na_mask = na_mask.astype(np.bool_, copy=False)
    plain_dicts = np.array(list(map(dict, self)), dtype="object")
    return duplicated(values=plain_dicts, keep=keep, mask=na_mask)
282+
267283

268284
def make_data(n: int):
269285
# TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer

0 commit comments

Comments
 (0)