diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 4fb24c9ad1538..f1acc49e2ae1c 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -865,7 +865,7 @@ cdef class BlockManager: nb = blk.slice_block_rows(slobj) nbs.append(nb) - new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)] + new_axes = [self.axes[0]._view(), self.axes[1]._getitem_slice(slobj)] mgr = type(self)(tuple(nbs), new_axes, verify_integrity=False) # We can avoid having to rebuild blklocs/blknos @@ -887,6 +887,7 @@ cdef class BlockManager: new_axes = list(self.axes) new_axes[axis] = new_axes[axis]._getitem_slice(slobj) + new_axes[1 - axis] = self.axes[1 - axis]._view() return type(self)(tuple(new_blocks), new_axes, verify_integrity=False) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5e2050cd19767..ef67a1e98b7d5 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -250,6 +250,7 @@ def blklocs(self) -> npt.NDArray[np.intp]: def make_empty(self, axes=None) -> Self: """return an empty BlockManager with the items axis of len 0""" if axes is None: + # TODO shallow copy remaining axis? axes = [default_index(0)] + self.axes[1:] # preserve dtype if possible @@ -441,6 +442,7 @@ def apply( applied = getattr(b, f)(**kwargs) result_blocks = extend_blocks(applied, result_blocks) + # TODO shallow copy axes (in from_blocks or here?) out = type(self).from_blocks(result_blocks, self.axes) return out @@ -676,6 +678,7 @@ def get_numeric_data(self) -> Self: numeric_blocks = [blk for blk in self.blocks if blk.is_numeric] if len(numeric_blocks) == len(self.blocks): # Avoid somewhat expensive _combine + # TODO(CoW) need to return a shallow copy here? return self return self._combine(numeric_blocks) @@ -702,6 +705,7 @@ def _combine(self, blocks: list[Block], index: Index | None = None) -> Self: new_blocks.append(nb) axes = list(self.axes) + # TODO shallow copy of axes? if index is not None: axes[-1] = index axes[0] = self.items.take(indexer) @@ -756,6 +760,7 @@ def consolidate(self) -> Self: if self.is_consolidated(): return self + # TODO shallow copy is not needed here? bm = type(self)(self.blocks, self.axes, verify_integrity=False) bm._is_consolidated = False bm._consolidate_inplace() @@ -813,6 +818,7 @@ def reindex_indexer( """ if indexer is None: if new_axis is self.axes[axis]: + # TODO(CoW) need to handle CoW? return self result = self.copy(deep=False) @@ -853,6 +859,8 @@ def reindex_indexer( new_axes = list(self.axes) new_axes[axis] = new_axis + if self.ndim == 2: + new_axes[1 - axis] = self.axes[1 - axis]._view() new_mgr = type(self).from_blocks(new_blocks, new_axes) if axis == 1: @@ -1131,6 +1139,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager: ndim=1, refs=self.blocks[0].refs, ) + # TODO shallow copy columns return SingleBlockManager(block, self.axes[0]) dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) @@ -1176,6 +1185,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager: bp = BlockPlacement(slice(0, len(result))) block = new_block(result, placement=bp, ndim=1) + # TODO shallow copy columns return SingleBlockManager(block, self.axes[0]) def iget(self, i: int, track_ref: bool = True) -> SingleBlockManager: @@ -1190,7 +1200,7 @@ def iget(self, i: int, track_ref: bool = True) -> SingleBlockManager: nb = type(block)( values, placement=bp, ndim=1, refs=block.refs if track_ref else None ) - return SingleBlockManager(nb, self.axes[1]) + return SingleBlockManager(nb, self.axes[1].view()) def iget_values(self, i: int) -> ArrayLike: """ @@ -1588,6 +1598,7 @@ def idelete(self, indexer) -> BlockManager: nbs = self._slice_take_blocks_ax0(taker, only_slice=True, ref_inplace_op=True) new_columns = self.items[~is_deleted] + # TODO shallow copy index? axes = [new_columns, self.axes[1]] return type(self)(tuple(nbs), axes, verify_integrity=False) @@ -1625,6 +1636,7 @@ def grouped_reduce(self, func: Callable) -> Self: nrows = result_blocks[0].values.shape[-1] index = default_index(nrows) + # TODO shallow copy columns? return type(self).from_blocks(result_blocks, [self.axes[0], index]) def reduce(self, func: Callable) -> Self: @@ -1644,6 +1656,7 @@ def reduce(self, func: Callable) -> Self: res_blocks = [blk.reduce(func) for blk in self.blocks] index = default_index(1) # placeholder + # TODO shallow copy self.items? new_mgr = type(self).from_blocks(res_blocks, [self.items, index]) return new_mgr @@ -1685,6 +1698,7 @@ def quantile( assert self.ndim >= 2 assert is_list_like(qs) # caller is responsible for this + # TODO shallow copy axes new_axes = list(self.axes) new_axes[1] = Index(qs, dtype=np.float64) @@ -1951,6 +1965,7 @@ def concat_horizontal(cls, mgrs: list[Self], axes: list[Index]) -> Self: offset += len(mgr.items) + # TODO relevant axis already shallow-copied at caller? new_mgr = cls(tuple(blocks), axes) return new_mgr @@ -2020,6 +2035,7 @@ def to_2d_mgr(self, columns: Index) -> BlockManager: arr = ensure_block_shape(blk.values, ndim=2) bp = BlockPlacement(0) new_blk = type(blk)(arr, placement=bp, ndim=2, refs=blk.refs) + # TODO shallow copy index axes = [columns, self.axes[0]] return BlockManager([new_blk], axes=axes, verify_integrity=False) diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py index cf9466c0bdf0b..e51bcab6dd0c6 100644 --- a/pandas/core/internals/ops.py +++ b/pandas/core/internals/ops.py @@ -89,6 +89,7 @@ def operate_blockwise( # assert len(slocs) == nlocs, (len(slocs), nlocs) # assert slocs == set(range(nlocs)), slocs + # TODO shallow copy axes? new_mgr = type(right)(tuple(res_blks), axes=right.axes, verify_integrity=False) return new_mgr diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 37a21e1098e78..1b0c13c1a9ca8 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -57,6 +57,8 @@ def test_subset_column_selection(backend): subset = df[["a", "c"]] + assert subset.index is not df.index + # the subset shares memory ... assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) # ... but uses CoW when being modified @@ -100,6 +102,7 @@ def test_subset_row_slice(backend): subset = df[1:3] subset._mgr._verify_integrity() + assert subset.columns is not df.columns assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) subset.iloc[0, 0] = 0 @@ -128,6 +131,7 @@ def test_subset_column_slice(backend, dtype): subset = df.iloc[:, 1:] subset._mgr._verify_integrity() + assert subset.index is not df.index assert np.shares_memory(get_array(subset, "b"), get_array(df, "b")) subset.iloc[0, 0] = 0 @@ -173,6 +177,9 @@ def test_subset_loc_rows_columns( subset = df.loc[row_indexer, column_indexer] + assert subset.index is not df.index + assert subset.columns is not df.columns + # modifying the subset never modifies the parent subset.iloc[0, 0] = 0 @@ -216,6 +223,9 @@ def test_subset_iloc_rows_columns( subset = df.iloc[row_indexer, column_indexer] + assert subset.index is not df.index + assert subset.columns is not df.columns + # modifying the subset never modifies the parent subset.iloc[0, 0] = 0 @@ -534,6 +544,8 @@ def test_null_slice(backend, method): # we always return new objects (shallow copy), regardless of CoW or not assert df2 is not df + assert df2.index is not df.index + assert df2.columns is not df.columns # and those trigger CoW when mutated df2.iloc[0, 0] = 0 @@ -558,6 +570,7 @@ def test_null_slice_series(backend, method): # we always return new objects, regardless of CoW or not assert s2 is not s + assert s2.index is not s.index # and those trigger CoW when mutated s2.iloc[0] = 0 @@ -701,7 +714,9 @@ def test_column_as_series(backend): s = df["a"] + assert s.index is not df.index assert np.shares_memory(get_array(s, "a"), get_array(df, "a")) + s[0] = 0 expected = Series([0, 2, 3], name="a") @@ -754,6 +769,8 @@ def test_column_as_series_no_item_cache(request, backend, method): s2 = method(df) assert s1 is not s2 + assert s1.index is not df.index + assert s1.index is not s2.index s1.iloc[0] = 0 diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index dc0bc7e476a02..6f7b770672839 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -34,6 +34,9 @@ def test_copy(): assert not df_copy._mgr.blocks[0].refs.has_reference() assert not df_copy._mgr.blocks[1].refs.has_reference() + assert df_copy.index is not df.index + assert df_copy.columns is not df.columns + # mutating copy doesn't mutate original df_copy.iloc[0, 0] = 0 assert df.iloc[0, 0] == 1 diff --git a/pandas/tests/series/methods/test_align.py b/pandas/tests/series/methods/test_align.py index f7e4dbe11f3d3..fe2d8f92fc550 100644 --- a/pandas/tests/series/methods/test_align.py +++ b/pandas/tests/series/methods/test_align.py @@ -83,10 +83,6 @@ def test_align_nocopy(datetime_series): def test_align_same_index(datetime_series): - a, b = datetime_series.align(datetime_series) - assert a.index.is_(datetime_series.index) - assert b.index.is_(datetime_series.index) - a, b = datetime_series.align(datetime_series) assert a.index is not datetime_series.index assert b.index is not datetime_series.index