diff --git a/src/squidpy/tl/__init__.py b/src/squidpy/tl/__init__.py index 6d5abe98c..f37014f1f 100644 --- a/src/squidpy/tl/__init__.py +++ b/src/squidpy/tl/__init__.py @@ -2,5 +2,7 @@ from __future__ import annotations +from squidpy.tl._ring_density import ring_density from squidpy.tl._sliding_window import _calculate_window_corners, sliding_window +from squidpy.tl._smooth_density_by_distance import smooth_density_by_distance from squidpy.tl._var_by_distance import var_by_distance diff --git a/src/squidpy/tl/_ring_density.py b/src/squidpy/tl/_ring_density.py new file mode 100644 index 000000000..a86971bec --- /dev/null +++ b/src/squidpy/tl/_ring_density.py @@ -0,0 +1,482 @@ +from __future__ import annotations + +from collections.abc import Iterable, Sequence +from typing import Any, Literal + +import geopandas as gpd +import numpy as np +import pandas as pd +from scanpy import logging as logg +from shapely import STRtree, affinity, boundary, contains, distance, points +from spatialdata import SpatialData +from spatialdata.transformations import get_transformation + +from squidpy._docs import d +from squidpy._validators import ( + assert_key_in_sdata, + assert_non_empty_sequence, + assert_non_negative, + assert_one_of, + assert_positive, +) +from squidpy.gr._utils import _save_data + +__all__ = ["ring_density"] + + +@d.dedent +def ring_density( + sdata: SpatialData, + contour_key: str, + target: Literal["cells", "transcripts"] = "cells", + table_key: str | None = None, + points_key: str = "transcripts", + shape_key: str | None = None, + ring_width: float = 50.0, + inward: float = 0.0, + outward: float = 50.0, + coordinate_system: str = "global", + contour_query: str | None = None, + feature_key: str | None = None, + feature_values: str | Sequence[str] | None = None, + metadata_keys: str | Sequence[str] | None = None, + key_added: str = "ring_density", + copy: bool = False, +) -> pd.DataFrame | None: + """ + Compute inward/outward ring density around contour 
annotations stored in ``SpatialData``. + + Parameters + ---------- + sdata + SpatialData object containing the contour annotations and the target elements. + contour_key + Key in ``sdata.shapes`` with the contour polygons used as density anchors. + target + Whether to count cell centroids or transcript points. + table_key + Key in ``sdata.tables`` used to infer the cell-associated shape layer and to store results in ``.uns`` when + ``copy = False``. + points_key + Key in ``sdata.points`` used when ``target = 'transcripts'``. + shape_key + Optional key in ``sdata.shapes`` to use when ``target = 'cells'``. If omitted, the region linked to + ``sdata.tables[table_key]`` is used. + ring_width + Width of each ring in the target coordinate system. + inward + Maximum inward distance from the contour boundary. + outward + Maximum outward distance from the contour boundary. + coordinate_system + Coordinate system in which distances should be computed. + contour_query + Optional pandas query string used to subset the contours before computation. + feature_key + Column in the transcript points table used for feature filtering. If ``None`` and ``feature_values`` is + provided, the key is inferred from SpatialData metadata. + feature_values + Optional transcript feature value or values to keep before counting. + metadata_keys + Optional contour metadata columns to copy into the output table. By default, a small useful subset is copied + when present. + key_added + Key used when storing the resulting dataframe in ``sdata.tables[table_key].uns``. + copy + If ``True``, return a dataframe. Otherwise, store the result in ``.uns``. + + Returns + ------- + If ``copy = True``, returns a :class:`pandas.DataFrame` with one row per contour and ring. + Otherwise, stores the dataframe in ``sdata.tables[table_key].uns[key_added]``. 
+ """ + start = logg.info(f"Computing {key_added}") + + assert_positive(ring_width, name="ring_width") + assert_non_negative(inward, name="inward") + assert_non_negative(outward, name="outward") + if inward == 0 and outward == 0: + raise ValueError("At least one of `inward` or `outward` must be greater than 0.") + assert_key_in_sdata(sdata, contour_key, attr="shapes") + assert_one_of(target, ["cells", "transcripts"], name="target") + + feature_values = ( + assert_non_empty_sequence(feature_values, name="feature_values") if feature_values is not None else None + ) + + contours = _prepare_contours( + sdata=sdata, + contour_key=contour_key, + coordinate_system=coordinate_system, + contour_query=contour_query, + ) + if contours.empty: + raise ValueError("No contours remain after applying the requested filters.") + + intervals = _build_ring_intervals(ring_width=ring_width, inward=inward, outward=outward) + interval_edges = np.asarray([intervals[0][0], *[end for _, end in intervals]], dtype=float) + + contour_geometries = np.asarray(contours.geometry.values, dtype=object) + contour_boundaries = np.asarray([boundary(geom) for geom in contour_geometries], dtype=object) + max_radius = max(inward, outward) + expanded_geometries = np.asarray( + [geom.buffer(max_radius) if max_radius > 0 else geom for geom in contour_geometries], + dtype=object, + ) + + if target == "cells": + resolved_shape_key = _resolve_shape_key(sdata=sdata, table_key=table_key, shape_key=shape_key) + target_geometries = _prepare_cell_targets( + sdata=sdata, + table_key=table_key, + shape_key=resolved_shape_key, + coordinate_system=coordinate_system, + ) + counts = _count_geometry_targets( + target_geometries=target_geometries, + expanded_geometries=expanded_geometries, + contour_geometries=contour_geometries, + contour_boundaries=contour_boundaries, + interval_edges=interval_edges, + ) + target_key = resolved_shape_key + else: + counts = _count_transcript_targets( + sdata=sdata, + points_key=points_key, + 
coordinate_system=coordinate_system, + expanded_geometries=expanded_geometries, + contour_geometries=contour_geometries, + contour_boundaries=contour_boundaries, + interval_edges=interval_edges, + feature_key=feature_key, + feature_values=feature_values, + ) + target_key = points_key + + areas = np.asarray( + [ + [_ring_area(geom, ring_start=ring_start, ring_end=ring_end) for ring_start, ring_end in intervals] + for geom in contour_geometries + ], + dtype=float, + ) + + result = _assemble_result( + contours=contours, + intervals=intervals, + counts=counts, + areas=areas, + contour_key=contour_key, + target=target, + target_key=target_key, + feature_values=feature_values, + metadata_keys=metadata_keys, + ) + + if copy: + logg.info("Finish", time=start) + return result + + if table_key is None: + raise ValueError("`table_key` must be specified when `copy=False` to determine where to store the result.") + assert_key_in_sdata(sdata, table_key, attr="tables") + + _save_data(sdata.tables[table_key], attr="uns", key=key_added, data=result, time=start) + return None + + +def _prepare_contours( + sdata: SpatialData, + contour_key: str, + coordinate_system: str, + contour_query: str | None, +) -> gpd.GeoDataFrame: + contours = sdata.shapes[contour_key].copy() + matrix = _get_affine_matrix(sdata.shapes[contour_key], coordinate_system=coordinate_system) + contours.geometry = contours.geometry.apply(lambda geom: _apply_affine_to_geometry(geom, matrix)) + if contour_query is not None: + contours = contours.query(contour_query) + return contours.reset_index(names="_squidpy_contour_id") + + +def _prepare_cell_targets( + sdata: SpatialData, + table_key: str | None, + shape_key: str, + coordinate_system: str, +) -> np.ndarray: + shapes = sdata.shapes[shape_key] + table_index: pd.Index | None = None + if table_key is not None: + assert_key_in_sdata(sdata, table_key, attr="tables") + table_index = sdata.tables[table_key].obs_names + + if table_index is not None: + keep_mask = 
def _count_geometry_targets(
    target_geometries: np.ndarray,
    expanded_geometries: np.ndarray,
    contour_geometries: np.ndarray,
    contour_boundaries: np.ndarray,
    interval_edges: np.ndarray,
) -> np.ndarray:
    """Count target points falling into each (contour, ring) cell.

    Parameters
    ----------
    target_geometries
        Object array of shapely points (e.g. cell centroids).
    expanded_geometries
        Contours buffered by the maximum ring radius; used as coarse spatial
        filters before exact signed distances are computed.
    contour_geometries, contour_boundaries
        Original contour polygons and their boundaries, index-aligned with
        ``expanded_geometries``.
    interval_edges
        Monotonic signed-distance edges delimiting the rings.

    Returns
    -------
    Integer matrix of shape ``(n_contours, n_rings)``.
    """
    n_rings = len(interval_edges) - 1
    ring_counts = np.zeros((len(contour_geometries), n_rings), dtype=np.int64)
    if len(target_geometries) == 0:
        return ring_counts

    # One spatial index over all targets; each contour queries it with its
    # buffered footprint to cheaply prune far-away points.
    candidate_tree = STRtree(target_geometries)
    for row, footprint in enumerate(expanded_geometries):
        hits = candidate_tree.query(footprint)
        if len(hits) == 0:
            continue

        candidates = target_geometries.take(hits)
        signed_dists = _signed_distance(
            point_geometries=candidates,
            contour_geometries=np.repeat(contour_geometries[row], len(candidates)),
            contour_boundaries=np.repeat(contour_boundaries[row], len(candidates)),
        )
        rings = _assign_intervals(signed_distances=signed_dists, interval_edges=interval_edges)
        in_range = rings >= 0
        if np.any(in_range):
            # np.add.at accumulates correctly even with repeated ring indices.
            np.add.at(ring_counts[row], rings[in_range], 1)
    return ring_counts
attr="points") + + points_ddf = sdata.points[points_key] + required_columns = ["x", "y"] + if feature_values is not None: + feature_key = _resolve_feature_key(points_ddf=points_ddf, feature_key=feature_key) + feature_values = set(feature_values) + required_columns.append(feature_key) + points_ddf = points_ddf[required_columns] + + matrix = _get_affine_matrix(points_ddf, coordinate_system=coordinate_system) + tree = STRtree(expanded_geometries) + counts = np.zeros((len(contour_geometries), len(interval_edges) - 1), dtype=np.int64) + + for delayed_partition in points_ddf.to_delayed(): + partition = delayed_partition.compute() + if feature_values is not None: + partition = partition.loc[partition[feature_key].isin(feature_values)] + if partition.empty: + continue + + xy = partition[["x", "y"]].to_numpy(dtype=float) + transformed_xy = _apply_affine_to_xy(xy, matrix) + point_geometries = np.asarray(points(transformed_xy[:, 0], transformed_xy[:, 1]), dtype=object) + + pair_idx = tree.query(point_geometries) + if pair_idx.size == 0: + continue + + point_idx = pair_idx[0] + contour_idx = pair_idx[1] + signed = _signed_distance( + point_geometries=point_geometries.take(point_idx), + contour_geometries=contour_geometries.take(contour_idx), + contour_boundaries=contour_boundaries.take(contour_idx), + ) + ring_idx = _assign_intervals(signed_distances=signed, interval_edges=interval_edges) + valid = ring_idx >= 0 + if np.any(valid): + np.add.at(counts, (contour_idx[valid], ring_idx[valid]), 1) + + return counts + + +def _assemble_result( + contours: gpd.GeoDataFrame, + intervals: list[tuple[float, float]], + counts: np.ndarray, + areas: np.ndarray, + contour_key: str, + target: str, + target_key: str | None, + feature_values: Sequence[str] | None, + metadata_keys: str | Sequence[str] | None, +) -> pd.DataFrame: + metadata_columns = _resolve_metadata_columns(contours=contours, metadata_keys=metadata_keys) + records: list[dict[str, Any]] = [] + feature_values_list = 
_as_list(feature_values) if feature_values is not None else None + + for contour_idx, contour_row in contours.iterrows(): + for ring_idx, (ring_start, ring_end) in enumerate(intervals): + area = float(areas[contour_idx, ring_idx]) + count = int(counts[contour_idx, ring_idx]) + record: dict[str, Any] = { + "contour_key": contour_key, + "contour_id": contour_row["_squidpy_contour_id"], + "target": target, + "target_key": target_key, + "ring_start": ring_start, + "ring_end": ring_end, + "ring_mid": 0.5 * (ring_start + ring_end), + "count": count, + "area": area, + "density": np.nan if area <= 0 else count / area, + } + if feature_values_list is not None: + record["feature_values"] = tuple(feature_values_list) + for column in metadata_columns: + record[column] = contour_row[column] + records.append(record) + return pd.DataFrame.from_records(records) + + +def _resolve_shape_key(sdata: SpatialData, table_key: str | None, shape_key: str | None) -> str: + if shape_key is not None: + assert_key_in_sdata(sdata, shape_key, attr="shapes") + return shape_key + + if table_key is None: + raise ValueError("Specify either `shape_key` or `table_key` when `target='cells'`.") + assert_key_in_sdata(sdata, table_key, attr="tables") + + attrs = sdata.tables[table_key].uns.get("spatialdata_attrs", {}) + region = attrs.get("region") + if region is None: + raise KeyError( + f"Unable to infer a shape key from `sdata.tables[{table_key!r}].uns['spatialdata_attrs']['region']`." + ) + + if isinstance(region, str): + resolved_region = region + else: + regions = assert_non_empty_sequence(region, name="regions") + if len(regions) != 1: + raise ValueError( + f"Unable to infer a unique shape key from `sdata.tables[{table_key!r}].uns['spatialdata_attrs']['region']`. " + "Please specify `shape_key` explicitly." 
def _resolve_feature_key(points_ddf: Any, feature_key: str | None) -> str:
    """Return a validated feature column name for the transcript table.

    An explicit ``feature_key`` is checked against the dataframe columns;
    otherwise the key is looked up in the SpatialData metadata attached to
    the dataframe (``attrs['spatialdata_attrs']['feature_key']``).
    """
    if feature_key is not None:
        if feature_key not in points_ddf.columns:
            raise KeyError(f"Feature key `{feature_key}` not found in the transcript points dataframe.")
        return feature_key

    metadata = getattr(points_ddf, "attrs", {})
    candidate = metadata.get("spatialdata_attrs", {}).get("feature_key")
    if candidate is None:
        raise KeyError("Unable to infer `feature_key` from SpatialData metadata. Please specify it explicitly.")
    if candidate not in points_ddf.columns:
        raise KeyError(f"Inferred feature key `{candidate}` not found in the transcript points dataframe.")
    return str(candidate)


def _resolve_metadata_columns(contours: gpd.GeoDataFrame, metadata_keys: str | Sequence[str] | None) -> list[str]:
    """Select contour metadata columns to copy into the result table.

    ``None`` picks a small default subset; explicitly requested keys are kept
    only when they actually exist in ``contours``.
    """
    if metadata_keys is None:
        requested = ["classification_name", "assigned_structure", "annotation_source"]
    elif isinstance(metadata_keys, str):
        # A plain string is one column name, not an iterable of characters.
        requested = [metadata_keys]
    elif isinstance(metadata_keys, Iterable):
        requested = [str(key) for key in metadata_keys]
    else:
        requested = [str(metadata_keys)]
    return [column for column in requested if column in contours.columns]


def _build_ring_intervals(ring_width: float, inward: float, outward: float) -> list[tuple[float, float]]:
    """Build consecutive ``(start, end)`` signed-distance rings of width ``ring_width``.

    Negative intervals cover the inward side of the contour boundary, positive
    intervals the outward side; the last interval on each side is clipped to
    the requested extent.
    """

    def _march(start: float, stop: float) -> list[tuple[float, float]]:
        # Walk from `start` towards `stop` in `ring_width` steps, clipping the
        # final interval at `stop`.
        spans: list[tuple[float, float]] = []
        lower = start
        while lower < stop:
            upper = min(lower + ring_width, stop)
            spans.append((lower, upper))
            lower = upper
        return spans

    return _march(-float(inward), 0.0) + _march(0.0, float(outward))
def _signed_distance(
    point_geometries: np.ndarray,
    contour_geometries: np.ndarray,
    contour_boundaries: np.ndarray,
) -> np.ndarray:
    """Signed distance of each point to the matching contour boundary.

    Points inside the contour get negative distances, points outside positive
    ones; points exactly on the boundary map to ``0.0``.
    """
    boundary_dist = np.asarray(distance(point_geometries, contour_boundaries), dtype=float)
    is_inside = np.asarray(contains(contour_geometries, point_geometries), dtype=bool)
    sign = np.where(is_inside, -1.0, 1.0)
    return np.asarray(np.where(boundary_dist == 0, 0.0, sign * boundary_dist), dtype=float)


def _assign_intervals(signed_distances: np.ndarray, interval_edges: np.ndarray) -> np.ndarray:
    """Map signed distances to ring indices; out-of-range values map to ``-1``.

    With ``side='right'`` each interval is open on the left edge; values that
    sit (numerically) on the outermost edge are folded into the last ring.
    """
    n_rings = len(interval_edges) - 1
    idx = np.searchsorted(interval_edges, signed_distances, side="right") - 1
    idx = np.where(np.isclose(signed_distances, interval_edges[-1]), n_rings - 1, idx)
    in_range = (idx >= 0) & (idx < n_rings)
    return np.where(in_range, idx, -1)


def _get_affine_matrix(element: Any, coordinate_system: str) -> np.ndarray:
    """Fetch the 3x3 (x, y) affine matrix mapping *element* into *coordinate_system*."""
    transform = get_transformation(element, to_coordinate_system=coordinate_system)
    affine = transform.to_affine_matrix(input_axes=("x", "y"), output_axes=("x", "y"))
    if affine.shape != (3, 3):
        raise ValueError(f"Expected a 3x3 affine matrix, found shape {affine.shape}.")
    return np.asarray(affine, dtype=float)


def _apply_affine_to_geometry(geometry: Any, matrix: np.ndarray) -> Any:
    """Apply a 3x3 affine matrix to a shapely geometry via ``shapely.affinity``."""
    # shapely expects the flat coefficient order [a, b, d, e, xoff, yoff].
    coef_a, coef_b, coef_d, coef_e = matrix[0, 0], matrix[0, 1], matrix[1, 0], matrix[1, 1]
    xoff, yoff = matrix[0, 2], matrix[1, 2]
    return affinity.affine_transform(geometry, [coef_a, coef_b, coef_d, coef_e, xoff, yoff])


def _apply_affine_to_xy(xy: np.ndarray, matrix: np.ndarray) -> np.ndarray:
    """Apply a 3x3 affine matrix to an ``(n, 2)`` coordinate array."""
    linear = matrix[:2, :2]
    offset = matrix[:2, 2]
    return np.asarray(xy @ linear.T + offset, dtype=float)


def _as_list(values: str | Sequence[str] | None) -> list[str]:
    """Normalize ``None`` / scalar / iterable input into a list of strings."""
    if values is None:
        return []
    if isinstance(values, str):
        # A single string is one value, not an iterable of characters.
        return [values]
    if isinstance(values, Iterable):
        return [str(item) for item in values]
    return [str(values)]
mode 100644 index 000000000..45446cb09 --- /dev/null +++ b/src/squidpy/tl/_smooth_density_by_distance.py @@ -0,0 +1,410 @@ +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any + +import geopandas as gpd +import numpy as np +import pandas as pd +from scanpy import logging as logg +from shapely import STRtree, points +from spatialdata import SpatialData + +from squidpy._docs import d +from squidpy._validators import assert_key_in_sdata, assert_non_empty_sequence, assert_non_negative, assert_positive +from squidpy.gr._utils import _save_data +from squidpy.tl._ring_density import ( + _apply_affine_to_xy, + _as_list, + _get_affine_matrix, + _prepare_contours, + _resolve_feature_key, + _resolve_metadata_columns, + _signed_distance, +) + +__all__ = ["smooth_density_by_distance"] + +_GAUSSIAN_KERNEL = "gaussian" +_KERNEL_TRUNCATION = 4.0 + + +@d.dedent +def smooth_density_by_distance( + sdata: SpatialData, + contour_key: str, + bandwidth: float, + table_key: str | None = None, + points_key: str = "transcripts", + feature_key: str | None = None, + feature_values: str | Sequence[str] | None = None, + grid_step: float | None = None, + inward: float = 0.0, + outward: float = 50.0, + coordinate_system: str = "global", + contour_query: str | None = None, + metadata_keys: str | Sequence[str] | None = None, + key_added: str = "smooth_density_by_distance", + copy: bool = False, +) -> pd.DataFrame | None: + """ + Compute a smooth signed-distance density profile around contour annotations stored in ``SpatialData``. + + Parameters + ---------- + sdata + SpatialData object containing the contour annotations and transcript points. + contour_key + Key in ``sdata.shapes`` with the contour polygons used as density anchors. + bandwidth + Gaussian kernel bandwidth in the target coordinate system. + table_key + Key in ``sdata.tables`` used to store results in ``.uns`` when ``copy = False``. 
+ points_key + Key in ``sdata.points`` with the transcript points. + feature_key + Column in the transcript points table used for feature filtering. If ``None`` and ``feature_values`` is + provided, the key is inferred from SpatialData metadata. + feature_values + Optional transcript feature value or values to keep before counting. + grid_step + Distance between consecutive signed-distance evaluation points. Defaults to ``bandwidth / 4``. + inward + Maximum inward distance from the contour boundary. + outward + Maximum outward distance from the contour boundary. + coordinate_system + Coordinate system in which distances should be computed. + contour_query + Optional pandas query string used to subset the contours before computation. + metadata_keys + Optional contour metadata columns to copy into the output table. By default, a small useful subset is copied + when present. + key_added + Key used when storing the resulting dataframe in ``sdata.tables[table_key].uns``. + copy + If ``True``, return a dataframe. Otherwise, store the result in ``.uns``. + + Returns + ------- + If ``copy = True``, returns a :class:`pandas.DataFrame` with one row per contour and signed-distance grid point. + Otherwise, stores the dataframe in ``sdata.tables[table_key].uns[key_added]``. 
+ """ + start = logg.info(f"Computing {key_added}") + + assert_positive(bandwidth, name="bandwidth") + assert_non_negative(inward, name="inward") + assert_non_negative(outward, name="outward") + if inward == 0 and outward == 0: + raise ValueError("At least one of `inward` or `outward` must be greater than 0.") + + if grid_step is None: + grid_step = bandwidth / 4.0 + assert_positive(grid_step, name="grid_step") + + assert_key_in_sdata(sdata, contour_key, attr="shapes") + assert_key_in_sdata(sdata, points_key, attr="points") + feature_values = ( + assert_non_empty_sequence(feature_values, name="feature_values") if feature_values is not None else None + ) + + contours = _prepare_contours( + sdata=sdata, + contour_key=contour_key, + coordinate_system=coordinate_system, + contour_query=contour_query, + ) + if contours.empty: + raise ValueError("No contours remain after applying the requested filters.") + + signed_distance_grid = _build_signed_distance_grid(inward=inward, outward=outward, grid_step=grid_step) + contour_geometries = np.asarray(contours.geometry.values, dtype=object) + contour_boundaries = np.asarray([geom.boundary for geom in contour_geometries], dtype=object) + support_geometries = np.asarray( + [ + _build_support_geometry( + geom, + inward=inward, + outward=outward, + cutoff_radius=_KERNEL_TRUNCATION * bandwidth, + ) + for geom in contour_geometries + ], + dtype=object, + ) + + count_density = _count_smoothed_transcripts( + sdata=sdata, + points_key=points_key, + coordinate_system=coordinate_system, + feature_key=feature_key, + feature_values=feature_values, + contour_geometries=contour_geometries, + contour_boundaries=contour_boundaries, + support_geometries=support_geometries, + signed_distance_grid=signed_distance_grid, + inward=inward, + outward=outward, + bandwidth=bandwidth, + ) + geometry_measure = _compute_geometry_measure( + contour_geometries=contour_geometries, + signed_distance_grid=signed_distance_grid, + inward=inward, + outward=outward, 
def _build_signed_distance_grid(inward: float, outward: float, grid_step: float) -> np.ndarray:
    """Evaluation grid of signed distances from ``-inward`` to ``outward``.

    The grid marches in ``grid_step`` increments and is forced to end exactly
    at ``outward`` regardless of how the step divides the range.
    """
    lower = -float(inward)
    upper = float(outward)
    grid = np.arange(lower, upper + grid_step, grid_step, dtype=float)
    if grid.size == 0:
        # Degenerate step/range combination: fall back to the two endpoints.
        return np.asarray([lower, upper], dtype=float)
    last = grid[-1]
    if np.isclose(last, upper):
        # Snap a float-accumulation near-miss exactly onto the endpoint.
        grid[-1] = upper
    elif last < upper:
        grid = np.append(grid, upper)
    else:
        grid = np.append(grid[grid < upper], upper)
    return np.unique(grid)


def _build_support_geometry(geometry: Any, inward: float, outward: float, cutoff_radius: float) -> Any:
    """Region in which points can still contribute kernel mass to the profile.

    The contour is dilated by ``outward + cutoff_radius`` and eroded by
    ``inward + cutoff_radius``; their difference is the ring-shaped support.
    If the erosion consumes the whole polygon, the dilated disk is returned.
    """
    dilated = geometry.buffer(outward + cutoff_radius)
    eroded = geometry.buffer(-(inward + cutoff_radius))
    return dilated if eroded.is_empty else dilated.difference(eroded)
feature_key = _resolve_feature_key(points_ddf=points_ddf, feature_key=feature_key) + required_columns.append(feature_key) + feature_values = set(feature_values) + points_ddf = points_ddf[required_columns] + + matrix = _get_affine_matrix(points_ddf, coordinate_system=coordinate_system) + tree = STRtree(support_geometries) + counts = np.zeros((len(contour_geometries), len(signed_distance_grid)), dtype=float) + lower = -float(inward) + upper = float(outward) + + for delayed_partition in points_ddf.to_delayed(): + partition = delayed_partition.compute() + if feature_values is not None: + partition = partition.loc[partition[feature_key].isin(feature_values)] + if partition.empty: + continue + + xy = partition[["x", "y"]].to_numpy(dtype=float) + transformed_xy = _apply_affine_to_xy(xy, matrix) + point_geometries = np.asarray(points(transformed_xy[:, 0], transformed_xy[:, 1]), dtype=object) + + pair_idx = tree.query(point_geometries) + if pair_idx.size == 0: + continue + + point_idx = pair_idx[0] + contour_idx = pair_idx[1] + signed = _signed_distance( + point_geometries=point_geometries.take(point_idx), + contour_geometries=contour_geometries.take(contour_idx), + contour_boundaries=contour_boundaries.take(contour_idx), + ) + valid = (signed >= lower) & (signed <= upper) + if not np.any(valid): + continue + + valid_contour_idx = contour_idx[valid] + valid_signed = signed[valid] + for current_contour_idx in np.unique(valid_contour_idx): + signed_for_contour = valid_signed[valid_contour_idx == current_contour_idx] + _accumulate_gaussian_with_reflection( + counts[current_contour_idx], + signed_distance_grid=signed_distance_grid, + signed_distances=signed_for_contour, + bandwidth=bandwidth, + lower=lower, + upper=upper, + ) + + return counts + + +def _accumulate_gaussian_with_reflection( + accumulator: np.ndarray, + signed_distance_grid: np.ndarray, + signed_distances: np.ndarray, + bandwidth: float, + lower: float, + upper: float, + chunk_size: int = 2048, +) -> None: + if 
def _gaussian_kernel_sum(
    signed_distance_grid: np.ndarray,
    sample_locations: np.ndarray,
    bandwidth: float,
    cutoff: float,
) -> np.ndarray:
    """Sum of normalized Gaussian kernels evaluated on the distance grid.

    Each sample contributes ``N(grid - sample; bandwidth)``; contributions
    farther than ``cutoff`` from a grid point are truncated to exactly zero.
    Returns one float per grid point.
    """
    if sample_locations.size == 0:
        return np.zeros(len(signed_distance_grid), dtype=float)

    # (n_grid, n_samples) matrix of offsets grid_i - sample_j.
    offsets = signed_distance_grid[:, None] - sample_locations[None, :]
    kernel = np.exp(-0.5 * (offsets / bandwidth) ** 2) / (np.sqrt(2.0 * np.pi) * bandwidth)
    kernel[np.abs(offsets) > cutoff] = 0.0
    return np.asarray(kernel.sum(axis=1), dtype=float)
def _shell_area(geometry: Any, distance_start: float, distance_end: float) -> float:
    """Area of the shell between two signed offsets of *geometry*'s boundary.

    Negative offsets erode the polygon and positive offsets dilate it, so a
    single buffer difference covers inward, outward, and straddling shells
    alike. Degenerate (empty or inverted) intervals yield ``0.0``.
    """
    if distance_end <= distance_start:
        return 0.0
    # buffer(d) erodes for d < 0 and dilates for d > 0; since -abs(d) == d
    # whenever d <= 0, the inward/outward/straddling cases all reduce to one
    # subtraction of buffered areas.
    outer_area = float(geometry.buffer(distance_end).area)
    inner_area = float(geometry.buffer(distance_start).area)
    return max(outer_area - inner_area, 0.0)
"count_density": local_count_density, + "geometry_measure": local_geometry_measure, + "density": np.nan if local_geometry_measure <= 0 else local_count_density / local_geometry_measure, + } + if feature_values_list is not None: + record["feature_values"] = tuple(feature_values_list) + for column in metadata_columns: + record[column] = contour_row[column] + records.append(record) + + return pd.DataFrame.from_records(records) diff --git a/tests/tools/test_ring_density.py b/tests/tools/test_ring_density.py new file mode 100644 index 000000000..6140fc88d --- /dev/null +++ b/tests/tools/test_ring_density.py @@ -0,0 +1,163 @@ +from __future__ import annotations + +import anndata as ad +import dask.dataframe as dd +import geopandas as gpd +import numpy as np +import pandas as pd +import pytest +import spatialdata as sd +from shapely import Point, Polygon +from spatialdata.transformations import Identity, Scale + +from squidpy.tl import ring_density + + +@pytest.fixture() +def sdata_ring_density(): + contour_df = gpd.GeoDataFrame( + { + "classification_name": ["region_a"], + "assigned_structure": ["region_a"], + "annotation_source": ["synthetic"], + }, + index=pd.Index(["contour_a"], name="region_id"), + geometry=[Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])], + ) + + cells_raw = gpd.GeoDataFrame( + {"radius": [1.0] * 4}, + index=pd.Index(["cell_0", "cell_1", "cell_2", "cell_3"], name="cell_id"), + geometry=[ + Point(2.5, 2.5), # global (5, 5), far inside -> excluded for inward=4 + Point(5.5, 2.5), # global (11, 5), first outward ring + Point(6.5, 2.5), # global (13, 5), second outward ring + Point(2.5, 4.75), # global (5, 9.5), first inward ring + ], + ) + + transcripts_df = pd.DataFrame( + { + "x": [2.5, 5.5, 6.5, 2.5], + "y": [4.75, 2.5, 2.5, 2.5], + "feature_name": ["B", "A", "A", "A"], + "cell_id": ["cell_3", "cell_1", "cell_2", "cell_0"], + } + ) + transcripts_ddf = dd.from_pandas(transcripts_df, npartitions=2) + + adata = ad.AnnData(np.zeros((len(cells_raw), 1))) + 
adata.obs_names = cells_raw.index.astype(str) + adata.obs["cell_id"] = adata.obs_names.astype(str) + adata.obs["region"] = pd.Categorical(["cells"] * len(cells_raw)) + adata.uns["spatialdata_attrs"] = { + "region": "cells", + "region_key": "region", + "instance_key": "cell_id", + } + + return sd.SpatialData.init_from_elements( + { + "cells": sd.models.ShapesModel().parse( + cells_raw, transformations={"global": Scale([2.0, 2.0], axes=("x", "y"))} + ), + "contours": sd.models.ShapesModel().parse(contour_df, transformations={"global": Identity()}), + "transcripts": sd.models.PointsModel().parse( + transcripts_ddf, + feature_key="feature_name", + instance_key="cell_id", + transformations={"global": Scale([2.0, 2.0], axes=("x", "y"))}, + ), + "table": sd.models.TableModel().parse(adata), + } + ) + + +class TestRingDensity: + def test_ring_density_cells(self, sdata_ring_density: sd.SpatialData): + result = ring_density( + sdata_ring_density, + contour_key="contours", + target="cells", + table_key="table", + ring_width=2.0, + inward=4.0, + outward=4.0, + copy=True, + ) + + inward_ring = result.loc[(result["ring_start"] == -2.0) & (result["ring_end"] == 0.0), "count"].item() + first_outward = result.loc[(result["ring_start"] == 0.0) & (result["ring_end"] == 2.0), "count"].item() + second_outward = result.loc[(result["ring_start"] == 2.0) & (result["ring_end"] == 4.0), "count"].item() + + assert inward_ring == 1 + assert first_outward == 1 + assert second_outward == 1 + + contour = sdata_ring_density.shapes["contours"].geometry.iloc[0] + expected_area = contour.area - contour.buffer(-2.0).area + observed_area = result.loc[(result["ring_start"] == -2.0) & (result["ring_end"] == 0.0), "area"].item() + assert observed_area == pytest.approx(expected_area) + assert result["contour_id"].nunique() == 1 + assert result["contour_id"].iloc[0] == "contour_a" + assert result["target_key"].nunique() == 1 + assert result["target_key"].iloc[0] == "cells" + + def 
test_ring_density_transcripts_feature_filter(self, sdata_ring_density: sd.SpatialData): + result = ring_density( + sdata_ring_density, + contour_key="contours", + target="transcripts", + table_key="table", + points_key="transcripts", + ring_width=2.0, + inward=4.0, + outward=4.0, + feature_values="A", + copy=True, + ) + + inward_ring = result.loc[(result["ring_start"] == -2.0) & (result["ring_end"] == 0.0), "count"].item() + first_outward = result.loc[(result["ring_start"] == 0.0) & (result["ring_end"] == 2.0), "count"].item() + second_outward = result.loc[(result["ring_start"] == 2.0) & (result["ring_end"] == 4.0), "count"].item() + + assert inward_ring == 0 + assert first_outward == 1 + assert second_outward == 1 + assert result["feature_values"].dropna().iloc[0] == ("A",) + assert result["target_key"].nunique() == 1 + assert result["target_key"].iloc[0] == "transcripts" + + def test_ring_density_save_to_uns(self, sdata_ring_density: sd.SpatialData): + ring_density( + sdata_ring_density, + contour_key="contours", + target="cells", + table_key="table", + ring_width=2.0, + inward=2.0, + outward=2.0, + copy=False, + ) + + assert "ring_density" in sdata_ring_density.tables["table"].uns + stored = sdata_ring_density.tables["table"].uns["ring_density"] + assert isinstance(stored, pd.DataFrame) + assert {"ring_start", "ring_end", "count", "area", "density"} <= set(stored.columns) + + def test_ring_density_accepts_single_region_sequence(self, sdata_ring_density: sd.SpatialData): + sdata_ring_density.tables["table"].uns["spatialdata_attrs"]["region"] = ["cells"] + + result = ring_density( + sdata_ring_density, + contour_key="contours", + target="cells", + table_key="table", + ring_width=2.0, + inward=2.0, + outward=2.0, + copy=True, + ) + + assert result["target_key"].nunique() == 1 + assert result["target_key"].iloc[0] == "cells" diff --git a/tests/tools/test_smooth_density_by_distance.py b/tests/tools/test_smooth_density_by_distance.py new file mode 100644 index 
000000000..6b4ee9714 --- /dev/null +++ b/tests/tools/test_smooth_density_by_distance.py @@ -0,0 +1,272 @@ +from __future__ import annotations + +import anndata as ad +import dask.dataframe as dd +import geopandas as gpd +import numpy as np +import pandas as pd +import pytest +import spatialdata as sd +from shapely import Point, Polygon +from spatialdata.transformations import Identity + +from squidpy.tl import smooth_density_by_distance +from squidpy.tl._smooth_density_by_distance import _shell_area + + +def _make_sdata(transcripts_df: pd.DataFrame, npartitions: int = 2) -> sd.SpatialData: + contour_df = gpd.GeoDataFrame( + { + "classification_name": ["region_a"], + "assigned_structure": ["region_a"], + "annotation_source": ["synthetic"], + }, + index=pd.Index(["contour_a"], name="region_id"), + geometry=[Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])], + ) + + cells_raw = gpd.GeoDataFrame( + {"radius": [1.0]}, + index=pd.Index(["cell_0"], name="cell_id"), + geometry=[Point(1.0, 1.0)], + ) + + transcripts_ddf = dd.from_pandas(transcripts_df, npartitions=npartitions) + + adata = ad.AnnData(np.zeros((len(cells_raw), 1))) + adata.obs_names = cells_raw.index.astype(str) + adata.obs["cell_id"] = adata.obs_names.astype(str) + adata.obs["region"] = pd.Categorical(["cells"] * len(cells_raw)) + adata.uns["spatialdata_attrs"] = { + "region": "cells", + "region_key": "region", + "instance_key": "cell_id", + } + + return sd.SpatialData.init_from_elements( + { + "cells": sd.models.ShapesModel().parse(cells_raw, transformations={"global": Identity()}), + "contours": sd.models.ShapesModel().parse(contour_df, transformations={"global": Identity()}), + "transcripts": sd.models.PointsModel().parse( + transcripts_ddf, + feature_key="feature_name", + instance_key="cell_id", + transformations={"global": Identity()}, + ), + "table": sd.models.TableModel().parse(adata), + } + ) + + +@pytest.fixture() +def sdata_smooth_density() -> sd.SpatialData: + transcripts_df = pd.DataFrame( + { + 
"x": [ + 0.5, + 0.8, + 1.2, + -0.5, + -0.8, + -1.2, + 10.5, + 10.8, + 3.5, + 4.0, + 13.5, + 14.0, + 0.5, + ], + "y": [5.0] * 13, + "feature_name": ["A"] * 12 + ["B"], + "cell_id": ["cell_0"] * 13, + } + ) + return _make_sdata(transcripts_df, npartitions=2) + + +@pytest.fixture() +def sdata_boundary_probe() -> sd.SpatialData: + transcripts_df = pd.DataFrame( + { + "x": [3.9], + "y": [5.0], + "feature_name": ["A"], + "cell_id": ["cell_0"], + } + ) + return _make_sdata(transcripts_df, npartitions=1) + + +class TestSmoothDensityByDistance: + def test_smooth_density_output_structure(self, sdata_smooth_density: sd.SpatialData): + result = smooth_density_by_distance( + sdata_smooth_density, + contour_key="contours", + table_key="table", + points_key="transcripts", + feature_values="A", + bandwidth=1.0, + grid_step=0.5, + inward=4.0, + outward=4.0, + copy=True, + ) + + expected_grid = np.arange(-4.0, 4.0 + 0.5, 0.5) + assert np.allclose(result["signed_distance"].to_numpy(dtype=float), expected_grid) + assert result["signed_distance"].is_monotonic_increasing + assert len(result) == len(expected_grid) + assert result["contour_id"].nunique() == 1 + assert result["contour_id"].iloc[0] == "contour_a" + assert result["target"].nunique() == 1 + assert result["target"].iloc[0] == "transcripts" + assert result["target_key"].nunique() == 1 + assert result["target_key"].iloc[0] == "transcripts" + assert result["kernel"].nunique() == 1 + assert result["kernel"].iloc[0] == "gaussian" + assert result["bandwidth"].nunique() == 1 + assert result["bandwidth"].iloc[0] == pytest.approx(1.0) + assert result["grid_step"].nunique() == 1 + assert result["grid_step"].iloc[0] == pytest.approx(0.5) + assert result["feature_values"].dropna().iloc[0] == ("A",) + assert { + "count_density", + "geometry_measure", + "density", + "signed_distance", + "bandwidth", + "grid_step", + "kernel", + } <= set(result.columns) + + def test_smooth_density_save_to_uns(self, sdata_smooth_density: sd.SpatialData): + 
smooth_density_by_distance( + sdata_smooth_density, + contour_key="contours", + table_key="table", + points_key="transcripts", + feature_values="A", + bandwidth=1.0, + grid_step=0.5, + inward=4.0, + outward=4.0, + copy=False, + ) + + assert "smooth_density_by_distance" in sdata_smooth_density.tables["table"].uns + stored = sdata_smooth_density.tables["table"].uns["smooth_density_by_distance"] + assert isinstance(stored, pd.DataFrame) + assert {"signed_distance", "count_density", "geometry_measure", "density"} <= set(stored.columns) + + def test_smooth_density_is_boundary_enriched(self, sdata_smooth_density: sd.SpatialData): + result = smooth_density_by_distance( + sdata_smooth_density, + contour_key="contours", + table_key="table", + points_key="transcripts", + feature_values="A", + bandwidth=1.0, + grid_step=0.5, + inward=4.0, + outward=4.0, + copy=True, + ) + + near_boundary = result.loc[result["signed_distance"] == 0.0, "count_density"].item() + far_from_boundary = result.loc[result["signed_distance"] == 3.0, "count_density"].item() + + assert near_boundary > far_from_boundary + assert result["density"].dropna().nunique() > 8 + + def test_smooth_density_partition_invariance(self): + transcripts_df = pd.DataFrame( + { + "x": [0.5, 0.8, 1.2, -0.5, -0.8, -1.2, 10.5, 10.8, 3.5, 4.0, 13.5, 14.0], + "y": [5.0] * 12, + "feature_name": ["A"] * 12, + "cell_id": ["cell_0"] * 12, + } + ) + + sdata_single = _make_sdata(transcripts_df, npartitions=1) + sdata_multi = _make_sdata(transcripts_df, npartitions=3) + + result_single = smooth_density_by_distance( + sdata_single, + contour_key="contours", + table_key="table", + feature_values="A", + bandwidth=1.0, + grid_step=0.5, + inward=4.0, + outward=4.0, + copy=True, + ) + result_multi = smooth_density_by_distance( + sdata_multi, + contour_key="contours", + table_key="table", + feature_values="A", + bandwidth=1.0, + grid_step=0.5, + inward=4.0, + outward=4.0, + copy=True, + ) + + result_single = 
result_single.sort_values("signed_distance", kind="stable").reset_index(drop=True) + result_multi = result_multi.sort_values("signed_distance", kind="stable").reset_index(drop=True) + + assert np.allclose( + result_single["count_density"].to_numpy(dtype=float), + result_multi["count_density"].to_numpy(dtype=float), + ) + assert np.allclose( + result_single["geometry_measure"].to_numpy(dtype=float), + result_multi["geometry_measure"].to_numpy(dtype=float), + ) + assert np.allclose( + result_single["density"].to_numpy(dtype=float), + result_multi["density"].to_numpy(dtype=float), + equal_nan=True, + ) + + def test_smooth_density_geometry_measure_matches_local_shell_area(self, sdata_smooth_density: sd.SpatialData): + result = smooth_density_by_distance( + sdata_smooth_density, + contour_key="contours", + table_key="table", + feature_values="A", + bandwidth=1.0, + grid_step=0.5, + inward=6.0, + outward=4.0, + copy=True, + ) + + contour = sdata_smooth_density.shapes["contours"].geometry.iloc[0] + row = result.loc[result["signed_distance"] == 1.0].iloc[0] + expected_local_area = _shell_area(contour, 0.75, 1.25) + assert row["geometry_measure"] * row["grid_step"] == pytest.approx(expected_local_area) + + deep_row = result.loc[result["signed_distance"] == -6.0].iloc[0] + assert deep_row["geometry_measure"] == pytest.approx(0.0) + assert np.isnan(deep_row["density"]) + + def test_smooth_density_reflection_correction_at_range_edge(self, sdata_boundary_probe: sd.SpatialData): + result = smooth_density_by_distance( + sdata_boundary_probe, + contour_key="contours", + table_key="table", + feature_values="A", + bandwidth=1.0, + grid_step=0.5, + inward=4.0, + outward=4.0, + copy=True, + ) + + observed = result.loc[result["signed_distance"] == -4.0, "count_density"].item() + expected = 2.0 * np.exp(-0.5 * (0.1**2)) / np.sqrt(2.0 * np.pi) + assert observed == pytest.approx(expected, rel=1e-6)