|
| 1 | +""" |
| 2 | +Ramer-Douglas-Peucker (RDP) algorithm for polyline simplification. |
| 3 | +
|
| 4 | +Given a curve represented as a sequence of points and a tolerance epsilon, |
| 5 | +the algorithm recursively reduces the number of points while preserving the |
| 6 | +overall shape of the curve. Points that deviate from the simplified line by |
| 7 | +less than epsilon are removed. |
| 8 | +
|
| 9 | +Time complexity: O(n log n) on average, O(n²) worst case |
| 10 | +Space complexity: O(n) for the recursion stack |
| 11 | +
|
| 12 | +References: |
| 13 | +- https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm |
| 14 | +- Ramer, U. (1972). "An iterative procedure for the polygonal approximation |
| 15 | + of plane curves". Computer Graphics and Image Processing. 1 (3): 244-256. |
| 16 | +- Douglas, D.; Peucker, T. (1973). "Algorithms for the reduction of the number |
| 17 | + of points required to represent a digitized line or its caricature". |
| 18 | + Cartographica. 10 (2): 112-122. |
| 19 | +""" |
| 20 | + |
| 21 | +from __future__ import annotations |
| 22 | + |
| 23 | +import math |
| 24 | +from collections.abc import Sequence |
| 25 | + |
| 26 | +# --------------------------------------------------------------------------- |
| 27 | +# Internal helpers |
| 28 | +# --------------------------------------------------------------------------- |
| 29 | + |
| 30 | + |
| 31 | +def _euclidean_distance(a: tuple[float, float], b: tuple[float, float]) -> float: |
| 32 | + """Return the Euclidean distance between two 2-D points. |
| 33 | +
|
| 34 | + >>> _euclidean_distance((0.0, 0.0), (3.0, 4.0)) |
| 35 | + 5.0 |
| 36 | + >>> _euclidean_distance((1.0, 1.0), (1.0, 1.0)) |
| 37 | + 0.0 |
| 38 | + """ |
| 39 | + return math.hypot(b[0] - a[0], b[1] - a[1]) |
| 40 | + |
| 41 | + |
| 42 | +def _perpendicular_distance( |
| 43 | + p: tuple[float, float], |
| 44 | + a: tuple[float, float], |
| 45 | + b: tuple[float, float], |
| 46 | +) -> float: |
| 47 | + """Return the perpendicular distance from point *p* to the line through *a* and *b*. |
| 48 | +
|
| 49 | + The result is the absolute value of the signed area of the triangle (a, b, p) |
| 50 | + divided by the length of segment ab, which equals the altitude of that triangle |
| 51 | + from p. |
| 52 | +
|
| 53 | + >>> _perpendicular_distance((4.0, 0.0), (0.0, 0.0), (0.0, 3.0)) |
| 54 | + 4.0 |
| 55 | + >>> _perpendicular_distance((4.0, 0.0), (0.0, 0.0), (0.0, 3.0)) |
| 56 | + 4.0 |
| 57 | + >>> # order of a and b does not affect the result |
| 58 | + >>> _perpendicular_distance((4.0, 0.0), (0.0, 3.0), (0.0, 0.0)) |
| 59 | + 4.0 |
| 60 | + >>> _perpendicular_distance((4.0, 1.0), (0.0, 1.0), (0.0, 4.0)) |
| 61 | + 4.0 |
| 62 | + >>> _perpendicular_distance((2.0, 1.0), (-2.0, 1.0), (-2.0, 4.0)) |
| 63 | + 4.0 |
| 64 | + """ |
| 65 | + px, py = p |
| 66 | + ax, ay = a |
| 67 | + bx, by = b |
| 68 | + numerator = abs((by - ay) * px - (bx - ax) * py + bx * ay - by * ax) |
| 69 | + denominator = _euclidean_distance(a, b) |
| 70 | + if denominator == 0.0: |
| 71 | + # a and b coincide; fall back to point-to-point distance |
| 72 | + return _euclidean_distance(p, a) |
| 73 | + return numerator / denominator |
| 74 | + |
| 75 | + |
| 76 | +# --------------------------------------------------------------------------- |
| 77 | +# Public API |
| 78 | +# --------------------------------------------------------------------------- |
| 79 | + |
| 80 | + |
| 81 | +def ramer_douglas_peucker( |
| 82 | + points: Sequence[tuple[float, float]], |
| 83 | + epsilon: float, |
| 84 | +) -> list[tuple[float, float]]: |
| 85 | + """Simplify a polyline using the Ramer-Douglas-Peucker algorithm. |
| 86 | +
|
| 87 | + Parameters |
| 88 | + ---------- |
| 89 | + points: |
| 90 | + An ordered sequence of ``(x, y)`` tuples that form the polyline. |
| 91 | + epsilon: |
| 92 | + Maximum allowable perpendicular deviation. Points whose distance |
| 93 | + to the simplified segment is less than or equal to *epsilon* are |
| 94 | + discarded. Must be non-negative. |
| 95 | +
|
| 96 | + Returns |
| 97 | + ------- |
| 98 | + list[tuple[float, float]] |
| 99 | + A simplified list of ``(x, y)`` points that is a subset of *points*. |
| 100 | +
|
| 101 | + Raises |
| 102 | + ------ |
| 103 | + ValueError |
| 104 | + If *epsilon* is negative. |
| 105 | +
|
| 106 | + Examples |
| 107 | + -------- |
| 108 | + Collinear points - middle point is redundant for any positive epsilon: |
| 109 | +
|
| 110 | + >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.0), (2.0, 0.0)], epsilon=0.5) |
| 111 | + [(0.0, 0.0), (2.0, 0.0)] |
| 112 | +
|
| 113 | + Empty / tiny inputs are returned unchanged: |
| 114 | +
|
| 115 | + >>> ramer_douglas_peucker([], epsilon=1.0) |
| 116 | + [] |
| 117 | + >>> ramer_douglas_peucker([(0.0, 0.0)], epsilon=1.0) |
| 118 | + [(0.0, 0.0)] |
| 119 | + >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 1.0)], epsilon=1.0) |
| 120 | + [(0.0, 0.0), (1.0, 1.0)] |
| 121 | +
|
| 122 | + A simple square outline where epsilon removes mid-edge points: |
| 123 | +
|
| 124 | + >>> square = [ |
| 125 | + ... (0.0, 0.0), (1.0, 0.0), (2.0, 0.0), |
| 126 | + ... (2.0, 1.0), (2.0, 2.0), (1.0, 2.0), |
| 127 | + ... (0.0, 2.0), (0.0, 1.0), |
| 128 | + ... ] |
| 129 | + >>> ramer_douglas_peucker(square, epsilon=0.7) |
| 130 | + [(0.0, 0.0), (2.0, 0.0), (2.0, 2.0), (0.0, 2.0), (0.0, 1.0)] |
| 131 | +
|
| 132 | + A polygonal chain simplified to a single segment for large epsilon: |
| 133 | +
|
| 134 | + >>> chain = [(0.0, 0.0), (2.0, 0.5), (3.0, 3.0), (6.0, 3.0), (8.0, 4.0)] |
| 135 | + >>> ramer_douglas_peucker(chain, epsilon=3.0) |
| 136 | + [(0.0, 0.0), (8.0, 4.0)] |
| 137 | +
|
| 138 | + Zero epsilon keeps all points: |
| 139 | +
|
| 140 | + >>> ramer_douglas_peucker([(0.0, 0.0), (0.5, 0.1), (1.0, 0.0)], epsilon=0.0) |
| 141 | + [(0.0, 0.0), (0.5, 0.1), (1.0, 0.0)] |
| 142 | + """ |
| 143 | + if epsilon < 0: |
| 144 | + msg = f"epsilon must be non-negative, got {epsilon!r}" |
| 145 | + raise ValueError(msg) |
| 146 | + |
| 147 | + pts = list(points) |
| 148 | + |
| 149 | + if len(pts) < 3: |
| 150 | + return pts |
| 151 | + |
| 152 | + # Find the point with the greatest perpendicular distance from the line |
| 153 | + # connecting the first and last points. |
| 154 | + start, end = pts[0], pts[-1] |
| 155 | + max_dist = 0.0 |
| 156 | + max_index = 0 |
| 157 | + for i in range(1, len(pts) - 1): |
| 158 | + dist = _perpendicular_distance(pts[i], start, end) |
| 159 | + if dist > max_dist: |
| 160 | + max_dist = dist |
| 161 | + max_index = i |
| 162 | + |
| 163 | + if max_dist > epsilon: |
| 164 | + # Recursively simplify both halves and join them (drop the duplicate |
| 165 | + # point at the junction). |
| 166 | + left = ramer_douglas_peucker(pts[: max_index + 1], epsilon) |
| 167 | + right = ramer_douglas_peucker(pts[max_index:], epsilon) |
| 168 | + return left[:-1] + right |
| 169 | + |
| 170 | + # All intermediate points are within tolerance - keep only the endpoints. |
| 171 | + return [start, end] |
| 172 | + |
| 173 | + |
| 174 | +if __name__ == "__main__": |
| 175 | + import doctest |
| 176 | + |
| 177 | + doctest.testmod() |
0 commit comments