Skip to content

Commit 10f1823

Browse files
feat: add Ramer-Douglas-Peucker polyline simplification algorithm
1 parent 7e4b60b commit 10f1823

File tree

1 file changed

+177
-0
lines changed

1 file changed

+177
-0
lines changed

geometry/ramer_douglas_peucker.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
"""
2+
Ramer-Douglas-Peucker (RDP) algorithm for polyline simplification.
3+
4+
Given a curve represented as a sequence of points and a tolerance epsilon,
5+
the algorithm recursively reduces the number of points while preserving the
6+
overall shape of the curve. Points that deviate from the simplified line by
7+
less than epsilon are removed.
8+
9+
Time complexity: O(n log n) on average, O(n²) worst case
10+
Space complexity: O(n) for the recursion stack
11+
12+
References:
13+
- https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm
14+
- Ramer, U. (1972). "An iterative procedure for the polygonal approximation
15+
of plane curves". Computer Graphics and Image Processing. 1 (3): 244-256.
16+
- Douglas, D.; Peucker, T. (1973). "Algorithms for the reduction of the number
17+
of points required to represent a digitized line or its caricature".
18+
Cartographica. 10 (2): 112-122.
19+
"""
20+
21+
from __future__ import annotations
22+
23+
import math
24+
from collections.abc import Sequence
25+
26+
# ---------------------------------------------------------------------------
27+
# Internal helpers
28+
# ---------------------------------------------------------------------------
29+
30+
31+
def _euclidean_distance(a: tuple[float, float], b: tuple[float, float]) -> float:
32+
"""Return the Euclidean distance between two 2-D points.
33+
34+
>>> _euclidean_distance((0.0, 0.0), (3.0, 4.0))
35+
5.0
36+
>>> _euclidean_distance((1.0, 1.0), (1.0, 1.0))
37+
0.0
38+
"""
39+
return math.hypot(b[0] - a[0], b[1] - a[1])
40+
41+
42+
def _perpendicular_distance(
43+
p: tuple[float, float],
44+
a: tuple[float, float],
45+
b: tuple[float, float],
46+
) -> float:
47+
"""Return the perpendicular distance from point *p* to the line through *a* and *b*.
48+
49+
The result is the absolute value of the signed area of the triangle (a, b, p)
50+
divided by the length of segment ab, which equals the altitude of that triangle
51+
from p.
52+
53+
>>> _perpendicular_distance((4.0, 0.0), (0.0, 0.0), (0.0, 3.0))
54+
4.0
55+
>>> _perpendicular_distance((4.0, 0.0), (0.0, 0.0), (0.0, 3.0))
56+
4.0
57+
>>> # order of a and b does not affect the result
58+
>>> _perpendicular_distance((4.0, 0.0), (0.0, 3.0), (0.0, 0.0))
59+
4.0
60+
>>> _perpendicular_distance((4.0, 1.0), (0.0, 1.0), (0.0, 4.0))
61+
4.0
62+
>>> _perpendicular_distance((2.0, 1.0), (-2.0, 1.0), (-2.0, 4.0))
63+
4.0
64+
"""
65+
px, py = p
66+
ax, ay = a
67+
bx, by = b
68+
numerator = abs((by - ay) * px - (bx - ax) * py + bx * ay - by * ax)
69+
denominator = _euclidean_distance(a, b)
70+
if denominator == 0.0:
71+
# a and b coincide; fall back to point-to-point distance
72+
return _euclidean_distance(p, a)
73+
return numerator / denominator
74+
75+
76+
# ---------------------------------------------------------------------------
77+
# Public API
78+
# ---------------------------------------------------------------------------
79+
80+
81+
def ramer_douglas_peucker(
82+
points: Sequence[tuple[float, float]],
83+
epsilon: float,
84+
) -> list[tuple[float, float]]:
85+
"""Simplify a polyline using the Ramer-Douglas-Peucker algorithm.
86+
87+
Parameters
88+
----------
89+
points:
90+
An ordered sequence of ``(x, y)`` tuples that form the polyline.
91+
epsilon:
92+
Maximum allowable perpendicular deviation. Points whose distance
93+
to the simplified segment is less than or equal to *epsilon* are
94+
discarded. Must be non-negative.
95+
96+
Returns
97+
-------
98+
list[tuple[float, float]]
99+
A simplified list of ``(x, y)`` points that is a subset of *points*.
100+
101+
Raises
102+
------
103+
ValueError
104+
If *epsilon* is negative.
105+
106+
Examples
107+
--------
108+
Collinear points - middle point is redundant for any positive epsilon:
109+
110+
>>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.0), (2.0, 0.0)], epsilon=0.5)
111+
[(0.0, 0.0), (2.0, 0.0)]
112+
113+
Empty / tiny inputs are returned unchanged:
114+
115+
>>> ramer_douglas_peucker([], epsilon=1.0)
116+
[]
117+
>>> ramer_douglas_peucker([(0.0, 0.0)], epsilon=1.0)
118+
[(0.0, 0.0)]
119+
>>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 1.0)], epsilon=1.0)
120+
[(0.0, 0.0), (1.0, 1.0)]
121+
122+
A simple square outline where epsilon removes mid-edge points:
123+
124+
>>> square = [
125+
... (0.0, 0.0), (1.0, 0.0), (2.0, 0.0),
126+
... (2.0, 1.0), (2.0, 2.0), (1.0, 2.0),
127+
... (0.0, 2.0), (0.0, 1.0),
128+
... ]
129+
>>> ramer_douglas_peucker(square, epsilon=0.7)
130+
[(0.0, 0.0), (2.0, 0.0), (2.0, 2.0), (0.0, 2.0), (0.0, 1.0)]
131+
132+
A polygonal chain simplified to a single segment for large epsilon:
133+
134+
>>> chain = [(0.0, 0.0), (2.0, 0.5), (3.0, 3.0), (6.0, 3.0), (8.0, 4.0)]
135+
>>> ramer_douglas_peucker(chain, epsilon=3.0)
136+
[(0.0, 0.0), (8.0, 4.0)]
137+
138+
Zero epsilon keeps all points:
139+
140+
>>> ramer_douglas_peucker([(0.0, 0.0), (0.5, 0.1), (1.0, 0.0)], epsilon=0.0)
141+
[(0.0, 0.0), (0.5, 0.1), (1.0, 0.0)]
142+
"""
143+
if epsilon < 0:
144+
msg = f"epsilon must be non-negative, got {epsilon!r}"
145+
raise ValueError(msg)
146+
147+
pts = list(points)
148+
149+
if len(pts) < 3:
150+
return pts
151+
152+
# Find the point with the greatest perpendicular distance from the line
153+
# connecting the first and last points.
154+
start, end = pts[0], pts[-1]
155+
max_dist = 0.0
156+
max_index = 0
157+
for i in range(1, len(pts) - 1):
158+
dist = _perpendicular_distance(pts[i], start, end)
159+
if dist > max_dist:
160+
max_dist = dist
161+
max_index = i
162+
163+
if max_dist > epsilon:
164+
# Recursively simplify both halves and join them (drop the duplicate
165+
# point at the junction).
166+
left = ramer_douglas_peucker(pts[: max_index + 1], epsilon)
167+
right = ramer_douglas_peucker(pts[max_index:], epsilon)
168+
return left[:-1] + right
169+
170+
# All intermediate points are within tolerance - keep only the endpoints.
171+
return [start, end]
172+
173+
174+
if __name__ == "__main__":
175+
import doctest
176+
177+
doctest.testmod()

0 commit comments

Comments
 (0)