From bc9441fa5e27c589a647b1249972910e3cd101fa Mon Sep 17 00:00:00 2001 From: John McCall Date: Thu, 21 May 2026 12:34:21 -0400 Subject: [PATCH 1/2] chore: add .gitattributes and normalize line endings to LF Enforce LF storage in the repository via .gitattributes. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- .gitattributes | 19 + .../division_area/bad-missing-is-land.yaml | 20 +- .../segment/road/bad-road-destinations.yaml | 104 +- gers/examples/python/MATCH_TRACES.md | 436 +++---- gers/examples/python/README.md | 40 +- gers/examples/python/__init__.py | 2 +- gers/examples/python/constants.py | 54 +- gers/examples/python/match_classes.py | 432 +++---- gers/examples/python/match_traces.py | 1016 ++++++++--------- gers/examples/python/route_utils.py | 180 +-- .../python/tests/match_traces_test.py | 102 +- gers/examples/python/tests/test_setup.py | 8 +- gers/examples/python/tests/utils_test.py | 104 +- gers/examples/python/utils.py | 422 +++---- .../division_area/bad-missing-is-land.yaml | 26 +- .../segment/road/bad-road-destinations.yaml | 104 +- 16 files changed, 1544 insertions(+), 1525 deletions(-) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..107c90ba4 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,19 @@ +# Normalize all text files to LF in the repository. +# The eol=lf attribute takes precedence over core.autocrlf, so working trees +# on every platform (including Windows) are also checked out with LF. +* text=auto eol=lf + +# Ensure these are always treated as text (LF in repo). +*.py text eol=lf +*.toml text eol=lf +*.yaml text eol=lf +*.yml text eol=lf +*.md text eol=lf +*.sh text eol=lf +*.json text eol=lf + +# Binary files — do not normalize. 
+*.png binary +*.jpg binary +*.gif binary +*.ico binary diff --git a/counterexamples/divisions/division_area/bad-missing-is-land.yaml b/counterexamples/divisions/division_area/bad-missing-is-land.yaml index 98842b080..41ecd711d 100644 --- a/counterexamples/divisions/division_area/bad-missing-is-land.yaml +++ b/counterexamples/divisions/division_area/bad-missing-is-land.yaml @@ -1,13 +1,13 @@ ---- -id: counterexample:division_area:bad-is-territorial -type: Feature -geometry: - type: LineString - coordinates: [[0, 1], [1, 2]] -properties: - theme: divisions - type: division_area - version: 0 +--- +id: counterexample:division_area:bad-is-territorial +type: Feature +geometry: + type: LineString + coordinates: [[0, 1], [1, 2]] +properties: + theme: divisions + type: division_area + version: 0 subtype: country division_id: counterexample:division_boundary:some-division is_territorial: false diff --git a/counterexamples/transportation/segment/road/bad-road-destinations.yaml b/counterexamples/transportation/segment/road/bad-road-destinations.yaml index ac39be034..d283b3b13 100644 --- a/counterexamples/transportation/segment/road/bad-road-destinations.yaml +++ b/counterexamples/transportation/segment/road/bad-road-destinations.yaml @@ -1,52 +1,52 @@ ---- -id: overture:transportation:segment:example:destinations:1 -type: Feature -geometry: - type: LineString - coordinates: [[0, 0], [1, 1]] -properties: - theme: transportation - type: segment - subtype: road - class: secondary - version: 0 - connectors: - - connector_id: overture:transportation:connector:123 - at: 0 - - connector_id: overture:transportation:connector:678 - at: 1 - destinations: - - labels: - - value: Seattle - type: unknown - - value: Seattle - type: unknown - - value: Redmond - - value: - foo: bar - type: unknown - - value: Bellevue - type: other - - value: Main Street - type: street - - value: I90 - type: route - symbols: - - airport - from: - - segment_id: overture:transportation:segment:234 - - 
segment_id: overture:transportation:segment:567 - connector_id: overture:transportation:connector:567 - foo: bar - when: - heading: forward - to_connector_id: overture:transportation:connector:123 - final_heading: backward - - labels: - - value: Kirkland - type: unknown - from: - - segment_id: overture:transportation:segment:567 - connector_id: overture:transportation:connector:567 - ext_expected_errors: - - "destinations/items/properties/labels/uniqueItems]: items at 0 and 1 are equal" +--- +id: overture:transportation:segment:example:destinations:1 +type: Feature +geometry: + type: LineString + coordinates: [[0, 0], [1, 1]] +properties: + theme: transportation + type: segment + subtype: road + class: secondary + version: 0 + connectors: + - connector_id: overture:transportation:connector:123 + at: 0 + - connector_id: overture:transportation:connector:678 + at: 1 + destinations: + - labels: + - value: Seattle + type: unknown + - value: Seattle + type: unknown + - value: Redmond + - value: + foo: bar + type: unknown + - value: Bellevue + type: other + - value: Main Street + type: street + - value: I90 + type: route + symbols: + - airport + from: + - segment_id: overture:transportation:segment:234 + - segment_id: overture:transportation:segment:567 + connector_id: overture:transportation:connector:567 + foo: bar + when: + heading: forward + to_connector_id: overture:transportation:connector:123 + final_heading: backward + - labels: + - value: Kirkland + type: unknown + from: + - segment_id: overture:transportation:segment:567 + connector_id: overture:transportation:connector:567 + ext_expected_errors: + - "destinations/items/properties/labels/uniqueItems]: items at 0 and 1 are equal" diff --git a/gers/examples/python/MATCH_TRACES.md b/gers/examples/python/MATCH_TRACES.md index 5e26495ad..e043da0f1 100644 --- a/gers/examples/python/MATCH_TRACES.md +++ b/gers/examples/python/MATCH_TRACES.md @@ -1,218 +1,218 @@ -# Match Example: GPS Traces to Overture Road Segments - 
-This page describes an example of how one could match a data set with GPS traces to the corresponding overture road segments. - -Alternative approaches include converting the Overture data set to OSM format, loading it in one of the routing engines available and use the map matching services available, like [OSRM](http://project-osrm.org/docs/v5.5.1/api/#match-service), [GraphHopper](https://github.com/graphhopper/map-matching), [Valhalla](https://valhalla.github.io/valhalla/api/map-matching/api-reference/). - -We are providing for demo purposes a basic implementation in python based on an approach commonly used, described in this paper: [Hidden Markov Map Matching](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/12/map-matching-ACM-GIS-camera-ready.pdf), that takes overture road segments as input. It is not intended to be a solution for all types of GPS trace data, but it can be a starting place in understanding how this can be achieved. - -The match process is exemplified below using a few mock traces as well as a few GPS traces from [OpenStreetMap.org](https://www.openstreetmap.org/traces) in the city of Macon Georgia, USA. This data is used only for purposes of illustrating the match process, and the results will be different for different quality of traces data. - -## Inputs -We will use two inputs for the example: - -1. Overture road segments: [data\overture-transportation-macon.geojson](data\overture-transportation-macon.geojson) - Please note that this data set is included for demonstrative purposes, it is a sample that doesn't contain the latest properties defined in the Overture schema and its GERS IDs are provisional. -2. GPS traces to be matched: - [data\macon-osm-traces-combined.geojson](data\macon-osm-traces-combined.geojson) - sampled from OSM, see below for details. - - [data\macon-manual-traces.geojson](data\macon-manual-traces.geojson) - mock traces simulating some noise edge cases with labeled expected prediction. 
- -Below we describe how we prepared the two data sets for matching for reference, but we also include them so you can experiment with matching directly. - -## Overture Data Set - -Please see instructions on how to use Athena to select the subset of the overture data set within a city for example. - -## GPS Traces to be matched - -GPS traces can be stored in many formats, some of the most common including GPX, KML, CSV, GeoJSON. - -In the case of OpenStreetMaps GPS traces we have GPX input traces, but we convert to GeoJSON for convenience, since having both data sets as GeoJSONs makes it very easy to initialize them both in the common class `MatchableFeature`. - -Since conversion between these formats is trivial, we consider it outside the scope for this exercise. - -In our example we downloaded public traces from openstreetmap.org. - -A sample sub-set of the raw GPX OpenStreetMaps traces were converted to geojson format, with the times for each point stored as `properties.times`. - -Points that are too close to each other, either distance-wise (<50 meters) or time-wise (<1sec), were filtered out. -We also split traces that have big gaps between points (>100 meters) into separate traces. This was done to avoid processing a lot of data that doesn't add much useful information to the trace. Parameters are chosen arbitrarily, and appropriate values depend on the traces data and what type of sidecar feed we're trying to produce with what type of quality and performance constraints. There are more elaborate approaches for picking which points to drop, when to split traces and other preprocessing, but that is outside the scope of this exercise. - -Result is a demo-size set of traces that can be used to obtain for example the average travel speeds per overture road segment or other traffic relevant information. - -An `id` is generated to uniquely identify each such trace and source properties are added to help identify each processed trace and its original source of data. 
- -An example trace to match as geojson: -```json -{ - "type": "Feature", - "id": "trace#0", - "geometry": { - "type": "LineString", - "coordinates": [[-83.630794, 32.850851], ] - }, - "properties": { - "filename": "osm-traces-page-0.gpx", - "track.number": 0, - "track.link": "/user/sunnypilot/traces/7824504", - "track.name": "2023_06_05T12_07_14.093431Z.gpx", - "track.segment.number": 0, - "track.segment.split.number": 0, - "track.description": "Routes from sunnypilot 2022.11.13 (HYUNDAI SONATA 2020).", - "times": ["2023-06-05 12:07:14+00:00", ] - } -} -``` - -### Dependencies - -``` -pip install shapely h3 geopandas geojson haversine gpxpy -``` -Or: -``` -pip install -r gers/examples/python/requirements.txt -``` - -## Run with script - -Example parameters for running traces matching with the sidecar_match.py script: -``` -cd gers/examples/python/data -python match_traces.py --input-to-match data/macon-manual-traces.geojson --input-overture data/overture-transportation-macon.geojson --output data/match-result.json -``` - -See all (optional) parameters by running it with `-h`. - -The script uses [H3 tiles](https://h3geo.org/) to first filter road segment candidates spatially. - -## Run with notebook -Alternative is to perform the traces match via notebook available here: [match_traces.ipynb](match_traces.ipynb) - -This approach uses geopandas for spatial join step of finding road candidates, then constructs both data sets into the same python object `MatchableFeature` and calls same trace match code. - -## Output - -The output file will contain per each point in each trace the prediction of the most likely traveled road. The original point from the trace as well as the predicted point on the road segment are provided, along with useful information that can be used to infer the actual route traveled and the speed like the timestamp for the point, distance traveled on the road network since last point. 
- -Additional metrics are provided for the whole trace in the match result object. - -Below is an example of the output for a trace: -```json - { - "id": "trace#1", - "elapsed": 0.6450104000105057, - "source_length": 5165.4, - "route_length": 5167.13, - "points": [ - - - { - "original_point": "POINT (-83.586113 32.818006)", - "time": "2023-06-04 21:01:52+00:00", - "seconds_since_prev_point": 2.0, - "snap_prediction": { - "id": "8544c0bbfffffff-17976b4158ac1b2f", - "snapped_point": "POINT (-83.58613449627562 32.81796863910759)", - "distance_to_snapped_road": 4.61, - "route_distance_to_prev_point": 50.35 - } - }, - - - ], - "points_with_matches": 101, - "avg_dist_to_road": 3.12, - "sequence_breaks": 0, - "revisited_via_points": 0, - "revisited_segments": 0, - "target_candidates_count": 34, - "target_ids": [ - "8744c0a36ffffff-13d7eb54760e4d65", - "8744c0a36ffffff-13979f2200827e1f", - "8544c0bbfffffff-17976b4158ac1b2f", - "8744c0a36ffffff-17d7b86aff4e68cd" - ] - }, -``` - -## Metrics -### Match Quality -The match quality between your feed and overture roads is influenced by multiple factors: -1. Noise level of the traces data. -2. Disagreement between traces data and overture data. -3. Match quality of the algorithm. - -We propose two types for metrics, error rate via manually labeled set, which allows you to decouple the data disagreements problems to be able to focus on the algorithm itself, and automatic quality proxy metrics, like indicators associated with match problems, which are provided automatically for your whole feed when you run the match algorithm, but are only indirect approximations of how good the match is. - -**Error Rate via Manually Labeled Set** - -This approach provides highest level of insight into how well the algorithm performs, but because it requires human labeling which is costly to obtain, we only recommend it if planning to debug or develop the algorithm. 
-Below are instructions on how to obtain the metric for your feed, and we exemplify it with a few manually labeled traces. - -1. Select the traces that will make up the truth set. -2. Run the match algorithm as described above, with -j parameter. This will create as one of the outputs a file ending in `for_judgment.txt` which is a tab separated text file with a row for each point in each trace and the GERS ID that the algorithm found: - - |trace_id|point_index|trace_point_wkt|gers_id| - |-|-|-|-| - |manual_trace#1|0|POINT (-83.6455155 32.8246168)|`8844c0b1a7fffff-17fff78c078ff50b`| - |manual_trace#1|1|POINT (-83.64514 32.8251578)|`8844c0b1a7fffff-13def9663b8c091b`| - |...|||| - - This will serve as a starting point for our "truth set", by using the results of the match to "pre-label" the data. -3. Review the matches in QGIS. Load the overture features, the "pre-labeled" for_judgment.txt points, and the `snapped_points.txt` file. This should make it easy to observe which of the matches are incorrect. Optionally you could add an OSM tiles layer for example to add more context. -4. Save the corrected labels file as `.labeled.txt`. In our example this file can be found here: [data\macon-manual-traces.labeled.txt](data\macon-manual-traces.labeled.txt) -5. Compute **Error Rate** metric. This is done automatically by the script if a .labeled.txt file exists corresponding to the input traces file. -Error Rate is defined as the ratio between the length of the traces for which the prediction is matching the labeled set and the total length of the traces. - -**Automatic Quality Proxy Metrics** - -Because obtaining labeled data for a representative set can be difficult, we provide as alternative the metrics below that are calculated automatically when running the script. - -**Note**: all the metrics are averaged for the whole set and per each trace in the output. 
For most of them (all except first two) they are also provided per-km of length of trace, which are independent of trace lengths to facilitate cross-set comparisons. Length of trace in this context is calculated as sum of distances between each point of the input GPS trace. While a more "correct" length of trace would be the route distance, and you can still compute that yourself from the match result, we are using this definition because for some traces we won't be able to find a full or even partial route. Side effect is that the per-km metrics give more importance to points with noise, which artificially adds length to the truly traveled length. - -1. **Average distance to snapped road** - per trace. How far away are the GPS points from the snapped road in meters. Not counting points without matches. -2. **Snapped route length to GPS length ratio** - per trace. The ratio between the sum of route distances between points that we were able to match and the sum if distances between trace points. In ideal case with no GPS noise, agreement with the map and perfect match result this metric would get close to 1. Lower number can mean higher disagreement, missing roads in overture data set, or incorrect matches. 0 means nothing got matched. Values greater than 1 are also possible and valid, but could indicate incorrect route matches. -3. **Number of candidate segments** - per trace, per km. This counts how many roads are considered by the algorithm, as having common H3 tiles with the k-ringed H3 tiles of the trace. -4. **Number of matched segments** - per trace, per km. This will naturally vary from one type of road to another, but correlated with other dimensions it can be useful to detect outliers or problems. Zero means no match was found for any point of the trace. 
Also, a high discrepancy between number of candidate segments and number of matched segments could mean the algo is spending too much time considering too many candidates, see how to tweak performance in the section below. -5. **Number of sequence breaks** - per trace, per km. A sequence break can happen when there is missing or bad data in overture roads or in the trace or simply they disagree enough that a gap of no possible route is detected. -6. **Via-point revisits** - per trace, per km. A high number can be an indicator a lot of U-turns are predicted, which although can occur naturally, past some threshold can be a sign that the matches are incorrect. -7. **Segment revisits** - per trace, per km. Depending on your data, some traces will validly pass through same road segment again after having left it, but in many cases this is probably rare. An unusually high number can indicate wrong matches. - -For example, when matching the sample OSM traces with these options: -```json -{ - "sigma": 4.1, - "beta": 0.9, - "allow_loops": "True", - "max_point_to_road_distance": 30.0, - "max_route_to_trace_distance_difference": 300, - "revisit_segment_penalty_weight": 100, - "revisit_via_point_penalty_weight": 100, - "broken_time_gap_reset_sequence": 60, - "broken_distance_gap_reset_sequence": 300 -} -``` -The script will output these metrics (runtimes will vary depending on machine): -``` -Traces.............................157 -Target features....................22324 -Elapsed:...........................1min 50.846s -Avg runtime/trace..................0.706s -Avg runtime/km.....................0.178s -Avg distance to snapped road.......2.92m -Snapped route length...............574.95km -GPS traces length..................622.47km -Snapped route len/gps len..........0.92 -Avg number of candidate segments...58.69/trace, 14.80/km -Avg number of matched segments.....8.43/trace, 2.13/km -Avg number of sequence breaks......0.26/trace, 0.07/km -Avg number of revisited via 
points.0.31/trace, 0.08/km -Avg number of revisited segments...0.18/trace, 0.05/km -``` - -### Performance notes -While this demo match algorithm is not designed for performance, various parameters of the allow controlling the tradeoff between match quality and runtime. For example increasing `max_point_to_road_distance` will allow matching traces that are further away from the roads, thus increasing match recall for noisy GPS traces. However, this can increase significantly the runtime needed, since it increases the number of candidate roads to consider. - -For each each trace to be matched we provide the time elapsed in the output property `TraceMatchResult.elapsed` in seconds. This can be used to analyze how runtime correlates with various properties of the data, like trace length, number of points, number of sequence breaks or the various match parameters, or for identifying bottlenecks. - +# Match Example: GPS Traces to Overture Road Segments + +This page describes an example of how one could match a data set with GPS traces to the corresponding overture road segments. + +Alternative approaches include converting the Overture data set to OSM format, loading it in one of the routing engines available and use the map matching services available, like [OSRM](http://project-osrm.org/docs/v5.5.1/api/#match-service), [GraphHopper](https://github.com/graphhopper/map-matching), [Valhalla](https://valhalla.github.io/valhalla/api/map-matching/api-reference/). + +We are providing for demo purposes a basic implementation in python based on an approach commonly used, described in this paper: [Hidden Markov Map Matching](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/12/map-matching-ACM-GIS-camera-ready.pdf), that takes overture road segments as input. It is not intended to be a solution for all types of GPS trace data, but it can be a starting place in understanding how this can be achieved. 
+ +The match process is exemplified below using a few mock traces as well as a few GPS traces from [OpenStreetMap.org](https://www.openstreetmap.org/traces) in the city of Macon Georgia, USA. This data is used only for purposes of illustrating the match process, and the results will be different for different quality of traces data. + +## Inputs +We will use two inputs for the example: + +1. Overture road segments: [data\overture-transportation-macon.geojson](data\overture-transportation-macon.geojson) - Please note that this data set is included for demonstrative purposes, it is a sample that doesn't contain the latest properties defined in the Overture schema and its GERS IDs are provisional. +2. GPS traces to be matched: + [data\macon-osm-traces-combined.geojson](data\macon-osm-traces-combined.geojson) - sampled from OSM, see below for details. + + [data\macon-manual-traces.geojson](data\macon-manual-traces.geojson) - mock traces simulating some noise edge cases with labeled expected prediction. + +Below we describe how we prepared the two data sets for matching for reference, but we also include them so you can experiment with matching directly. + +## Overture Data Set + +Please see instructions on how to use Athena to select the subset of the overture data set within a city for example. + +## GPS Traces to be matched + +GPS traces can be stored in many formats, some of the most common including GPX, KML, CSV, GeoJSON. + +In the case of OpenStreetMaps GPS traces we have GPX input traces, but we convert to GeoJSON for convenience, since having both data sets as GeoJSONs makes it very easy to initialize them both in the common class `MatchableFeature`. + +Since conversion between these formats is trivial, we consider it outside the scope for this exercise. + +In our example we downloaded public traces from openstreetmap.org. 
+ +A sample sub-set of the raw GPX OpenStreetMaps traces were converted to geojson format, with the times for each point stored as `properties.times`. + +Points that are too close to each other, either distance-wise (<50 meters) or time-wise (<1sec), were filtered out. +We also split traces that have big gaps between points (>100 meters) into separate traces. This was done to avoid processing a lot of data that doesn't add much useful information to the trace. Parameters are chosen arbitrarily, and appropriate values depend on the traces data and what type of sidecar feed we're trying to produce with what type of quality and performance constraints. There are more elaborate approaches for picking which points to drop, when to split traces and other preprocessing, but that is outside the scope of this exercise. + +Result is a demo-size set of traces that can be used to obtain for example the average travel speeds per overture road segment or other traffic relevant information. + +An `id` is generated to uniquely identify each such trace and source properties are added to help identify each processed trace and its original source of data. 
+ +An example trace to match as geojson: +```json +{ + "type": "Feature", + "id": "trace#0", + "geometry": { + "type": "LineString", + "coordinates": [[-83.630794, 32.850851], ] + }, + "properties": { + "filename": "osm-traces-page-0.gpx", + "track.number": 0, + "track.link": "/user/sunnypilot/traces/7824504", + "track.name": "2023_06_05T12_07_14.093431Z.gpx", + "track.segment.number": 0, + "track.segment.split.number": 0, + "track.description": "Routes from sunnypilot 2022.11.13 (HYUNDAI SONATA 2020).", + "times": ["2023-06-05 12:07:14+00:00", ] + } +} +``` + +### Dependencies + +``` +pip install shapely h3 geopandas geojson haversine gpxpy +``` +Or: +``` +pip install -r gers/examples/python/requirements.txt +``` + +## Run with script + +Example parameters for running traces matching with the sidecar_match.py script: +``` +cd gers/examples/python/data +python match_traces.py --input-to-match data/macon-manual-traces.geojson --input-overture data/overture-transportation-macon.geojson --output data/match-result.json +``` + +See all (optional) parameters by running it with `-h`. + +The script uses [H3 tiles](https://h3geo.org/) to first filter road segment candidates spatially. + +## Run with notebook +Alternative is to perform the traces match via notebook available here: [match_traces.ipynb](match_traces.ipynb) + +This approach uses geopandas for spatial join step of finding road candidates, then constructs both data sets into the same python object `MatchableFeature` and calls same trace match code. + +## Output + +The output file will contain per each point in each trace the prediction of the most likely traveled road. The original point from the trace as well as the predicted point on the road segment are provided, along with useful information that can be used to infer the actual route traveled and the speed like the timestamp for the point, distance traveled on the road network since last point. 
+ +Additional metrics are provided for the whole trace in the match result object. + +Below is an example of the output for a trace: +```json + { + "id": "trace#1", + "elapsed": 0.6450104000105057, + "source_length": 5165.4, + "route_length": 5167.13, + "points": [ + + + { + "original_point": "POINT (-83.586113 32.818006)", + "time": "2023-06-04 21:01:52+00:00", + "seconds_since_prev_point": 2.0, + "snap_prediction": { + "id": "8544c0bbfffffff-17976b4158ac1b2f", + "snapped_point": "POINT (-83.58613449627562 32.81796863910759)", + "distance_to_snapped_road": 4.61, + "route_distance_to_prev_point": 50.35 + } + }, + + + ], + "points_with_matches": 101, + "avg_dist_to_road": 3.12, + "sequence_breaks": 0, + "revisited_via_points": 0, + "revisited_segments": 0, + "target_candidates_count": 34, + "target_ids": [ + "8744c0a36ffffff-13d7eb54760e4d65", + "8744c0a36ffffff-13979f2200827e1f", + "8544c0bbfffffff-17976b4158ac1b2f", + "8744c0a36ffffff-17d7b86aff4e68cd" + ] + }, +``` + +## Metrics +### Match Quality +The match quality between your feed and overture roads is influenced by multiple factors: +1. Noise level of the traces data. +2. Disagreement between traces data and overture data. +3. Match quality of the algorithm. + +We propose two types for metrics, error rate via manually labeled set, which allows you to decouple the data disagreements problems to be able to focus on the algorithm itself, and automatic quality proxy metrics, like indicators associated with match problems, which are provided automatically for your whole feed when you run the match algorithm, but are only indirect approximations of how good the match is. + +**Error Rate via Manually Labeled Set** + +This approach provides highest level of insight into how well the algorithm performs, but because it requires human labeling which is costly to obtain, we only recommend it if planning to debug or develop the algorithm. 
+Below are instructions on how to obtain the metric for your feed, and we exemplify it with a few manually labeled traces. + +1. Select the traces that will make up the truth set. +2. Run the match algorithm as described above, with -j parameter. This will create as one of the outputs a file ending in `for_judgment.txt` which is a tab separated text file with a row for each point in each trace and the GERS ID that the algorithm found: + + |trace_id|point_index|trace_point_wkt|gers_id| + |-|-|-|-| + |manual_trace#1|0|POINT (-83.6455155 32.8246168)|`8844c0b1a7fffff-17fff78c078ff50b`| + |manual_trace#1|1|POINT (-83.64514 32.8251578)|`8844c0b1a7fffff-13def9663b8c091b`| + |...|||| + + This will serve as a starting point for our "truth set", by using the results of the match to "pre-label" the data. +3. Review the matches in QGIS. Load the overture features, the "pre-labeled" for_judgment.txt points, and the `snapped_points.txt` file. This should make it easy to observe which of the matches are incorrect. Optionally you could add an OSM tiles layer for example to add more context. +4. Save the corrected labels file as `.labeled.txt`. In our example this file can be found here: [data\macon-manual-traces.labeled.txt](data\macon-manual-traces.labeled.txt) +5. Compute **Error Rate** metric. This is done automatically by the script if a .labeled.txt file exists corresponding to the input traces file. +Error Rate is defined as the ratio between the length of the traces for which the prediction is matching the labeled set and the total length of the traces. + +**Automatic Quality Proxy Metrics** + +Because obtaining labeled data for a representative set can be difficult, we provide as alternative the metrics below that are calculated automatically when running the script. + +**Note**: all the metrics are averaged for the whole set and per each trace in the output. 
For most of them (all except first two) they are also provided per-km of length of trace, which are independent of trace lengths to facilitate cross-set comparisons. Length of trace in this context is calculated as sum of distances between each point of the input GPS trace. While a more "correct" length of trace would be the route distance, and you can still compute that yourself from the match result, we are using this definition because for some traces we won't be able to find a full or even partial route. Side effect is that the per-km metrics give more importance to points with noise, which artificially adds length to the truly traveled length. + +1. **Average distance to snapped road** - per trace. How far away are the GPS points from the snapped road in meters. Not counting points without matches. +2. **Snapped route length to GPS length ratio** - per trace. The ratio between the sum of route distances between points that we were able to match and the sum if distances between trace points. In ideal case with no GPS noise, agreement with the map and perfect match result this metric would get close to 1. Lower number can mean higher disagreement, missing roads in overture data set, or incorrect matches. 0 means nothing got matched. Values greater than 1 are also possible and valid, but could indicate incorrect route matches. +3. **Number of candidate segments** - per trace, per km. This counts how many roads are considered by the algorithm, as having common H3 tiles with the k-ringed H3 tiles of the trace. +4. **Number of matched segments** - per trace, per km. This will naturally vary from one type of road to another, but correlated with other dimensions it can be useful to detect outliers or problems. Zero means no match was found for any point of the trace. 
Also, a high discrepancy between number of candidate segments and number of matched segments could mean the algo is spending too much time considering too many candidates, see how to tweak performance in the section below. +5. **Number of sequence breaks** - per trace, per km. A sequence break can happen when there is missing or bad data in overture roads or in the trace or simply they disagree enough that a gap of no possible route is detected. +6. **Via-point revisits** - per trace, per km. A high number can be an indicator a lot of U-turns are predicted, which although can occur naturally, past some threshold can be a sign that the matches are incorrect. +7. **Segment revisits** - per trace, per km. Depending on your data, some traces will validly pass through same road segment again after having left it, but in many cases this is probably rare. An unusually high number can indicate wrong matches. + +For example, when matching the sample OSM traces with these options: +```json +{ + "sigma": 4.1, + "beta": 0.9, + "allow_loops": "True", + "max_point_to_road_distance": 30.0, + "max_route_to_trace_distance_difference": 300, + "revisit_segment_penalty_weight": 100, + "revisit_via_point_penalty_weight": 100, + "broken_time_gap_reset_sequence": 60, + "broken_distance_gap_reset_sequence": 300 +} +``` +The script will output these metrics (runtimes will vary depending on machine): +``` +Traces.............................157 +Target features....................22324 +Elapsed:...........................1min 50.846s +Avg runtime/trace..................0.706s +Avg runtime/km.....................0.178s +Avg distance to snapped road.......2.92m +Snapped route length...............574.95km +GPS traces length..................622.47km +Snapped route len/gps len..........0.92 +Avg number of candidate segments...58.69/trace, 14.80/km +Avg number of matched segments.....8.43/trace, 2.13/km +Avg number of sequence breaks......0.26/trace, 0.07/km +Avg number of revisited via 
points.0.31/trace, 0.08/km +Avg number of revisited segments...0.18/trace, 0.05/km +``` + +### Performance notes +While this demo match algorithm is not designed for performance, various parameters of the allow controlling the tradeoff between match quality and runtime. For example increasing `max_point_to_road_distance` will allow matching traces that are further away from the roads, thus increasing match recall for noisy GPS traces. However, this can increase significantly the runtime needed, since it increases the number of candidate roads to consider. + +For each each trace to be matched we provide the time elapsed in the output property `TraceMatchResult.elapsed` in seconds. This can be used to analyze how runtime correlates with various properties of the data, like trace length, number of points, number of sequence breaks or the various match parameters, or for identifying bottlenecks. + diff --git a/gers/examples/python/README.md b/gers/examples/python/README.md index 916267326..ac085312e 100644 --- a/gers/examples/python/README.md +++ b/gers/examples/python/README.md @@ -1,20 +1,20 @@ -# GERS Sidecar Match Example - -## Context - -Consumers of geospatial data sets usually need to solve a complex and costly process of matching them. -A data set that also has GERS IDs can be easily used to augment the Overture data set itself, or other data sets that also have GERS IDs via simple join by id. - -Because Overture data sets are modeled and produced with prioritizing for stability of its identifiers (GERS IDs) over time, and the cost of matching being offset to the owner of the data sets, the consumers of data sets with GERS IDs can conflate, evaluate and onboard such feeds much cheaper and faster. - -## Purpose - -Matching a geospatial data set with overture (or any other) data set is a common problem and many solutions exist for this, from generic to highly specialized for particular data types. 
- -Depending on the match requirements, this can be achieved with a open source or commercial tools or services, with a few click or couple of lines of code or with large scale distributed system with complex match logic. - -Main purpose is to provide an example of how to start exploring a data set's compatibility with overture data set and to find GERS IDs that correspond to its features. - -## Example -[Snap GPS traces to overture roads](MATCH_TRACES.md) - +# GERS Sidecar Match Example + +## Context + +Consumers of geospatial data sets usually need to solve a complex and costly process of matching them. +A data set that also has GERS IDs can be easily used to augment the Overture data set itself, or other data sets that also have GERS IDs via simple join by id. + +Because Overture data sets are modeled and produced with prioritizing for stability of its identifiers (GERS IDs) over time, and the cost of matching being offset to the owner of the data sets, the consumers of data sets with GERS IDs can conflate, evaluate and onboard such feeds much cheaper and faster. + +## Purpose + +Matching a geospatial data set with overture (or any other) data set is a common problem and many solutions exist for this, from generic to highly specialized for particular data types. + +Depending on the match requirements, this can be achieved with a open source or commercial tools or services, with a few click or couple of lines of code or with large scale distributed system with complex match logic. + +Main purpose is to provide an example of how to start exploring a data set's compatibility with overture data set and to find GERS IDs that correspond to its features. 
+ +## Example +[Snap GPS traces to overture roads](MATCH_TRACES.md) + diff --git a/gers/examples/python/__init__.py b/gers/examples/python/__init__.py index d3f5a12fa..8b1378917 100644 --- a/gers/examples/python/__init__.py +++ b/gers/examples/python/__init__.py @@ -1 +1 @@ - + diff --git a/gers/examples/python/constants.py b/gers/examples/python/constants.py index f15adb2d0..31ae3e40e 100644 --- a/gers/examples/python/constants.py +++ b/gers/examples/python/constants.py @@ -1,27 +1,27 @@ -from enum import Enum -import os - -DEFAULT_H3_RESOLUTION = 12 - -# default params for nearest match -DEFAULT_NEAREST_MAX_DISTANCE = 100 # meters - -# default params for trace snapping -DEFAULT_SIGMA = 4.1 # 4.10351310622546; -DEFAULT_BETA = 0.9 # 0.905918746744877 -> this default beta was found to apply to a 5 second sample rate. -# also was found to have good noise rejection characteristics and performed just as well or better than 1 second data, so it -# is now our default sampling period - even if the raw data was sampled at a higher rate -DEFAULT_MAX_POINT_TO_ROAD_DISTANCE = 10 # 200m in original paper -DEFAULT_MAX_ROUTE_TO_TRACE_DISTANCE_DIFFERENCE = 300 # what's a good value for this? 2km in original paper but too slow -DEFAULT_ALLOW_LOOPS = False -DEFAULT_SEGMENT_REVISIT_PENALTY = 100 # set to 0 if no penalty is desired -DEFAULT_VIA_POINT_PENALTY_WEIGHT = 100 # set to 0 if no penalty is desired -DEFAULT_BROKEN_TIME_GAP_RESET_SEQUENCE = 60 # seconds -DEFAULT_BROKEN_DISTANCE_GAP_RESET_SEQUENCE = 300 # meters - -"""default column separator of text files""" -COLUMN_SEPARATOR = "\t" - -DATA_DIR = "gers/examples/python/data" - - +from enum import Enum +import os + +DEFAULT_H3_RESOLUTION = 12 + +# default params for nearest match +DEFAULT_NEAREST_MAX_DISTANCE = 100 # meters + +# default params for trace snapping +DEFAULT_SIGMA = 4.1 # 4.10351310622546; +DEFAULT_BETA = 0.9 # 0.905918746744877 -> this default beta was found to apply to a 5 second sample rate. 
+# also was found to have good noise rejection characteristics and performed just as well or better than 1 second data, so it +# is now our default sampling period - even if the raw data was sampled at a higher rate +DEFAULT_MAX_POINT_TO_ROAD_DISTANCE = 10 # 200m in original paper +DEFAULT_MAX_ROUTE_TO_TRACE_DISTANCE_DIFFERENCE = 300 # what's a good value for this? 2km in original paper but too slow +DEFAULT_ALLOW_LOOPS = False +DEFAULT_SEGMENT_REVISIT_PENALTY = 100 # set to 0 if no penalty is desired +DEFAULT_VIA_POINT_PENALTY_WEIGHT = 100 # set to 0 if no penalty is desired +DEFAULT_BROKEN_TIME_GAP_RESET_SEQUENCE = 60 # seconds +DEFAULT_BROKEN_DISTANCE_GAP_RESET_SEQUENCE = 300 # meters + +"""default column separator of text files""" +COLUMN_SEPARATOR = "\t" + +DATA_DIR = "gers/examples/python/data" + + diff --git a/gers/examples/python/match_classes.py b/gers/examples/python/match_classes.py index 0e1c55805..fa4c09efa 100644 --- a/gers/examples/python/match_classes.py +++ b/gers/examples/python/match_classes.py @@ -1,216 +1,216 @@ -import json -from typing import Dict, Iterable -from shapely.geometry import Point -from shapely.geometry.base import BaseGeometry -import constants - -class MatchableFeature: - """ - Convenience class to hold an id, a shapely geometry, and optionally a dictionary of properties for use in matching. - It can be trivially populated from geojson and overture as an extension of geojson. 
- """ - def __init__(self, id: str, geometry:BaseGeometry, properties: dict=None) -> None: - self.id = str(id) - self.geometry = geometry - self.properties = properties - - def __str__(self) -> str: - return json.dumps({ - "id": self.id, - "geometry": self.geometry.wkt, - "properties": self.properties - }) - - def get_connector_ids(self) -> Iterable[str]: - return self.properties["connector_ids"] if self.properties is not None and "connector_ids" in self.properties else [] - -class MatchableFeaturesSet: - """Collection of matchable features, indexed by id, and by cells (H3 in current implementation)""" - def __init__(self, features: Dict[str, Iterable[MatchableFeature]], cells_by_id: Dict[str, Iterable[str]], features_by_cell: Dict[str, Iterable[MatchableFeature]]) -> None: - self.features_by_id = features - self.cells_by_id = cells_by_id - self.features_by_cell = features_by_cell - -class MatchedFeature: - """One matched feature with match-relevant information""" - def __init__(self, id: str, matched_feature: MatchableFeature, overlapping_geometry: BaseGeometry, score: float, source_lr: Iterable[float]=None, candidate_lr: Iterable[float]=None) -> None: - """ - Attributes: - id: the gers id of the matched feature - matched_feature: the matched feature itself - overlapping_geometry: the sub-part of the matched features' geometry that overlaps with the source feature - score: the score of the match - source_lr: the Location Reference in the source geometry of the part that matched as array of from-to points projection factors - candidate_lr: the Location Reference in the matched geometry of the part that matched the source geometry - """ - self.id = id # the gers id of the matched feature - self.matched_feature = matched_feature - self.overlapping_geometry = overlapping_geometry - self.score = score - self.source_lr = source_lr - self.candidate_lr = candidate_lr - - def to_json(self): - j = { - "id": str(self.id), - "candidate_wkt": self.matched_feature.geometry.wkt, 
- "overlapping_wkt": self.overlapping_geometry.wkt if self.overlapping_geometry is not None else None, - "score": self.score, - } - if self.source_lr is not None: - j["source_lr"] = self.source_lr - if self.candidate_lr is not None: - j["candidate_lr"] = self.candidate_lr - return j - - def __str__(self) -> str: - return json.dumps(self.to_json()) - -class TraceSnapOptions: - """"Parameters for matching a trace to road segments""" - def __init__(self, \ - sigma=constants.DEFAULT_SIGMA,\ - beta=constants.DEFAULT_BETA,\ - max_point_to_road_distance=constants.DEFAULT_MAX_POINT_TO_ROAD_DISTANCE,\ - max_route_to_trace_distance_difference=constants.DEFAULT_MAX_ROUTE_TO_TRACE_DISTANCE_DIFFERENCE,\ - allow_loops=constants.DEFAULT_ALLOW_LOOPS, - revisit_segment_penalty_weight=constants.DEFAULT_SEGMENT_REVISIT_PENALTY, - revisit_via_point_penalty_weight=constants.DEFAULT_VIA_POINT_PENALTY_WEIGHT, - broken_time_gap_reset_sequence=constants.DEFAULT_BROKEN_TIME_GAP_RESET_SEQUENCE, - broken_distance_gap_reset_sequence=constants.DEFAULT_BROKEN_DISTANCE_GAP_RESET_SEQUENCE) -> None: - self.sigma = sigma - self.beta = beta - self.allow_loops = allow_loops - self.max_point_to_road_distance = max_point_to_road_distance - self.max_route_to_trace_distance_difference = max_route_to_trace_distance_difference - self.revisit_segment_penalty_weight = revisit_segment_penalty_weight - self.revisit_via_point_penalty_weight = revisit_via_point_penalty_weight - self.broken_time_gap_reset_sequence = broken_time_gap_reset_sequence - self.broken_distance_gap_reset_sequence = broken_distance_gap_reset_sequence - -class RouteStep: - """One step in a route, corresponding to one road segment feature""" - def __init__(self, feature: MatchableFeature, via_point: Point) -> None: - """ - Attributes: - feature: the matched feature - via_point: the point on the feature where the route enters the feature as a shapely Point - """ - self.feature = feature - self.via_point = via_point - -class Route: - """A 
route, consisting of a sequence of steps""" - def __init__(self, distance: float, steps: Iterable[RouteStep]) -> None: - self.distance = distance - self.steps = steps - -class SnappedPointPrediction: - """A road segment feature as a snap prediction for point in a trace, with relevant match signals""" - def __init__(self, id: str, snapped_point: Point, referenced_feature: MatchableFeature, distance_to_snapped_road: float, route_distance_to_prev_point: float, emission_prob: float, best_transition_prob: float, best_log_prob: float, best_prev_prediction: float, best_sequence: Iterable[str], best_route_via_points: Iterable[str], best_revisited_via_points_count:int, best_revisited_segments_count:int) -> None: - self.id = str(id) - self.snapped_point = snapped_point - self.referenced_feature = referenced_feature - self.distance_to_snapped_road = distance_to_snapped_road - self.route_distance_to_prev_point = route_distance_to_prev_point - self.emission_prob = emission_prob - self.best_transition_prob = best_transition_prob - self.best_log_prob = best_log_prob - self.best_prev_prediction = best_prev_prediction - self.best_sequence = best_sequence - self.best_route_via_points = best_route_via_points - self.best_revisited_via_points_count = best_revisited_via_points_count - self.best_revisited_segments_count = best_revisited_segments_count - - def to_json(self, diagnostic_mode=False): - best_prev_prediction_id = "" - if self.best_prev_prediction is not None: - best_prev_prediction_id = self.best_prev_prediction.id - - j = { - "id": self.id, - "snapped_point": self.snapped_point.wkt, - "distance_to_snapped_road": self.distance_to_snapped_road, - "route_distance_to_prev_point": self.route_distance_to_prev_point, - } - - if diagnostic_mode: - j["referenced_feature"] = self.referenced_feature.geometry.wkt - j["emission_prob"] = self.emission_prob - j["best_transition_prob"] = self.best_transition_prob - j["best_log_prob"] = self.best_log_prob - j["best_prev_prediction"] = 
best_prev_prediction_id - j["best_route_via_points"] = self.best_route_via_points - j["best_revisited_via_points_count"] = self.best_revisited_via_points_count - j["best_revisited_segments_count"] = self.best_revisited_segments_count - - return j - -class PointSnapInfo: - """Snap-to-road match information corresponding to one point in a trace""" - def __init__(self, index: int, original_point: Point, time: str, seconds_since_prev_point: float=None, predictions:Iterable[SnappedPointPrediction]=[]) -> None: - self.index = index - self.original_point = original_point - self.time = time - self.seconds_since_prev_point = seconds_since_prev_point - self.predictions = predictions - self.best_prediction = None - self.ignore = False - - def to_json(self, diagnostic_mode: bool=False, include_all_predictions: bool=False,): - best_prediction_json = None if self.best_prediction is None else self.best_prediction.to_json(diagnostic_mode) - - j = { - "original_point": self.original_point.wkt, - "time": self.time, - "seconds_since_prev_point": self.seconds_since_prev_point, - "snap_prediction": best_prediction_json, - } - - if self.ignore: - j["ignore"] = True - - if diagnostic_mode: - j["point_index"] = self.index - - if include_all_predictions: - j["predictions"] = list(map(lambda x: x.to_json(diagnostic_mode), self.predictions)) - return j - -class TraceMatchResult: - """Result of a matching trace to road segments""" - def __init__(self, id: str, source_wkt: str, points: Iterable[PointSnapInfo], source_length: float, target_candidates_count: int, matched_target_ids: Iterable[str]=None, elapsed: float=None, sequence_breaks: int=0, points_with_matches: int=0, route_length: float=0, avg_dist_to_road: float=None, revisited_via_points: int=0, revisited_segments: int=0) -> None: - self.id = id - self.source_wkt = source_wkt - self.points = points - self.source_length = source_length - self.target_candidates_count = target_candidates_count - self.matched_target_ids = matched_target_ids 
- self.elapsed = elapsed - self.sequence_breaks = sequence_breaks - self.points_with_matches = points_with_matches - self.route_length = route_length - self.avg_dist_to_road = avg_dist_to_road - self.revisited_via_points = revisited_via_points - self.revisited_segments = revisited_segments - - def to_json(self, diagnostic_mode=False, include_all_predictions=False): - points_json = list(map(lambda x: x.to_json(diagnostic_mode, include_all_predictions), self.points)) - return { - "id": str(self.id), - "elapsed": self.elapsed, - "source_length": self.source_length, - "route_length": self.route_length, - "points": len(self.points), - "points_with_matches": self.points_with_matches, - "avg_dist_to_road": self.avg_dist_to_road, - "sequence_breaks": self.sequence_breaks, - "revisited_via_points": self.revisited_via_points, - "revisited_segments": self.revisited_segments, - "target_candidates_count": self.target_candidates_count, - "target_ids": self.matched_target_ids, - "points": points_json - } - - def __str__(self) -> str: - return json.dumps(self.to_json()) +import json +from typing import Dict, Iterable +from shapely.geometry import Point +from shapely.geometry.base import BaseGeometry +import constants + +class MatchableFeature: + """ + Convenience class to hold an id, a shapely geometry, and optionally a dictionary of properties for use in matching. + It can be trivially populated from geojson and overture as an extension of geojson. 
+ """ + def __init__(self, id: str, geometry:BaseGeometry, properties: dict=None) -> None: + self.id = str(id) + self.geometry = geometry + self.properties = properties + + def __str__(self) -> str: + return json.dumps({ + "id": self.id, + "geometry": self.geometry.wkt, + "properties": self.properties + }) + + def get_connector_ids(self) -> Iterable[str]: + return self.properties["connector_ids"] if self.properties is not None and "connector_ids" in self.properties else [] + +class MatchableFeaturesSet: + """Collection of matchable features, indexed by id, and by cells (H3 in current implementation)""" + def __init__(self, features: Dict[str, Iterable[MatchableFeature]], cells_by_id: Dict[str, Iterable[str]], features_by_cell: Dict[str, Iterable[MatchableFeature]]) -> None: + self.features_by_id = features + self.cells_by_id = cells_by_id + self.features_by_cell = features_by_cell + +class MatchedFeature: + """One matched feature with match-relevant information""" + def __init__(self, id: str, matched_feature: MatchableFeature, overlapping_geometry: BaseGeometry, score: float, source_lr: Iterable[float]=None, candidate_lr: Iterable[float]=None) -> None: + """ + Attributes: + id: the gers id of the matched feature + matched_feature: the matched feature itself + overlapping_geometry: the sub-part of the matched features' geometry that overlaps with the source feature + score: the score of the match + source_lr: the Location Reference in the source geometry of the part that matched as array of from-to points projection factors + candidate_lr: the Location Reference in the matched geometry of the part that matched the source geometry + """ + self.id = id # the gers id of the matched feature + self.matched_feature = matched_feature + self.overlapping_geometry = overlapping_geometry + self.score = score + self.source_lr = source_lr + self.candidate_lr = candidate_lr + + def to_json(self): + j = { + "id": str(self.id), + "candidate_wkt": self.matched_feature.geometry.wkt, 
+ "overlapping_wkt": self.overlapping_geometry.wkt if self.overlapping_geometry is not None else None, + "score": self.score, + } + if self.source_lr is not None: + j["source_lr"] = self.source_lr + if self.candidate_lr is not None: + j["candidate_lr"] = self.candidate_lr + return j + + def __str__(self) -> str: + return json.dumps(self.to_json()) + +class TraceSnapOptions: + """"Parameters for matching a trace to road segments""" + def __init__(self, \ + sigma=constants.DEFAULT_SIGMA,\ + beta=constants.DEFAULT_BETA,\ + max_point_to_road_distance=constants.DEFAULT_MAX_POINT_TO_ROAD_DISTANCE,\ + max_route_to_trace_distance_difference=constants.DEFAULT_MAX_ROUTE_TO_TRACE_DISTANCE_DIFFERENCE,\ + allow_loops=constants.DEFAULT_ALLOW_LOOPS, + revisit_segment_penalty_weight=constants.DEFAULT_SEGMENT_REVISIT_PENALTY, + revisit_via_point_penalty_weight=constants.DEFAULT_VIA_POINT_PENALTY_WEIGHT, + broken_time_gap_reset_sequence=constants.DEFAULT_BROKEN_TIME_GAP_RESET_SEQUENCE, + broken_distance_gap_reset_sequence=constants.DEFAULT_BROKEN_DISTANCE_GAP_RESET_SEQUENCE) -> None: + self.sigma = sigma + self.beta = beta + self.allow_loops = allow_loops + self.max_point_to_road_distance = max_point_to_road_distance + self.max_route_to_trace_distance_difference = max_route_to_trace_distance_difference + self.revisit_segment_penalty_weight = revisit_segment_penalty_weight + self.revisit_via_point_penalty_weight = revisit_via_point_penalty_weight + self.broken_time_gap_reset_sequence = broken_time_gap_reset_sequence + self.broken_distance_gap_reset_sequence = broken_distance_gap_reset_sequence + +class RouteStep: + """One step in a route, corresponding to one road segment feature""" + def __init__(self, feature: MatchableFeature, via_point: Point) -> None: + """ + Attributes: + feature: the matched feature + via_point: the point on the feature where the route enters the feature as a shapely Point + """ + self.feature = feature + self.via_point = via_point + +class Route: + """A 
route, consisting of a sequence of steps""" + def __init__(self, distance: float, steps: Iterable[RouteStep]) -> None: + self.distance = distance + self.steps = steps + +class SnappedPointPrediction: + """A road segment feature as a snap prediction for point in a trace, with relevant match signals""" + def __init__(self, id: str, snapped_point: Point, referenced_feature: MatchableFeature, distance_to_snapped_road: float, route_distance_to_prev_point: float, emission_prob: float, best_transition_prob: float, best_log_prob: float, best_prev_prediction: float, best_sequence: Iterable[str], best_route_via_points: Iterable[str], best_revisited_via_points_count:int, best_revisited_segments_count:int) -> None: + self.id = str(id) + self.snapped_point = snapped_point + self.referenced_feature = referenced_feature + self.distance_to_snapped_road = distance_to_snapped_road + self.route_distance_to_prev_point = route_distance_to_prev_point + self.emission_prob = emission_prob + self.best_transition_prob = best_transition_prob + self.best_log_prob = best_log_prob + self.best_prev_prediction = best_prev_prediction + self.best_sequence = best_sequence + self.best_route_via_points = best_route_via_points + self.best_revisited_via_points_count = best_revisited_via_points_count + self.best_revisited_segments_count = best_revisited_segments_count + + def to_json(self, diagnostic_mode=False): + best_prev_prediction_id = "" + if self.best_prev_prediction is not None: + best_prev_prediction_id = self.best_prev_prediction.id + + j = { + "id": self.id, + "snapped_point": self.snapped_point.wkt, + "distance_to_snapped_road": self.distance_to_snapped_road, + "route_distance_to_prev_point": self.route_distance_to_prev_point, + } + + if diagnostic_mode: + j["referenced_feature"] = self.referenced_feature.geometry.wkt + j["emission_prob"] = self.emission_prob + j["best_transition_prob"] = self.best_transition_prob + j["best_log_prob"] = self.best_log_prob + j["best_prev_prediction"] = 
best_prev_prediction_id + j["best_route_via_points"] = self.best_route_via_points + j["best_revisited_via_points_count"] = self.best_revisited_via_points_count + j["best_revisited_segments_count"] = self.best_revisited_segments_count + + return j + +class PointSnapInfo: + """Snap-to-road match information corresponding to one point in a trace""" + def __init__(self, index: int, original_point: Point, time: str, seconds_since_prev_point: float=None, predictions:Iterable[SnappedPointPrediction]=[]) -> None: + self.index = index + self.original_point = original_point + self.time = time + self.seconds_since_prev_point = seconds_since_prev_point + self.predictions = predictions + self.best_prediction = None + self.ignore = False + + def to_json(self, diagnostic_mode: bool=False, include_all_predictions: bool=False,): + best_prediction_json = None if self.best_prediction is None else self.best_prediction.to_json(diagnostic_mode) + + j = { + "original_point": self.original_point.wkt, + "time": self.time, + "seconds_since_prev_point": self.seconds_since_prev_point, + "snap_prediction": best_prediction_json, + } + + if self.ignore: + j["ignore"] = True + + if diagnostic_mode: + j["point_index"] = self.index + + if include_all_predictions: + j["predictions"] = list(map(lambda x: x.to_json(diagnostic_mode), self.predictions)) + return j + +class TraceMatchResult: + """Result of a matching trace to road segments""" + def __init__(self, id: str, source_wkt: str, points: Iterable[PointSnapInfo], source_length: float, target_candidates_count: int, matched_target_ids: Iterable[str]=None, elapsed: float=None, sequence_breaks: int=0, points_with_matches: int=0, route_length: float=0, avg_dist_to_road: float=None, revisited_via_points: int=0, revisited_segments: int=0) -> None: + self.id = id + self.source_wkt = source_wkt + self.points = points + self.source_length = source_length + self.target_candidates_count = target_candidates_count + self.matched_target_ids = matched_target_ids 
+ self.elapsed = elapsed + self.sequence_breaks = sequence_breaks + self.points_with_matches = points_with_matches + self.route_length = route_length + self.avg_dist_to_road = avg_dist_to_road + self.revisited_via_points = revisited_via_points + self.revisited_segments = revisited_segments + + def to_json(self, diagnostic_mode=False, include_all_predictions=False): + points_json = list(map(lambda x: x.to_json(diagnostic_mode, include_all_predictions), self.points)) + return { + "id": str(self.id), + "elapsed": self.elapsed, + "source_length": self.source_length, + "route_length": self.route_length, + "points": len(self.points), + "points_with_matches": self.points_with_matches, + "avg_dist_to_road": self.avg_dist_to_road, + "sequence_breaks": self.sequence_breaks, + "revisited_via_points": self.revisited_via_points, + "revisited_segments": self.revisited_segments, + "target_candidates_count": self.target_candidates_count, + "target_ids": self.matched_target_ids, + "points": points_json + } + + def __str__(self) -> str: + return json.dumps(self.to_json()) diff --git a/gers/examples/python/match_traces.py b/gers/examples/python/match_traces.py index bbf2e6a02..35424196a 100644 --- a/gers/examples/python/match_traces.py +++ b/gers/examples/python/match_traces.py @@ -1,508 +1,508 @@ -import argparse -import csv -import json -import os -import math - -import constants -from route_utils import get_shortest_route -from match_classes import TraceSnapOptions, MatchableFeature, TraceMatchResult, SnappedPointPrediction, PointSnapInfo, RouteStep -from utils import get_features_with_cells, get_seconds_elapsed, get_distance, get_linestring_length, load_matchable_set - -from shapely import Point -from shapely.ops import nearest_points -from timeit import default_timer as timer -from typing import Dict, Iterable - -def get_feature_id_to_connected_features(features_overture: Iterable[MatchableFeature]) -> Dict[str, Iterable[MatchableFeature]]: - """returns a connected roads "graph" 
as a dictionary of feature id to features that are connected to it, as modeled in overture schema via connector_ids property""" - connector_id_to_features = {} - for feature in features_overture: - for connector_id in feature.get_connector_ids(): - if not connector_id in connector_id_to_features: - connector_id_to_features[connector_id] = [] - connector_id_to_features[connector_id].append(feature) - - feature_id_to_connected_features = {} - for feature in features_overture: - feature_id_to_connected_features[feature.id] = [] - for connector_id in feature.get_connector_ids(): - for other_feature in connector_id_to_features[connector_id]: - if other_feature.id != feature.id: - feature_id_to_connected_features[feature.id].append(other_feature) - return feature_id_to_connected_features - -def read_predictions(predictions_file: str): - """reads snap predictions from tab separated file with columns: trace_id, point_index, gers_id, score""" - p = {} - with open(predictions_file, 'r') as file: - reader = csv.reader(file, delimiter=constants.COLUMN_SEPARATOR) - for row in reader: - try: - trace_id = row[0] - point_index = int(row[1]) - gers_id = row[3] - if not(trace_id in p): - p[trace_id] = {} - p[trace_id][point_index] = gers_id - except ValueError: - continue # header or invalid line - return p - -def calculate_error_rate(labeled_file: str, target_features_by_id: Dict[str, Iterable[MatchableFeature]], match_results: Iterable[TraceMatchResult]): - """returns total error rate from a labeled file and a list of trace match results""" - if not(os.path.exists(labeled_file)): - print(f'no metrics to compute (file {labeled_file} does not exist)') - return - - labels = read_predictions(labeled_file) - total_correct_distance = 0 - total_incorrect_distance = 0 - with open(labeled_file + ".actual.txt",'w') as f: - f.write(constants.COLUMN_SEPARATOR.join(["trace_id", "point_index", "label_gers_id", "prediction_gers_id", "label_snapped_wkt", "prediction_snapped_wkt", 
"distance_to_prev_point", "is_correct"]) + "\n") - for trace_match_result in match_results: - if not(trace_match_result.id in labels): - continue - - correct_distance = 0 - incorrect_distance = 0 - prev_point = None - - for point in trace_match_result.points: - if not(point.index in labels[trace_match_result.id]): - print(f'no label for trace_id={trace_match_result.id} point_index={point.index}') - break - - label_gers_id = labels[trace_match_result.id][point.index] - dist_to_prev_point = 0 - is_correct = not(point.best_prediction is None) and (str(point.best_prediction.id) == label_gers_id) - if prev_point is not None: - dist_to_prev_point = get_distance(prev_point, point.original_point) - correct_distance += dist_to_prev_point - if not is_correct: - # in the original paper error metric is defined as: (added incorrect route distance + removed correct route distance) / total correct route distance - # but since it's difficult to label the correct route distance (would need to have a reliable routing engine to correctly calculate it) - # we'll just use the distance between the original route's points - # side effect is that start/end points and stopped points that usually have more gps noise will be penalized more than the route distance approach would - - # but that may be preferable to overweigh the more problematic/difficult points - incorrect_distance += dist_to_prev_point - - label_snapped_point = None - if not(label_gers_id in target_features_by_id): - print(f'no target feature for label_gers_id={label_gers_id}') - else: - label_shape = target_features_by_id[label_gers_id].geometry - x, label_snapped_point = nearest_points(point.original_point, label_shape) - - columns = [\ - str(trace_match_result.id), \ - str(point.index), \ - str(label_gers_id), \ - str(point.best_prediction.id) if point.best_prediction is not None else "", \ - label_snapped_point.wkt if label_snapped_point is not None else "", \ - point.best_prediction.snapped_point.wkt if 
point.best_prediction is not None else "", \ - str(dist_to_prev_point), \ - str(is_correct), \ - ] - f.write(constants.COLUMN_SEPARATOR.join(columns) + "\n") - - prev_point = point.original_point - - trace_error_rate = incorrect_distance / correct_distance - print(rf"trace_id={trace_match_result.id} trace_error_rate={trace_error_rate:.2f} correct_distance={correct_distance:.2f} incorrect_distance={incorrect_distance:.2f}") - total_correct_distance += correct_distance - total_incorrect_distance += incorrect_distance - - if total_correct_distance == 0: - print('no correct distance') - return -1 - - total_error_rate = total_incorrect_distance / total_correct_distance - print(rf"total_error_rate={total_error_rate:.2f} total_correct_distance={total_correct_distance:.2f} total_incorrect_distance={total_incorrect_distance:.2f}") - return total_error_rate - -def output_trace_snap_results(match_results: Iterable[TraceMatchResult], output_file_name: str, output_for_judgment: bool = False): - results_json = list(map(lambda x: x.to_json(diagnostic_mode=False, include_all_predictions=False), match_results)) - with open(output_file_name, 'w') as f: - json.dump(results_json, f, indent=4) - - results_json = list(map(lambda x: x.to_json(diagnostic_mode=True, include_all_predictions=False), match_results)) - with open(output_file_name + ".with_diagnostics.json", 'w') as f: - json.dump(results_json, f, indent=4) - - results_json = list(map(lambda x: x.to_json(diagnostic_mode=True, include_all_predictions=True), match_results)) - with open(output_file_name + ".with_diagnostics-all-predictions.json", 'w') as f: - json.dump(results_json, f, indent=4) - - if output_for_judgment: - with open(output_file_name + ".for_judgment.txt",'w') as f: - f.write(constants.COLUMN_SEPARATOR.join(["trace_id", "point_index", "trace_point_wkt", "gers_id"]) + "\n") - for r in match_results: - for idx, p in enumerate(r.points): - columns = [ - str(r.id), - str(idx), - p.original_point.wkt, - 
str(p.best_prediction.id) if p.best_prediction is not None else "" - ] - f.write(constants.COLUMN_SEPARATOR.join(columns) + "\n") - - with open(output_file_name + ".snapped_points.txt",'w') as f: - f.write(constants.COLUMN_SEPARATOR.join(["trace_id", "point_index", "gers_id", "snapped_point_wkt"]) + "\n") - for r in match_results: - for idx, p in enumerate(r.points): - columns = [ - str(r.id), - str(idx), - str(p.best_prediction.id) if p.best_prediction is not None else "", - p.best_prediction.snapped_point.wkt if p.best_prediction is not None else "" - ] - f.write(constants.COLUMN_SEPARATOR.join(columns) + "\n") - - with open(output_file_name + ".auto_metrics.txt",'w') as f: - header = [ - "id", - "source_length", - "route_length", - "points", - "points_with_match", - "percent_points_with_match", - "target_candidates_count", - "matched_target_ids_count", - "avg_dist_to_road", - "sequence_breaks", - "revisited_via_points", - "revisited_segments", - "elapsed", - "source_wkt" - ] - f.write(constants.COLUMN_SEPARATOR.join(header) + "\n") - for r in match_results: - columns = [ - str(r.id), - str(r.source_length), - str(r.route_length), - str(len(r.points)), - str(r.points_with_matches), - rf"{(100*r.points_with_matches/len(r.points)):.2f}", - str(r.target_candidates_count), - str(len(r.matched_target_ids)), - str(r.avg_dist_to_road), - str(r.sequence_breaks), - str(r.revisited_via_points), - str(r.revisited_segments), - str(r.elapsed), - str(r.source_wkt), - ] - f.write(constants.COLUMN_SEPARATOR.join(columns) + "\n") - -def set_best_path_predictions(points: Iterable[PointSnapInfo]): - """Sets the best prediction for each point in the sequence, starting from the end and going backwards following the best_prev_prediction chain""" - - last_point = points[-1] - if last_point.predictions is None or len(last_point.predictions) == 0 or last_point.predictions[0].best_log_prob == 0: - return # no path found - - last_point.best_prediction = last_point.predictions[0] # this is 
sorted descending by probability, so the first one is the best - for idx in range(len(points)-2, -1, -1): - if points[idx + 1].best_prediction is not None: - points[idx].best_prediction = points[idx + 1].best_prediction.best_prev_prediction - else: - if not(points[idx].ignore) and len(points[idx].predictions) > 0: - points[idx].best_prediction = points[idx].predictions[0] - -def extend_sequence(steps: Iterable[RouteStep], prev_prediction: SnappedPointPrediction): - """Extends the sequence of the traveled segments up to the previous point with the new steps; also returns the number of revisited segments and via points""" - revisited_via_points_count = 0 - revisited_segments_count = 0 - extended_sequence = prev_prediction.best_sequence.copy() if prev_prediction.best_sequence is not None else [] - revisited_segments_count = 0 - added_via_points = [] - for step in steps: - if len(extended_sequence) == 0 or step.feature.id != extended_sequence[-1]: # either first step or new feature - if len(extended_sequence) > 0 and step.feature.id in extended_sequence: # different than prev segment but present in the sequence, so we are revisiting it - revisited_segments_count += 1 - extended_sequence.append(step.feature.id) - if step.via_point is not None: - added_via_points.append(step.via_point.wkt) - - if len(added_via_points) > 0: - all_prev_via_points = set() - p = prev_prediction - while p is not None: - if p.best_route_via_points is not None: - for vp in p.best_route_via_points: - all_prev_via_points.add(vp) - if len(all_prev_via_points) > 100: - break # optimization for very long traces, don't need to check all of them, just the recent ones - p = p.best_prev_prediction - - for added_via_point in added_via_points: - if added_via_point in all_prev_via_points: - revisited_via_points_count += 1 - return (extended_sequence, revisited_segments_count, revisited_via_points_count) - -def get_trace_matches(source_feature: MatchableFeature, target_candidates: 
Iterable[MatchableFeature], options: TraceSnapOptions) -> TraceMatchResult: - """Matches a `source_feature` trace to most likely traveled `targe_candidates` road segments""" - start = timer() - - feature_id_to_connected_features = get_feature_id_to_connected_features(target_candidates) - - filter_feature_ids = set(map(lambda x: x.id, target_candidates)) - - times = source_feature.properties.get('times') - points = [] - prev_point = None - sequence_breaks = 0 - for idx, coord in enumerate(source_feature.geometry.coords): - - original_point = Point(coord[0], coord[1]) - predictions = [] - - for target_feature in target_candidates: - op, snapped_point = nearest_points(original_point, target_feature.geometry) - distance_to_road = get_distance(original_point, snapped_point) - if distance_to_road > options.max_point_to_road_distance: - continue - - emission_prob = (1 / (math.sqrt(2*math.pi) * options.sigma)) * math.exp(-0.5 * ((distance_to_road/options.sigma)**2)) # measurement probability - if was on this road how likely is it to have measured the point at this distance - best_log_prob = None - best_transition_prob = None - best_prev_prediction = None - best_route_dist_from_prev_point = None - best_sequence = None - best_route_via_points = None - best_revisited_via_points_count = 0 - best_revisited_segments_count = 0 - trace_dist_from_prev_point = 0 - # calculate transition probability from all prev point matches to current match candidate target_feature - if prev_point is None: - best_log_prob = math.log(emission_prob) - best_transition_prob = 1 - best_sequence = [target_feature.id] - else: - trace_dist_from_prev_point = get_distance(original_point, prev_point.original_point) - for prev_prediction in prev_point.predictions: - if not(options.allow_loops) and not(prev_prediction.best_sequence is None) and target_feature.id in prev_prediction.best_sequence and prev_prediction.referenced_feature.id != target_feature.id: - # already part of best sequence, but then moved to 
a different segment, so this is not a good candidate, it means this would walk back on itself - continue - - route = get_shortest_route(target_candidates, feature_id_to_connected_features, prev_prediction.referenced_feature, target_feature, prev_prediction.snapped_point, snapped_point, filter_feature_ids, [] if options.allow_loops else prev_prediction.best_sequence) - # check distance is not float('inf') - if route is None or route.distance == float('inf') : - # couldn't find path, skip this prev_match as impossible to transition from it to this match - continue - - dist_diff = abs(trace_dist_from_prev_point - route.distance) - - transition_prob = (1 / options.beta) * math.exp(-dist_diff / options.beta) - - extended_sequence, revisited_segments_count, revisited_via_points_count = extend_sequence(route.steps, prev_prediction) - transition_prob *= math.exp(-revisited_via_points_count * options.revisit_via_point_penalty_weight) # todo: what's the right way to penalize revisiting via points? - transition_prob *= math.exp(-revisited_segments_count * options.revisit_segment_penalty_weight) # todo: what's the right way to penalize revisiting segments? 
- - if dist_diff > options.max_route_to_trace_distance_difference or transition_prob <= 0: - continue - #match_prob = prev_prediction.best_prob * emission_prob * transition_prob - # probabilities multiplied over many points go to zero (floating point underflow), so use log of product is sum of logs - match_log_prob = prev_prediction.best_log_prob + math.log(emission_prob) + math.log(transition_prob) - #print(f'point#{idx} prev_prediction={prev_prediction.id} transition_prob={transition_prob} emission_prob={emission_prob} match_prob={match_prob} route_dist_from_prev_point={route_dist_from_prev_point} trace_dist_from_prev_point={trace_dist_from_prev_point} dist_diff={dist_diff}') - if best_log_prob is None or match_log_prob > best_log_prob: - best_log_prob = match_log_prob - best_transition_prob = transition_prob - best_prev_prediction = prev_prediction - best_route_dist_from_prev_point = route.distance - best_sequence = extended_sequence - best_route_via_points = [] - best_revisited_via_points_count = revisited_via_points_count - best_revisited_segments_count = revisited_segments_count - for step in route.steps: - if step.via_point is not None: - best_route_via_points.append(step.via_point.wkt) - # todo: also include the intermediate features in route.path - - if best_log_prob is None: - continue # couldn't find a path to this point, skip it - #print(f'point#{idx} candidate feature={target_feature.id} best_log_prob={best_log_prob} best_prev_point={best_prev_prediction.id if best_prev_prediction is not None else None} best_transition_prob={best_transition_prob} emission_prob={emission_prob} distance_to_road={distance_to_road}') - prediction = SnappedPointPrediction(target_feature.id, snapped_point, target_feature, distance_to_road, best_route_dist_from_prev_point, emission_prob, best_transition_prob, best_log_prob, best_prev_prediction, best_sequence, best_route_via_points, best_revisited_via_points_count, best_revisited_segments_count) - - 
predictions.append(prediction) - - predictions.sort(key=lambda x: x.best_log_prob, reverse=True) - time_since_prev_point = None if times is None or prev_point is None else get_seconds_elapsed(times[prev_point.index], times[idx]) - time = None if times is None else times[idx] - point = PointSnapInfo(idx, original_point, time, time_since_prev_point, predictions) - points.append(point) - - if len(predictions) > 0: - prev_point = point # don't update prev_point unless it has at least one prediction - else: - # no predictions for this point, so ignore current point and previous point to attempt to recover sequence; - # if gap between current point and prev_point is too big, abandon the prev_point and reset; - # this will happen when there is no road in the target map to match the trace - point.ignore = True - if prev_point is not None: - prev_point.ignore = True - if prev_point.index > 0: - prev_point = points[prev_point.index - 1] - # gap with no candidates too big if 60seconds or 200m since last point - if (time_since_prev_point is not None and time_since_prev_point > options.broken_time_gap_reset_sequence) or \ - trace_dist_from_prev_point > options.broken_distance_gap_reset_sequence: - #print(rf"#{str(idx)}: sequence break; time_since_prev_point={time_since_prev_point} trace_dist_from_prev_point={trace_dist_from_prev_point}") - # we have a sequence break, reset prev point, new sequence will start from next point - sequence_breaks += 1 - prev_point = None - else: - prev_point = None - - set_best_path_predictions(points) - - end = timer() - elapsed = end - start - source_feature_length = get_linestring_length(source_feature.geometry) - t = TraceMatchResult(source_feature.id, source_feature.geometry.wkt, points, source_feature_length, len(target_candidates), elapsed=elapsed, sequence_breaks=sequence_breaks) - set_trace_match_metrics(t) - return t - -def set_trace_match_metrics(t: TraceMatchResult) -> None: - matched_target_ids = set() - route_length = 0 - dist_to_road 
= 0 - revisited_via_points = 0 - revisited_segments = 0 - points_with_matches = 0 - for point in t.points: - if point.best_prediction is not None and point.best_prediction.referenced_feature is not None: - points_with_matches += 1 - route_length += point.best_prediction.route_distance_to_prev_point if point.best_prediction.route_distance_to_prev_point is not None else 0 - dist_to_road += point.best_prediction.distance_to_snapped_road - revisited_via_points += point.best_prediction.best_revisited_via_points_count - revisited_segments += point.best_prediction.best_revisited_segments_count - matched_target_ids.add(point.best_prediction.referenced_feature.id) - t.matched_target_ids = list(matched_target_ids) - t.points_with_matches = points_with_matches - t.route_length = round(route_length, 2) - t.avg_dist_to_road = round(dist_to_road / points_with_matches, 2) if points_with_matches > 0 else None - t.revisited_via_points = revisited_via_points - t.revisited_segments = revisited_segments - -def print_stats(source_features: Iterable[MatchableFeature], target_features: Iterable[MatchableFeature], match_results: Iterable[TraceMatchResult], total_elapsed: float, avg_runtime_per_feature: float): - num_traces = len(source_features) - total_route_length = sum([r.route_length for r in match_results]) / 1000 # in km - total_traces_length = sum([r.source_length for r in match_results]) / 1000 # in km - total_candidates = sum([r.target_candidates_count for r in match_results]) - total_matches = sum([len(r.matched_target_ids) for r in match_results]) - total_sequence_breaks = sum([r.sequence_breaks for r in match_results]) - total_revisited_via_points = sum([r.revisited_via_points for r in match_results]) - total_revisited_segments = sum([r.revisited_segments for r in match_results]) - total_traces_with_matches = sum([1 for r in match_results if r.points_with_matches > 0]) - total_avg_dist_to_road = sum([r.avg_dist_to_road for r in match_results if r.points_with_matches > 0]) - 
avg_runtime_per_km = total_elapsed / total_traces_length if total_traces_length > 0 else None - avg_dist_to_road = round(total_avg_dist_to_road / total_traces_with_matches, 2) if total_traces_with_matches > 0 else None - - print("==================================================================") - print("Totals:") - print("==================================================================") - print(rf"Traces.............................{num_traces}") - print(rf"Target features....................{len(target_features)}") - print(rf"Elapsed:...........................{round(total_elapsed//60)}min {total_elapsed%60:.3f}s") - print(rf"Avg runtime/trace..................{avg_runtime_per_feature:.3f}s") - print(rf"Avg runtime/km.....................{avg_runtime_per_km:.3f}s") - print(rf"Avg distance to snapped road.......{avg_dist_to_road}m") - print(rf"Snapped route length...............{total_route_length:.2f}km") - print(rf"GPS traces length..................{total_traces_length:.2f}km") - print(rf"Snapped route len/gps len..........{(total_route_length/total_traces_length):.2f}") - print(rf"Avg number of candidate segments...{(total_candidates/num_traces):.2f}/trace, {(total_candidates/total_traces_length):.2f}/km") - print(rf"Avg number of matched segments.....{(total_matches/num_traces):.2f}/trace, {(total_matches/total_traces_length):.2f}/km") - print(rf"Avg number of sequence breaks......{(total_sequence_breaks/num_traces):.2f}/trace, {(total_sequence_breaks/total_traces_length):.2f}/km") - print(rf"Avg number of revisited via points.{(total_revisited_via_points/num_traces):.2f}/trace, {(total_revisited_via_points/total_traces_length):.2f}/km") - print(rf"Avg number of revisited segments...{(total_revisited_segments/num_traces):.2f}/trace, {(total_revisited_segments/total_traces_length):.2f}/km") - print("==================================================================") - -def snap_traces(features_to_match_file: str, overture_file: str, output_file: str, 
res: int, snap_options: TraceSnapOptions=None, output_for_judgment: bool=False) -> None: - if snap_options is None: - snap_options = TraceSnapOptions() # loads default options - - # save the options we used next to the output file for debugging or comparison with other runs - with open(output_file + ".options.json", "w") as f: - json.dump(snap_options.__dict__, f, indent=4) - - start = timer() - print("Loading features...") - to_match_prop_filter = {} - #to_match_prop_filter["id"] = "manual_trace#4" - to_match = load_matchable_set(features_to_match_file, is_multiline=False, res=res) - features_to_match = to_match.features_by_id.values() - if len(features_to_match) == 0: - print("no features to match") - exit() - - overture = load_matchable_set(overture_file, is_multiline=True, properties_filter = {"type": "segment"}, res=res) - features_overture =overture.features_by_id.values() - print("Features to match: " + str(len(features_to_match))) - print("Features Overture: " + str(len(features_overture))) - end = timer() - print(f"Loading time: {(end-start):.2f}s") - - i = 0 - match_results = [] - total_elapsed = 0 - for source_feature in features_to_match: - i += 1 - - target_candidates = get_features_with_cells(overture.features_by_cell, to_match.cells_by_id[source_feature.id]) - match_res = get_trace_matches(source_feature, target_candidates, snap_options) - match_results.append(match_res) - - total_elapsed += match_res.elapsed - avg_runtime_per_feature = total_elapsed / i - - if i%1 == 0: - print(rf"trace#{str(i)} length={match_res.source_length} route_length={round(match_res.route_length)} " + \ - rf"points={len(source_feature.geometry.coords)} points_w_matches={match_res.points_with_matches} " + \ - rf"candidates={match_res.target_candidates_count} matched target_ids: {str(len(match_res.matched_target_ids))} " + \ - rf"elapsed: {match_res.elapsed:.2f}s; avg runtime/feature: {avg_runtime_per_feature:.3f}s") - - print_stats(features_to_match, features_overture, 
match_results, total_elapsed, avg_runtime_per_feature) - - print("Writing results...") - start = timer() - output_trace_snap_results(match_results, output_file, output_for_judgment) - end = timer() - print(f"Writing time: {(end-start):.2f}s") - calculate_error_rate(features_to_match_file.replace('.geojson', '.labeled.txt'), overture.features_by_id, match_results) - -def get_args(): - parser = argparse.ArgumentParser(description="", add_help=True, formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--input-to-match", help="Input file containing features to match in geojson format", required=True) - parser.add_argument("--input-overture", help="Input file containing overture features", required=True) - parser.add_argument("--output", help="Output file containing match results", required=True) - parser.add_argument("--resolution", help="H3 cell resolution used to pre-filter candidates", type=int, default=constants.DEFAULT_H3_RESOLUTION, choices=range(0,15)) - parser.add_argument("--sigma", type=float, help=f"Sigma param - controlling tolerance to GPS noise", required=False, default=constants.DEFAULT_SIGMA) - parser.add_argument("--beta", type=float, help=f"Beta param - controlling confidence in route", required=False, default=constants.DEFAULT_BETA) - parser.add_argument("--allow_loops", type=bool, help=f"Allow same sequence to revisit same segment with other segment(s) in between", required=False, default=constants.DEFAULT_ALLOW_LOOPS) - parser.add_argument("--max_point_to_road_distance", type=float, help=f"Maximum distance in meters between a trace point and a match candidate road", required=False, default=constants.DEFAULT_MAX_POINT_TO_ROAD_DISTANCE) - parser.add_argument("--max_route_to_trace_distance_difference", type=float, help=f"Maximum difference between route and trace lengths in meters", required=False, default=constants.DEFAULT_MAX_ROUTE_TO_TRACE_DISTANCE_DIFFERENCE) - parser.add_argument("--revisit_segment_penalty_weight", 
type=float, help="How much to penalize a route with one segment revisit", required=False, default=constants.DEFAULT_SEGMENT_REVISIT_PENALTY) - parser.add_argument("--revisit_via_point_penalty_weight", type=float, help="How much to penalize a route with one via-point revisit", required=False, default=constants.DEFAULT_VIA_POINT_PENALTY_WEIGHT) - parser.add_argument("--broken_time_gap_reset_sequence", type=float, help="How big the time gap in seconds between points without valid route options before we consider it a broken sequence", required=False, default=constants.DEFAULT_BROKEN_TIME_GAP_RESET_SEQUENCE) - parser.add_argument("--broken_distance_gap_reset_sequence", type=float, help="How big the distance gap in meters between points without valid route options before we consider it a broken sequence", required=False, default=constants.DEFAULT_BROKEN_DISTANCE_GAP_RESET_SEQUENCE) - parser.add_argument("--j", action="store_true", help="Also output the matches as a 'pre-labeled' file for judgment", default=False, required=False) - return parser.parse_args() - -def get_trace_snap_options_from_args(args): - return TraceSnapOptions( - sigma=args.sigma, - beta=args.beta, - allow_loops=args.allow_loops, - max_point_to_road_distance=args.max_point_to_road_distance, - max_route_to_trace_distance_difference=args.max_route_to_trace_distance_difference, - revisit_segment_penalty_weight=args.revisit_segment_penalty_weight, - revisit_via_point_penalty_weight=args.revisit_via_point_penalty_weight, - broken_time_gap_reset_sequence=args.broken_time_gap_reset_sequence, - broken_distance_gap_reset_sequence=args.broken_distance_gap_reset_sequence) - -if __name__ == "__main__": - args = get_args() - trace_snap_options = get_trace_snap_options_from_args(args) - snap_traces(args.input_to_match, args.input_overture, args.output, args.resolution, trace_snap_options, output_for_judgment=args.j) +import argparse +import csv +import json +import os +import math + +import constants +from 
route_utils import get_shortest_route +from match_classes import TraceSnapOptions, MatchableFeature, TraceMatchResult, SnappedPointPrediction, PointSnapInfo, RouteStep +from utils import get_features_with_cells, get_seconds_elapsed, get_distance, get_linestring_length, load_matchable_set + +from shapely import Point +from shapely.ops import nearest_points +from timeit import default_timer as timer +from typing import Dict, Iterable + +def get_feature_id_to_connected_features(features_overture: Iterable[MatchableFeature]) -> Dict[str, Iterable[MatchableFeature]]: + """returns a connected roads "graph" as a dictionary of feature id to features that are connected to it, as modeled in overture schema via connector_ids property""" + connector_id_to_features = {} + for feature in features_overture: + for connector_id in feature.get_connector_ids(): + if not connector_id in connector_id_to_features: + connector_id_to_features[connector_id] = [] + connector_id_to_features[connector_id].append(feature) + + feature_id_to_connected_features = {} + for feature in features_overture: + feature_id_to_connected_features[feature.id] = [] + for connector_id in feature.get_connector_ids(): + for other_feature in connector_id_to_features[connector_id]: + if other_feature.id != feature.id: + feature_id_to_connected_features[feature.id].append(other_feature) + return feature_id_to_connected_features + +def read_predictions(predictions_file: str): + """reads snap predictions from tab separated file with columns: trace_id, point_index, gers_id, score""" + p = {} + with open(predictions_file, 'r') as file: + reader = csv.reader(file, delimiter=constants.COLUMN_SEPARATOR) + for row in reader: + try: + trace_id = row[0] + point_index = int(row[1]) + gers_id = row[3] + if not(trace_id in p): + p[trace_id] = {} + p[trace_id][point_index] = gers_id + except ValueError: + continue # header or invalid line + return p + +def calculate_error_rate(labeled_file: str, target_features_by_id: Dict[str, 
Iterable[MatchableFeature]], match_results: Iterable[TraceMatchResult]): + """returns total error rate from a labeled file and a list of trace match results""" + if not(os.path.exists(labeled_file)): + print(f'no metrics to compute (file {labeled_file} does not exist)') + return + + labels = read_predictions(labeled_file) + total_correct_distance = 0 + total_incorrect_distance = 0 + with open(labeled_file + ".actual.txt",'w') as f: + f.write(constants.COLUMN_SEPARATOR.join(["trace_id", "point_index", "label_gers_id", "prediction_gers_id", "label_snapped_wkt", "prediction_snapped_wkt", "distance_to_prev_point", "is_correct"]) + "\n") + for trace_match_result in match_results: + if not(trace_match_result.id in labels): + continue + + correct_distance = 0 + incorrect_distance = 0 + prev_point = None + + for point in trace_match_result.points: + if not(point.index in labels[trace_match_result.id]): + print(f'no label for trace_id={trace_match_result.id} point_index={point.index}') + break + + label_gers_id = labels[trace_match_result.id][point.index] + dist_to_prev_point = 0 + is_correct = not(point.best_prediction is None) and (str(point.best_prediction.id) == label_gers_id) + if prev_point is not None: + dist_to_prev_point = get_distance(prev_point, point.original_point) + correct_distance += dist_to_prev_point + if not is_correct: + # in the original paper error metric is defined as: (added incorrect route distance + removed correct route distance) / total correct route distance + # but since it's difficult to label the correct route distance (would need to have a reliable routing engine to correctly calculate it) + # we'll just use the distance between the original route's points + # side effect is that start/end points and stopped points that usually have more gps noise will be penalized more than the route distance approach would - + # but that may be preferable to overweigh the more problematic/difficult points + incorrect_distance += dist_to_prev_point + + 
label_snapped_point = None + if not(label_gers_id in target_features_by_id): + print(f'no target feature for label_gers_id={label_gers_id}') + else: + label_shape = target_features_by_id[label_gers_id].geometry + x, label_snapped_point = nearest_points(point.original_point, label_shape) + + columns = [\ + str(trace_match_result.id), \ + str(point.index), \ + str(label_gers_id), \ + str(point.best_prediction.id) if point.best_prediction is not None else "", \ + label_snapped_point.wkt if label_snapped_point is not None else "", \ + point.best_prediction.snapped_point.wkt if point.best_prediction is not None else "", \ + str(dist_to_prev_point), \ + str(is_correct), \ + ] + f.write(constants.COLUMN_SEPARATOR.join(columns) + "\n") + + prev_point = point.original_point + + trace_error_rate = incorrect_distance / correct_distance + print(rf"trace_id={trace_match_result.id} trace_error_rate={trace_error_rate:.2f} correct_distance={correct_distance:.2f} incorrect_distance={incorrect_distance:.2f}") + total_correct_distance += correct_distance + total_incorrect_distance += incorrect_distance + + if total_correct_distance == 0: + print('no correct distance') + return -1 + + total_error_rate = total_incorrect_distance / total_correct_distance + print(rf"total_error_rate={total_error_rate:.2f} total_correct_distance={total_correct_distance:.2f} total_incorrect_distance={total_incorrect_distance:.2f}") + return total_error_rate + +def output_trace_snap_results(match_results: Iterable[TraceMatchResult], output_file_name: str, output_for_judgment: bool = False): + results_json = list(map(lambda x: x.to_json(diagnostic_mode=False, include_all_predictions=False), match_results)) + with open(output_file_name, 'w') as f: + json.dump(results_json, f, indent=4) + + results_json = list(map(lambda x: x.to_json(diagnostic_mode=True, include_all_predictions=False), match_results)) + with open(output_file_name + ".with_diagnostics.json", 'w') as f: + json.dump(results_json, f, indent=4) + 
+ results_json = list(map(lambda x: x.to_json(diagnostic_mode=True, include_all_predictions=True), match_results)) + with open(output_file_name + ".with_diagnostics-all-predictions.json", 'w') as f: + json.dump(results_json, f, indent=4) + + if output_for_judgment: + with open(output_file_name + ".for_judgment.txt",'w') as f: + f.write(constants.COLUMN_SEPARATOR.join(["trace_id", "point_index", "trace_point_wkt", "gers_id"]) + "\n") + for r in match_results: + for idx, p in enumerate(r.points): + columns = [ + str(r.id), + str(idx), + p.original_point.wkt, + str(p.best_prediction.id) if p.best_prediction is not None else "" + ] + f.write(constants.COLUMN_SEPARATOR.join(columns) + "\n") + + with open(output_file_name + ".snapped_points.txt",'w') as f: + f.write(constants.COLUMN_SEPARATOR.join(["trace_id", "point_index", "gers_id", "snapped_point_wkt"]) + "\n") + for r in match_results: + for idx, p in enumerate(r.points): + columns = [ + str(r.id), + str(idx), + str(p.best_prediction.id) if p.best_prediction is not None else "", + p.best_prediction.snapped_point.wkt if p.best_prediction is not None else "" + ] + f.write(constants.COLUMN_SEPARATOR.join(columns) + "\n") + + with open(output_file_name + ".auto_metrics.txt",'w') as f: + header = [ + "id", + "source_length", + "route_length", + "points", + "points_with_match", + "percent_points_with_match", + "target_candidates_count", + "matched_target_ids_count", + "avg_dist_to_road", + "sequence_breaks", + "revisited_via_points", + "revisited_segments", + "elapsed", + "source_wkt" + ] + f.write(constants.COLUMN_SEPARATOR.join(header) + "\n") + for r in match_results: + columns = [ + str(r.id), + str(r.source_length), + str(r.route_length), + str(len(r.points)), + str(r.points_with_matches), + rf"{(100*r.points_with_matches/len(r.points)):.2f}", + str(r.target_candidates_count), + str(len(r.matched_target_ids)), + str(r.avg_dist_to_road), + str(r.sequence_breaks), + str(r.revisited_via_points), + 
str(r.revisited_segments), + str(r.elapsed), + str(r.source_wkt), + ] + f.write(constants.COLUMN_SEPARATOR.join(columns) + "\n") + +def set_best_path_predictions(points: Iterable[PointSnapInfo]): + """Sets the best prediction for each point in the sequence, starting from the end and going backwards following the best_prev_prediction chain""" + + last_point = points[-1] + if last_point.predictions is None or len(last_point.predictions) == 0 or last_point.predictions[0].best_log_prob == 0: + return # no path found + + last_point.best_prediction = last_point.predictions[0] # this is sorted descending by probability, so the first one is the best + for idx in range(len(points)-2, -1, -1): + if points[idx + 1].best_prediction is not None: + points[idx].best_prediction = points[idx + 1].best_prediction.best_prev_prediction + else: + if not(points[idx].ignore) and len(points[idx].predictions) > 0: + points[idx].best_prediction = points[idx].predictions[0] + +def extend_sequence(steps: Iterable[RouteStep], prev_prediction: SnappedPointPrediction): + """Extends the sequence of the traveled segments up to the previous point with the new steps; also returns the number of revisited segments and via points""" + revisited_via_points_count = 0 + revisited_segments_count = 0 + extended_sequence = prev_prediction.best_sequence.copy() if prev_prediction.best_sequence is not None else [] + revisited_segments_count = 0 + added_via_points = [] + for step in steps: + if len(extended_sequence) == 0 or step.feature.id != extended_sequence[-1]: # either first step or new feature + if len(extended_sequence) > 0 and step.feature.id in extended_sequence: # different than prev segment but present in the sequence, so we are revisiting it + revisited_segments_count += 1 + extended_sequence.append(step.feature.id) + if step.via_point is not None: + added_via_points.append(step.via_point.wkt) + + if len(added_via_points) > 0: + all_prev_via_points = set() + p = prev_prediction + while p is not 
None: + if p.best_route_via_points is not None: + for vp in p.best_route_via_points: + all_prev_via_points.add(vp) + if len(all_prev_via_points) > 100: + break # optimization for very long traces, don't need to check all of them, just the recent ones + p = p.best_prev_prediction + + for added_via_point in added_via_points: + if added_via_point in all_prev_via_points: + revisited_via_points_count += 1 + return (extended_sequence, revisited_segments_count, revisited_via_points_count) + +def get_trace_matches(source_feature: MatchableFeature, target_candidates: Iterable[MatchableFeature], options: TraceSnapOptions) -> TraceMatchResult: + """Matches a `source_feature` trace to most likely traveled `targe_candidates` road segments""" + start = timer() + + feature_id_to_connected_features = get_feature_id_to_connected_features(target_candidates) + + filter_feature_ids = set(map(lambda x: x.id, target_candidates)) + + times = source_feature.properties.get('times') + points = [] + prev_point = None + sequence_breaks = 0 + for idx, coord in enumerate(source_feature.geometry.coords): + + original_point = Point(coord[0], coord[1]) + predictions = [] + + for target_feature in target_candidates: + op, snapped_point = nearest_points(original_point, target_feature.geometry) + distance_to_road = get_distance(original_point, snapped_point) + if distance_to_road > options.max_point_to_road_distance: + continue + + emission_prob = (1 / (math.sqrt(2*math.pi) * options.sigma)) * math.exp(-0.5 * ((distance_to_road/options.sigma)**2)) # measurement probability - if was on this road how likely is it to have measured the point at this distance + best_log_prob = None + best_transition_prob = None + best_prev_prediction = None + best_route_dist_from_prev_point = None + best_sequence = None + best_route_via_points = None + best_revisited_via_points_count = 0 + best_revisited_segments_count = 0 + trace_dist_from_prev_point = 0 + # calculate transition probability from all prev point matches to 
current match candidate target_feature + if prev_point is None: + best_log_prob = math.log(emission_prob) + best_transition_prob = 1 + best_sequence = [target_feature.id] + else: + trace_dist_from_prev_point = get_distance(original_point, prev_point.original_point) + for prev_prediction in prev_point.predictions: + if not(options.allow_loops) and not(prev_prediction.best_sequence is None) and target_feature.id in prev_prediction.best_sequence and prev_prediction.referenced_feature.id != target_feature.id: + # already part of best sequence, but then moved to a different segment, so this is not a good candidate, it means this would walk back on itself + continue + + route = get_shortest_route(target_candidates, feature_id_to_connected_features, prev_prediction.referenced_feature, target_feature, prev_prediction.snapped_point, snapped_point, filter_feature_ids, [] if options.allow_loops else prev_prediction.best_sequence) + # check distance is not float('inf') + if route is None or route.distance == float('inf') : + # couldn't find path, skip this prev_match as impossible to transition from it to this match + continue + + dist_diff = abs(trace_dist_from_prev_point - route.distance) + + transition_prob = (1 / options.beta) * math.exp(-dist_diff / options.beta) + + extended_sequence, revisited_segments_count, revisited_via_points_count = extend_sequence(route.steps, prev_prediction) + transition_prob *= math.exp(-revisited_via_points_count * options.revisit_via_point_penalty_weight) # todo: what's the right way to penalize revisiting via points? + transition_prob *= math.exp(-revisited_segments_count * options.revisit_segment_penalty_weight) # todo: what's the right way to penalize revisiting segments? 
+ + if dist_diff > options.max_route_to_trace_distance_difference or transition_prob <= 0: + continue + #match_prob = prev_prediction.best_prob * emission_prob * transition_prob + # probabilities multiplied over many points go to zero (floating point underflow), so use log of product is sum of logs + match_log_prob = prev_prediction.best_log_prob + math.log(emission_prob) + math.log(transition_prob) + #print(f'point#{idx} prev_prediction={prev_prediction.id} transition_prob={transition_prob} emission_prob={emission_prob} match_prob={match_prob} route_dist_from_prev_point={route_dist_from_prev_point} trace_dist_from_prev_point={trace_dist_from_prev_point} dist_diff={dist_diff}') + if best_log_prob is None or match_log_prob > best_log_prob: + best_log_prob = match_log_prob + best_transition_prob = transition_prob + best_prev_prediction = prev_prediction + best_route_dist_from_prev_point = route.distance + best_sequence = extended_sequence + best_route_via_points = [] + best_revisited_via_points_count = revisited_via_points_count + best_revisited_segments_count = revisited_segments_count + for step in route.steps: + if step.via_point is not None: + best_route_via_points.append(step.via_point.wkt) + # todo: also include the intermediate features in route.path + + if best_log_prob is None: + continue # couldn't find a path to this point, skip it + #print(f'point#{idx} candidate feature={target_feature.id} best_log_prob={best_log_prob} best_prev_point={best_prev_prediction.id if best_prev_prediction is not None else None} best_transition_prob={best_transition_prob} emission_prob={emission_prob} distance_to_road={distance_to_road}') + prediction = SnappedPointPrediction(target_feature.id, snapped_point, target_feature, distance_to_road, best_route_dist_from_prev_point, emission_prob, best_transition_prob, best_log_prob, best_prev_prediction, best_sequence, best_route_via_points, best_revisited_via_points_count, best_revisited_segments_count) + + 
predictions.append(prediction) + + predictions.sort(key=lambda x: x.best_log_prob, reverse=True) + time_since_prev_point = None if times is None or prev_point is None else get_seconds_elapsed(times[prev_point.index], times[idx]) + time = None if times is None else times[idx] + point = PointSnapInfo(idx, original_point, time, time_since_prev_point, predictions) + points.append(point) + + if len(predictions) > 0: + prev_point = point # don't update prev_point unless it has at least one prediction + else: + # no predictions for this point, so ignore current point and previous point to attempt to recover sequence; + # if gap between current point and prev_point is too big, abandon the prev_point and reset; + # this will happen when there is no road in the target map to match the trace + point.ignore = True + if prev_point is not None: + prev_point.ignore = True + if prev_point.index > 0: + prev_point = points[prev_point.index - 1] + # gap with no candidates too big if 60seconds or 200m since last point + if (time_since_prev_point is not None and time_since_prev_point > options.broken_time_gap_reset_sequence) or \ + trace_dist_from_prev_point > options.broken_distance_gap_reset_sequence: + #print(rf"#{str(idx)}: sequence break; time_since_prev_point={time_since_prev_point} trace_dist_from_prev_point={trace_dist_from_prev_point}") + # we have a sequence break, reset prev point, new sequence will start from next point + sequence_breaks += 1 + prev_point = None + else: + prev_point = None + + set_best_path_predictions(points) + + end = timer() + elapsed = end - start + source_feature_length = get_linestring_length(source_feature.geometry) + t = TraceMatchResult(source_feature.id, source_feature.geometry.wkt, points, source_feature_length, len(target_candidates), elapsed=elapsed, sequence_breaks=sequence_breaks) + set_trace_match_metrics(t) + return t + +def set_trace_match_metrics(t: TraceMatchResult) -> None: + matched_target_ids = set() + route_length = 0 + dist_to_road 
= 0 + revisited_via_points = 0 + revisited_segments = 0 + points_with_matches = 0 + for point in t.points: + if point.best_prediction is not None and point.best_prediction.referenced_feature is not None: + points_with_matches += 1 + route_length += point.best_prediction.route_distance_to_prev_point if point.best_prediction.route_distance_to_prev_point is not None else 0 + dist_to_road += point.best_prediction.distance_to_snapped_road + revisited_via_points += point.best_prediction.best_revisited_via_points_count + revisited_segments += point.best_prediction.best_revisited_segments_count + matched_target_ids.add(point.best_prediction.referenced_feature.id) + t.matched_target_ids = list(matched_target_ids) + t.points_with_matches = points_with_matches + t.route_length = round(route_length, 2) + t.avg_dist_to_road = round(dist_to_road / points_with_matches, 2) if points_with_matches > 0 else None + t.revisited_via_points = revisited_via_points + t.revisited_segments = revisited_segments + +def print_stats(source_features: Iterable[MatchableFeature], target_features: Iterable[MatchableFeature], match_results: Iterable[TraceMatchResult], total_elapsed: float, avg_runtime_per_feature: float): + num_traces = len(source_features) + total_route_length = sum([r.route_length for r in match_results]) / 1000 # in km + total_traces_length = sum([r.source_length for r in match_results]) / 1000 # in km + total_candidates = sum([r.target_candidates_count for r in match_results]) + total_matches = sum([len(r.matched_target_ids) for r in match_results]) + total_sequence_breaks = sum([r.sequence_breaks for r in match_results]) + total_revisited_via_points = sum([r.revisited_via_points for r in match_results]) + total_revisited_segments = sum([r.revisited_segments for r in match_results]) + total_traces_with_matches = sum([1 for r in match_results if r.points_with_matches > 0]) + total_avg_dist_to_road = sum([r.avg_dist_to_road for r in match_results if r.points_with_matches > 0]) + 
avg_runtime_per_km = total_elapsed / total_traces_length if total_traces_length > 0 else None + avg_dist_to_road = round(total_avg_dist_to_road / total_traces_with_matches, 2) if total_traces_with_matches > 0 else None + + print("==================================================================") + print("Totals:") + print("==================================================================") + print(rf"Traces.............................{num_traces}") + print(rf"Target features....................{len(target_features)}") + print(rf"Elapsed:...........................{round(total_elapsed//60)}min {total_elapsed%60:.3f}s") + print(rf"Avg runtime/trace..................{avg_runtime_per_feature:.3f}s") + print(rf"Avg runtime/km.....................{avg_runtime_per_km:.3f}s") + print(rf"Avg distance to snapped road.......{avg_dist_to_road}m") + print(rf"Snapped route length...............{total_route_length:.2f}km") + print(rf"GPS traces length..................{total_traces_length:.2f}km") + print(rf"Snapped route len/gps len..........{(total_route_length/total_traces_length):.2f}") + print(rf"Avg number of candidate segments...{(total_candidates/num_traces):.2f}/trace, {(total_candidates/total_traces_length):.2f}/km") + print(rf"Avg number of matched segments.....{(total_matches/num_traces):.2f}/trace, {(total_matches/total_traces_length):.2f}/km") + print(rf"Avg number of sequence breaks......{(total_sequence_breaks/num_traces):.2f}/trace, {(total_sequence_breaks/total_traces_length):.2f}/km") + print(rf"Avg number of revisited via points.{(total_revisited_via_points/num_traces):.2f}/trace, {(total_revisited_via_points/total_traces_length):.2f}/km") + print(rf"Avg number of revisited segments...{(total_revisited_segments/num_traces):.2f}/trace, {(total_revisited_segments/total_traces_length):.2f}/km") + print("==================================================================") + +def snap_traces(features_to_match_file: str, overture_file: str, output_file: str, 
res: int, snap_options: TraceSnapOptions=None, output_for_judgment: bool=False) -> None: + if snap_options is None: + snap_options = TraceSnapOptions() # loads default options + + # save the options we used next to the output file for debugging or comparison with other runs + with open(output_file + ".options.json", "w") as f: + json.dump(snap_options.__dict__, f, indent=4) + + start = timer() + print("Loading features...") + to_match_prop_filter = {} + #to_match_prop_filter["id"] = "manual_trace#4" + to_match = load_matchable_set(features_to_match_file, is_multiline=False, res=res) + features_to_match = to_match.features_by_id.values() + if len(features_to_match) == 0: + print("no features to match") + exit() + + overture = load_matchable_set(overture_file, is_multiline=True, properties_filter = {"type": "segment"}, res=res) + features_overture =overture.features_by_id.values() + print("Features to match: " + str(len(features_to_match))) + print("Features Overture: " + str(len(features_overture))) + end = timer() + print(f"Loading time: {(end-start):.2f}s") + + i = 0 + match_results = [] + total_elapsed = 0 + for source_feature in features_to_match: + i += 1 + + target_candidates = get_features_with_cells(overture.features_by_cell, to_match.cells_by_id[source_feature.id]) + match_res = get_trace_matches(source_feature, target_candidates, snap_options) + match_results.append(match_res) + + total_elapsed += match_res.elapsed + avg_runtime_per_feature = total_elapsed / i + + if i%1 == 0: + print(rf"trace#{str(i)} length={match_res.source_length} route_length={round(match_res.route_length)} " + \ + rf"points={len(source_feature.geometry.coords)} points_w_matches={match_res.points_with_matches} " + \ + rf"candidates={match_res.target_candidates_count} matched target_ids: {str(len(match_res.matched_target_ids))} " + \ + rf"elapsed: {match_res.elapsed:.2f}s; avg runtime/feature: {avg_runtime_per_feature:.3f}s") + + print_stats(features_to_match, features_overture, 
match_results, total_elapsed, avg_runtime_per_feature) + + print("Writing results...") + start = timer() + output_trace_snap_results(match_results, output_file, output_for_judgment) + end = timer() + print(f"Writing time: {(end-start):.2f}s") + calculate_error_rate(features_to_match_file.replace('.geojson', '.labeled.txt'), overture.features_by_id, match_results) + +def get_args(): + parser = argparse.ArgumentParser(description="", add_help=True, formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("--input-to-match", help="Input file containing features to match in geojson format", required=True) + parser.add_argument("--input-overture", help="Input file containing overture features", required=True) + parser.add_argument("--output", help="Output file containing match results", required=True) + parser.add_argument("--resolution", help="H3 cell resolution used to pre-filter candidates", type=int, default=constants.DEFAULT_H3_RESOLUTION, choices=range(0,15)) + parser.add_argument("--sigma", type=float, help=f"Sigma param - controlling tolerance to GPS noise", required=False, default=constants.DEFAULT_SIGMA) + parser.add_argument("--beta", type=float, help=f"Beta param - controlling confidence in route", required=False, default=constants.DEFAULT_BETA) + parser.add_argument("--allow_loops", type=bool, help=f"Allow same sequence to revisit same segment with other segment(s) in between", required=False, default=constants.DEFAULT_ALLOW_LOOPS) + parser.add_argument("--max_point_to_road_distance", type=float, help=f"Maximum distance in meters between a trace point and a match candidate road", required=False, default=constants.DEFAULT_MAX_POINT_TO_ROAD_DISTANCE) + parser.add_argument("--max_route_to_trace_distance_difference", type=float, help=f"Maximum difference between route and trace lengths in meters", required=False, default=constants.DEFAULT_MAX_ROUTE_TO_TRACE_DISTANCE_DIFFERENCE) + parser.add_argument("--revisit_segment_penalty_weight", 
type=float, help="How much to penalize a route with one segment revisit", required=False, default=constants.DEFAULT_SEGMENT_REVISIT_PENALTY) + parser.add_argument("--revisit_via_point_penalty_weight", type=float, help="How much to penalize a route with one via-point revisit", required=False, default=constants.DEFAULT_VIA_POINT_PENALTY_WEIGHT) + parser.add_argument("--broken_time_gap_reset_sequence", type=float, help="How big the time gap in seconds between points without valid route options before we consider it a broken sequence", required=False, default=constants.DEFAULT_BROKEN_TIME_GAP_RESET_SEQUENCE) + parser.add_argument("--broken_distance_gap_reset_sequence", type=float, help="How big the distance gap in meters between points without valid route options before we consider it a broken sequence", required=False, default=constants.DEFAULT_BROKEN_DISTANCE_GAP_RESET_SEQUENCE) + parser.add_argument("--j", action="store_true", help="Also output the matches as a 'pre-labeled' file for judgment", default=False, required=False) + return parser.parse_args() + +def get_trace_snap_options_from_args(args): + return TraceSnapOptions( + sigma=args.sigma, + beta=args.beta, + allow_loops=args.allow_loops, + max_point_to_road_distance=args.max_point_to_road_distance, + max_route_to_trace_distance_difference=args.max_route_to_trace_distance_difference, + revisit_segment_penalty_weight=args.revisit_segment_penalty_weight, + revisit_via_point_penalty_weight=args.revisit_via_point_penalty_weight, + broken_time_gap_reset_sequence=args.broken_time_gap_reset_sequence, + broken_distance_gap_reset_sequence=args.broken_distance_gap_reset_sequence) + +if __name__ == "__main__": + args = get_args() + trace_snap_options = get_trace_snap_options_from_args(args) + snap_traces(args.input_to_match, args.input_overture, args.output, args.resolution, trace_snap_options, output_for_judgment=args.j) diff --git a/gers/examples/python/route_utils.py b/gers/examples/python/route_utils.py index 
51cbe71bc..a9d72a7dc 100644 --- a/gers/examples/python/route_utils.py +++ b/gers/examples/python/route_utils.py @@ -1,91 +1,91 @@ -from utils import get_distance -from shapely.ops import nearest_points -from match_classes import RouteStep, Route, MatchableFeature -from shapely.geometry import Point -from typing import Dict, Tuple, Iterable - -def get_route_step_dist(feat_before_from: MatchableFeature, feat_from: MatchableFeature, feat_to: MatchableFeature, start_feature: MatchableFeature, end_feature: MatchableFeature, start_point: Point, end_point: Point) -> Tuple[Point, float]: - """get distance traveled on one feature `feat_from` having entering from `feat_before_from` and exiting to `feat_to`, given that the whole route starts at `start_feature` and ends at `end_feature`""" - # todo: this a distance approximation for now as length of straight line from entry point to exit point on the feat_from feature, but works reasonably well for the data seen so far - feat_from_exit_point, p2 = nearest_points(feat_from.geometry, feat_to.geometry) - d = 0 - - if feat_from.id == start_feature.id: - d += get_distance(start_point, feat_from_exit_point) - else: - p0_before, feat_from_entry_point = nearest_points(feat_before_from.geometry, feat_from.geometry) - d += get_distance(feat_from_entry_point, feat_from_exit_point) - - if feat_to.id == end_feature.id: - d += get_distance(end_point, p2) - # else there is no distance to add - - # todo: add basic penalties like allowed travel direction disagreement, road class change cost, etc. - return feat_from_exit_point, d - -def get_shortest_route(features: Iterable[MatchableFeature], feature_id_to_connected_features: Dict[str, Iterable[MatchableFeature]], start_feature: MatchableFeature, end_feature: MatchableFeature, start_point: Point, end_point: Point, allowed_ids: Iterable[str], blocked_ids: Iterable[str]) -> Route: - """ - Dijsktra's algorithm to find shortest route between start and end features. 
Remember for each traveled feature the entry via_point. - """ - - # start and end are same feature, no route calculation needed, just distance - if start_feature.id == end_feature.id: - dist = get_distance(start_point, end_point) - return Route(dist, [RouteStep(start_feature, None)]) - x = set() - - dist = {} - prev = {} - prev_via_point = {} - feats_to_visit = [] - ids_to_visit = set() - for f in features: - if f.id in blocked_ids and f.id != start_feature.id: - continue - dist[f.id] = float('inf') - prev[f.id] = None - prev_via_point[f.id] = None - feats_to_visit.append(f) - ids_to_visit.add(f.id) - dist[start_feature.id] = 0 - - while len(feats_to_visit) > 0: - current_feature = feats_to_visit[0] - min_dist = float('inf') - for f in feats_to_visit: - if dist[f.id] < min_dist: - min_dist = dist[f.id] - current_feature = f - - if min_dist == float('inf'): - break # no more allowed connected features to visit - - if current_feature.id == end_feature.id: - break # done, visited end_feature, don't need to calculate shortest path to all features - - feats_to_visit.remove(current_feature) - ids_to_visit.remove(current_feature.id) - connected_features = feature_id_to_connected_features[current_feature.id] - for v in connected_features: - if not(v.id in allowed_ids) or (v.id in blocked_ids) or not(v.id in ids_to_visit): - continue - - if not(v.id in ids_to_visit): - continue # have already visited this feature - - via_point, d = get_route_step_dist(prev[current_feature.id], current_feature, v, start_feature, end_feature, start_point, end_point) - alternate_dist = dist[current_feature.id] + d - if alternate_dist < dist[v.id]: - dist[v.id] = alternate_dist - prev[v.id] = current_feature - prev_via_point[v.id] = via_point - - steps = [] - current_feature = end_feature - if prev[current_feature.id] is not None or current_feature.id == start_feature.id: - while current_feature is not None: - steps.insert(0, RouteStep(current_feature, prev_via_point[current_feature.id])) - 
current_feature = prev[current_feature.id] - - r = Route(round(dist[end_feature.id], 2), steps) +from utils import get_distance +from shapely.ops import nearest_points +from match_classes import RouteStep, Route, MatchableFeature +from shapely.geometry import Point +from typing import Dict, Tuple, Iterable + +def get_route_step_dist(feat_before_from: MatchableFeature, feat_from: MatchableFeature, feat_to: MatchableFeature, start_feature: MatchableFeature, end_feature: MatchableFeature, start_point: Point, end_point: Point) -> Tuple[Point, float]: + """get distance traveled on one feature `feat_from` having entering from `feat_before_from` and exiting to `feat_to`, given that the whole route starts at `start_feature` and ends at `end_feature`""" + # todo: this a distance approximation for now as length of straight line from entry point to exit point on the feat_from feature, but works reasonably well for the data seen so far + feat_from_exit_point, p2 = nearest_points(feat_from.geometry, feat_to.geometry) + d = 0 + + if feat_from.id == start_feature.id: + d += get_distance(start_point, feat_from_exit_point) + else: + p0_before, feat_from_entry_point = nearest_points(feat_before_from.geometry, feat_from.geometry) + d += get_distance(feat_from_entry_point, feat_from_exit_point) + + if feat_to.id == end_feature.id: + d += get_distance(end_point, p2) + # else there is no distance to add + + # todo: add basic penalties like allowed travel direction disagreement, road class change cost, etc. + return feat_from_exit_point, d + +def get_shortest_route(features: Iterable[MatchableFeature], feature_id_to_connected_features: Dict[str, Iterable[MatchableFeature]], start_feature: MatchableFeature, end_feature: MatchableFeature, start_point: Point, end_point: Point, allowed_ids: Iterable[str], blocked_ids: Iterable[str]) -> Route: + """ + Dijsktra's algorithm to find shortest route between start and end features. Remember for each traveled feature the entry via_point. 
+ """ + + # start and end are same feature, no route calculation needed, just distance + if start_feature.id == end_feature.id: + dist = get_distance(start_point, end_point) + return Route(dist, [RouteStep(start_feature, None)]) + x = set() + + dist = {} + prev = {} + prev_via_point = {} + feats_to_visit = [] + ids_to_visit = set() + for f in features: + if f.id in blocked_ids and f.id != start_feature.id: + continue + dist[f.id] = float('inf') + prev[f.id] = None + prev_via_point[f.id] = None + feats_to_visit.append(f) + ids_to_visit.add(f.id) + dist[start_feature.id] = 0 + + while len(feats_to_visit) > 0: + current_feature = feats_to_visit[0] + min_dist = float('inf') + for f in feats_to_visit: + if dist[f.id] < min_dist: + min_dist = dist[f.id] + current_feature = f + + if min_dist == float('inf'): + break # no more allowed connected features to visit + + if current_feature.id == end_feature.id: + break # done, visited end_feature, don't need to calculate shortest path to all features + + feats_to_visit.remove(current_feature) + ids_to_visit.remove(current_feature.id) + connected_features = feature_id_to_connected_features[current_feature.id] + for v in connected_features: + if not(v.id in allowed_ids) or (v.id in blocked_ids) or not(v.id in ids_to_visit): + continue + + if not(v.id in ids_to_visit): + continue # have already visited this feature + + via_point, d = get_route_step_dist(prev[current_feature.id], current_feature, v, start_feature, end_feature, start_point, end_point) + alternate_dist = dist[current_feature.id] + d + if alternate_dist < dist[v.id]: + dist[v.id] = alternate_dist + prev[v.id] = current_feature + prev_via_point[v.id] = via_point + + steps = [] + current_feature = end_feature + if prev[current_feature.id] is not None or current_feature.id == start_feature.id: + while current_feature is not None: + steps.insert(0, RouteStep(current_feature, prev_via_point[current_feature.id])) + current_feature = prev[current_feature.id] + + r = 
Route(round(dist[end_feature.id], 2), steps) return r \ No newline at end of file diff --git a/gers/examples/python/tests/match_traces_test.py b/gers/examples/python/tests/match_traces_test.py index 07cd77a8b..72df0a3e2 100644 --- a/gers/examples/python/tests/match_traces_test.py +++ b/gers/examples/python/tests/match_traces_test.py @@ -1,52 +1,52 @@ -import test_setup -import os -import json -import unittest -import constants -from match_classes import TraceSnapOptions -from match_traces import get_trace_matches -from utils import load_matchable_set, get_features_with_cells - -class TestTraces(unittest.TestCase): - - def test_match_traces(self): - features_to_match_file = os.path.join(constants.DATA_DIR, "macon-manual-traces.geojson") - overture_file = os.path.join(constants.DATA_DIR, "overture-transportation-macon.geojson") - res = 12 - - to_match = load_matchable_set(features_to_match_file, is_multiline=False, res=res) - self.assertIsNotNone(to_match) - self.assertEqual(len(to_match.features_by_id), 4) - - id_to_match = "manual_trace#1" - self.assertIn(id_to_match, to_match.features_by_id) - source_feature = to_match.features_by_id[id_to_match] - - overture = load_matchable_set(overture_file, is_multiline=True, properties_filter = {"type": "segment"}, res=res) - self.assertIsNotNone(overture.features_by_id) - self.assertGreater(len(overture.features_by_id), 20000) - - options = TraceSnapOptions(max_point_to_road_distance=30) - target_candidates = get_features_with_cells(overture.features_by_cell, to_match.cells_by_id[source_feature.id]) - match_res = get_trace_matches(source_feature, target_candidates, options) - self.assertIsNotNone(match_res) - self.assertIsNotNone(match_res.points) - self.assertEqual(len(match_res.points), len(source_feature.geometry.coords)) - - self.assertGreater(match_res.source_length, 5000) - self.assertGreater(match_res.route_length, 5000) - - json_res = match_res.to_json() - self.assertIsNotNone(json_res) - j = json.dumps(json_res, 
indent=4) - - for idx, p in enumerate(match_res.points): - bp = p.best_prediction - self.assertIsNotNone(bp, f"best prediction for point {idx} is None") - self.assertIsNotNone(bp.id, f"best prediction for point {idx} has no id") - self.assertGreater(bp.distance_to_snapped_road, 0.0) - if idx > 0: - self.assertGreater(bp.route_distance_to_prev_point, 0.0) - -if __name__ == '__main__': +import test_setup +import os +import json +import unittest +import constants +from match_classes import TraceSnapOptions +from match_traces import get_trace_matches +from utils import load_matchable_set, get_features_with_cells + +class TestTraces(unittest.TestCase): + + def test_match_traces(self): + features_to_match_file = os.path.join(constants.DATA_DIR, "macon-manual-traces.geojson") + overture_file = os.path.join(constants.DATA_DIR, "overture-transportation-macon.geojson") + res = 12 + + to_match = load_matchable_set(features_to_match_file, is_multiline=False, res=res) + self.assertIsNotNone(to_match) + self.assertEqual(len(to_match.features_by_id), 4) + + id_to_match = "manual_trace#1" + self.assertIn(id_to_match, to_match.features_by_id) + source_feature = to_match.features_by_id[id_to_match] + + overture = load_matchable_set(overture_file, is_multiline=True, properties_filter = {"type": "segment"}, res=res) + self.assertIsNotNone(overture.features_by_id) + self.assertGreater(len(overture.features_by_id), 20000) + + options = TraceSnapOptions(max_point_to_road_distance=30) + target_candidates = get_features_with_cells(overture.features_by_cell, to_match.cells_by_id[source_feature.id]) + match_res = get_trace_matches(source_feature, target_candidates, options) + self.assertIsNotNone(match_res) + self.assertIsNotNone(match_res.points) + self.assertEqual(len(match_res.points), len(source_feature.geometry.coords)) + + self.assertGreater(match_res.source_length, 5000) + self.assertGreater(match_res.route_length, 5000) + + json_res = match_res.to_json() + 
self.assertIsNotNone(json_res) + j = json.dumps(json_res, indent=4) + + for idx, p in enumerate(match_res.points): + bp = p.best_prediction + self.assertIsNotNone(bp, f"best prediction for point {idx} is None") + self.assertIsNotNone(bp.id, f"best prediction for point {idx} has no id") + self.assertGreater(bp.distance_to_snapped_road, 0.0) + if idx > 0: + self.assertGreater(bp.route_distance_to_prev_point, 0.0) + +if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/gers/examples/python/tests/test_setup.py b/gers/examples/python/tests/test_setup.py index 457368324..9f826dea1 100644 --- a/gers/examples/python/tests/test_setup.py +++ b/gers/examples/python/tests/test_setup.py @@ -1,5 +1,5 @@ -import sys -import os - -parent_dir = os.path.dirname(os.path.abspath(__file__)) +import sys +import os + +parent_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.dirname(parent_dir)) \ No newline at end of file diff --git a/gers/examples/python/tests/utils_test.py b/gers/examples/python/tests/utils_test.py index 22e82586b..c98c3d20b 100644 --- a/gers/examples/python/tests/utils_test.py +++ b/gers/examples/python/tests/utils_test.py @@ -1,52 +1,52 @@ -import test_setup -import os -import unittest -import constants -from utils import get_distance, get_linestring_length, get_intersecting_h3_cells_for_geo_json, load_matchable_set -from shapely import Point, LineString - -class TestUtils(unittest.TestCase): - - def test_load_matchable_set_geojson(self): - features_to_match_file = os.path.join(constants.DATA_DIR, "macon-manual-traces.geojson") - s = load_matchable_set(features_to_match_file, res=12, is_multiline=False) - self.assertIsNotNone(s) - self.assertEqual(len(s.features_by_id), 4) - self.assertEqual(len(s.cells_by_id), 4) - self.assertGreater(len(s.features_by_cell), 0) - - def test_get_distance(self): - p1 = Point(-83.6878343, 32.8413587) - p2 = Point(-83.6877941, 32.8413903) - - d = get_distance(p1, p2) - 
self.assertAlmostEqual(d, 5.1, delta=0.1) - - def test_get_linestring_length(self): - l = LineString([(-83.6878343, 32.8413587), (-83.6877941, 32.8413903)]) - - d = get_linestring_length(l) - self.assertAlmostEqual(d, 5.1, delta=0.1) - - def test_get_intersecting_h3_cells_for_geo_json(self): - point = { "type": "Point", "coordinates": [-83.6197063, 32.8589311] } - actual_cells = get_intersecting_h3_cells_for_geo_json(point, 10) - expected_cells = ["8a44c0a32867fff"] - self.assertCountEqual(actual_cells, expected_cells) - - line = { "type": "LineString", "coordinates": [[-83.61940200000001, 32.858034], [-83.61940200000001, 32.859538]] } - actual_cells = get_intersecting_h3_cells_for_geo_json(line, 10) - expected_cells = ["8a44c0a3295ffff", "8a44c0a32867fff"] - self.assertCountEqual(actual_cells, expected_cells) - - polygon = { "type": "Polygon", "coordinates": [[[-83.6195695, 32.8591587], [-83.6192584, 32.8583723], [-83.619135, 32.8590437], [-83.6195695, 32.8591587]]] } - actual_cells = get_intersecting_h3_cells_for_geo_json(polygon, 10) - expected_cells = ["8a44c0a32877fff", "8a44c0a32867fff", "8a44c0a3295ffff"] - self.assertCountEqual(actual_cells, expected_cells) - - ml = { "type": "MultiLineString", "coordinates": [[[-83.61940200000001, 32.858034], [-83.61940200000001, 32.859538]], [[-83.6215878, 32.8580366], [-83.6202145, 32.8580546]]] } - actual_cells = get_intersecting_h3_cells_for_geo_json(ml, 10) - expected_cells = ["8a44c0a3294ffff", "8a44c0a32867fff", "8a44c0a304b7fff", "8a44c0a3295ffff"] - self.assertCountEqual(actual_cells, expected_cells) - - +import test_setup +import os +import unittest +import constants +from utils import get_distance, get_linestring_length, get_intersecting_h3_cells_for_geo_json, load_matchable_set +from shapely import Point, LineString + +class TestUtils(unittest.TestCase): + + def test_load_matchable_set_geojson(self): + features_to_match_file = os.path.join(constants.DATA_DIR, "macon-manual-traces.geojson") + s = 
load_matchable_set(features_to_match_file, res=12, is_multiline=False) + self.assertIsNotNone(s) + self.assertEqual(len(s.features_by_id), 4) + self.assertEqual(len(s.cells_by_id), 4) + self.assertGreater(len(s.features_by_cell), 0) + + def test_get_distance(self): + p1 = Point(-83.6878343, 32.8413587) + p2 = Point(-83.6877941, 32.8413903) + + d = get_distance(p1, p2) + self.assertAlmostEqual(d, 5.1, delta=0.1) + + def test_get_linestring_length(self): + l = LineString([(-83.6878343, 32.8413587), (-83.6877941, 32.8413903)]) + + d = get_linestring_length(l) + self.assertAlmostEqual(d, 5.1, delta=0.1) + + def test_get_intersecting_h3_cells_for_geo_json(self): + point = { "type": "Point", "coordinates": [-83.6197063, 32.8589311] } + actual_cells = get_intersecting_h3_cells_for_geo_json(point, 10) + expected_cells = ["8a44c0a32867fff"] + self.assertCountEqual(actual_cells, expected_cells) + + line = { "type": "LineString", "coordinates": [[-83.61940200000001, 32.858034], [-83.61940200000001, 32.859538]] } + actual_cells = get_intersecting_h3_cells_for_geo_json(line, 10) + expected_cells = ["8a44c0a3295ffff", "8a44c0a32867fff"] + self.assertCountEqual(actual_cells, expected_cells) + + polygon = { "type": "Polygon", "coordinates": [[[-83.6195695, 32.8591587], [-83.6192584, 32.8583723], [-83.619135, 32.8590437], [-83.6195695, 32.8591587]]] } + actual_cells = get_intersecting_h3_cells_for_geo_json(polygon, 10) + expected_cells = ["8a44c0a32877fff", "8a44c0a32867fff", "8a44c0a3295ffff"] + self.assertCountEqual(actual_cells, expected_cells) + + ml = { "type": "MultiLineString", "coordinates": [[[-83.61940200000001, 32.858034], [-83.61940200000001, 32.859538]], [[-83.6215878, 32.8580366], [-83.6202145, 32.8580546]]] } + actual_cells = get_intersecting_h3_cells_for_geo_json(ml, 10) + expected_cells = ["8a44c0a3294ffff", "8a44c0a32867fff", "8a44c0a304b7fff", "8a44c0a3295ffff"] + self.assertCountEqual(actual_cells, expected_cells) + + diff --git a/gers/examples/python/utils.py 
b/gers/examples/python/utils.py index c66599f24..d91ed13cf 100644 --- a/gers/examples/python/utils.py +++ b/gers/examples/python/utils.py @@ -1,212 +1,212 @@ -import csv -import json -from haversine import haversine, Unit -#from shapely.ops import transform -from shapely import wkt -from shapely.geometry import shape, mapping -from shapely.geometry.base import BaseGeometry -from dateutil import parser -from typing import Any, Dict, Iterable -from h3 import h3 - -from match_classes import MatchableFeature, MatchableFeaturesSet -#from pyproj import Geod - -def get_seconds_elapsed(t1_str, t2_str): - t1 = parser.parse(t1_str) - t2 = parser.parse(t2_str) - return (t2 - t1).total_seconds() - -def get_linestring_length(ls): - length = 0 - for i in range(len(ls.coords) - 1): - lon1, lat1 = ls.coords[i] - lon2, lat2 = ls.coords[i+1] - #_, _, d = geod.inv(lon1, lat1, lon2, lat2) - d = haversine((lat1, lon1), (lat2, lon2), unit=Unit.METERS) - length += d - return round(length, 2) - -def get_distance(point1, point2): - #_, _, d = geod.inv(point1.x, point1.y, point2.x, point2.y) - d = haversine((point1.y, point1.x), (point2.y, point2.x), unit=Unit.METERS) - return round(d, 2) - -def get_intersecting_h3_cells_for_line(coords, res): - """for coordinates of a linestring, gets all h3 cells of given resolution that intersect the line""" - cells = set() - prevCell = None - for coord in coords: - cell = h3.geo_to_h3(coord[1], coord[0], res) - cells.add(cell) - if (prevCell is None): - prevCell = cell - else: - if (prevCell != cell): - # two consecutive coordinates in the linestring may be more than one cell apart - # need to find intermediate cells between previous cell and the current one - if (not h3.h3_indexes_are_neighbors(prevCell, cell)): - intermediateCells = h3.h3_line(prevCell, cell) - for intermediateCell in intermediateCells: - cells.add(intermediateCell) - prevCell = cell - return cells - -def get_intersecting_h3_cells_for_geo_json(geometry: Any, res:int) -> Iterable[str]: 
- """gets all h3 cells of given resolution that intersect the geometry.""" - # h3 api wants two floats for point, geojson dict for polygon and custom code is needed for line and multi* geometries - geojson = mapping(geometry) if isinstance(geometry, BaseGeometry) else geometry - geom_type = geojson["type"] - coords = geojson["coordinates"] - if (geom_type.startswith("Multi")): - sub_geom_type = geom_type.replace("Multi", "") - sub_geoms = [{"type": sub_geom_type, "coordinates": sub_geom_coords } for sub_geom_coords in coords] - sub_cells = [sub_cell for sub_geom in sub_geoms for sub_cell in get_intersecting_h3_cells_for_geo_json(sub_geom, res)] - return set(sub_cells) - if (geom_type == "Point"): - return set([h3.geo_to_h3(coords[1], coords[0], res)]) - if (geom_type == "LineString"): - return get_intersecting_h3_cells_for_line(coords, res) - if (geom_type == "Polygon"): - innerCells = h3.polyfill(geojson, res, True) # this only covers the tiles whose centers are inside the polygon - boundaryCells = get_intersecting_h3_cells_for_line(coords[0], res) - return innerCells | boundaryCells - -def matches_properties_filter(feature: Dict[str, Any], properties_filter: Dict[str, Any]) -> bool: - if properties_filter is None: - return True - feat_props = feature.get("properties") - for prop in properties_filter: - if prop == "id": - return feature.get("id") == properties_filter[prop] - - if not prop in feat_props or (properties_filter[prop] != "*" and feat_props[prop] != properties_filter[prop]): - return False - return True - -def get_matchable_feature(feature_dict: Dict[str, Any]) -> MatchableFeature: - """creates a MatchableFeature from a dict with expected keys [id, geometry, properties], which could be either a geojson or parsed from a csv file with wkt geometry""" - id = feature_dict.get("id") - geom = feature_dict.get("geometry") - if type(geom) is dict and "type" in geom and "coordinates" in geom: - # if it"s a geojson feature - s = shape(geom) - elif 
isinstance(geom, str): - # if it"s a wkt string - s = wkt.loads(geom) - props = feature_dict.get("properties") - return MatchableFeature(id, s, props) - -def get_feature_cells(geom: Any, res: int, k_rings_to_add:int=1): - """gets all h3 cells of given resolution that intersect the geometry, and also the cells that are k rings around the intersecting cells""" - h3_cells = get_intersecting_h3_cells_for_geo_json(geom, res) - if k_rings_to_add == 0: - return list(h3_cells) - - rings = [h3.k_ring(h, k_rings_to_add) for h in h3_cells] - return list(set(cell for r in rings for cell in r)) - -def parse_geojson(filename: str, is_multiline: bool) -> Iterable[Dict[str, Any]]: - with open(filename, mode="r", errors="ignore") as file: - if is_multiline: - # text file with one geojson per line - i=0 - features = [] - for line in file: - i += 1 - try: - geojson = json.loads(line.strip().rstrip(",")) - features.append(geojson) - except Exception as x: - print(fr"Line {i}: " + str(x)) - return features - else: - full_gj = json.loads(file.read()) - if full_gj.get("type") == "FeatureCollection": - return full_gj.get("features") - else: - return [full_gj] - -def get_matchable_set(features: Iterable[Dict[str, Any]], properties_filter: dict=None, res: int=12, limit_feature_count=-1) -> MatchableFeaturesSet: - features_by_id = {} - cells_by_id = {} - features_by_cell = {} - for feature_dict in features: - try: - if not matches_properties_filter(feature_dict, properties_filter): - continue - - feature = get_matchable_feature(feature_dict) - features_by_id[feature.id] = feature - cells_by_id[feature.id] = get_feature_cells(feature.geometry, res) - for cell in cells_by_id[feature.id]: - if not cell in features_by_cell: - features_by_cell[cell] = [] - features_by_cell[cell].append(feature) - except Exception as x: - print(str(x)) - - if limit_feature_count > 0 and len(features_by_id) >= limit_feature_count: - break - return MatchableFeaturesSet(features_by_id, cells_by_id, features_by_cell) 
- -def parse_csv(filename: str, delimiter: str=",") -> MatchableFeaturesSet: - features = [] - i=0 - with open(filename, mode="r", errors="ignore") as file: - reader = csv.DictReader(file, delimiter=delimiter) - for row in reader: - feat_dict = {} - feat_dict["properties"] = {} - for k, v in row.items(): - default_id = str(i) - i += 1 - key = k.lower() - if not "id" in feat_dict and "id" in key: - feat_dict["id"] = v - elif not "geometry" in feat_dict and ("geometry" in key or "wkt" in key): - feat_dict["geometry"] = v - else: - feat_dict["properties"][k] = v - - if not "id" in feat_dict: - feat_dict["id"] = default_id - - if not "geometry" in feat_dict: - if "lat" in feat_dict["properties"] and "lon" in feat_dict["properties"]: - feat_dict["geometry"] = f'POINT({feat_dict["properties"]["lon"]} {feat_dict["properties"]["lat"]})' - else: - continue - - features.append(feat_dict) - return features - -def load_matchable_set(filename: str, properties_filter: dict=None, res: int=12, limit_feature_count=-1, is_multiline: bool=False, delimiter: str=",") -> MatchableFeaturesSet: - """loads a MatchableFeaturesSet from a geojson or csv file""" - extension = filename.split(".")[-1] - match extension: - case "geojson" | "json": - features = parse_geojson(filename, is_multiline=is_multiline) - case "csv": - features = parse_csv(filename, delimiter=delimiter) - case _: - raise Exception(f"Unsupported file type: {extension}") - - s = get_matchable_set(features, properties_filter, res, limit_feature_count) - return s - -def get_features_with_cells(features_by_cell: Dict[str, Iterable[MatchableFeature]], cells_filter: Iterable[str]) -> Iterable[MatchableFeature]: - """gets all features in `features_by_cell` that intersect any of the cells in `cells_filter`""" - with_cells = [] - candidate_ids = set() - for cell in cells_filter: - if cell in features_by_cell: - for candidate in features_by_cell[cell]: - if not candidate.id in candidate_ids: - candidate_ids.add(candidate.id) - 
with_cells.append(candidate) - return with_cells - -def write_json(results_json: Any, output_file_name: str): - with open(output_file_name, "w") as f: +import csv +import json +from haversine import haversine, Unit +#from shapely.ops import transform +from shapely import wkt +from shapely.geometry import shape, mapping +from shapely.geometry.base import BaseGeometry +from dateutil import parser +from typing import Any, Dict, Iterable +from h3 import h3 + +from match_classes import MatchableFeature, MatchableFeaturesSet +#from pyproj import Geod + +def get_seconds_elapsed(t1_str, t2_str): + t1 = parser.parse(t1_str) + t2 = parser.parse(t2_str) + return (t2 - t1).total_seconds() + +def get_linestring_length(ls): + length = 0 + for i in range(len(ls.coords) - 1): + lon1, lat1 = ls.coords[i] + lon2, lat2 = ls.coords[i+1] + #_, _, d = geod.inv(lon1, lat1, lon2, lat2) + d = haversine((lat1, lon1), (lat2, lon2), unit=Unit.METERS) + length += d + return round(length, 2) + +def get_distance(point1, point2): + #_, _, d = geod.inv(point1.x, point1.y, point2.x, point2.y) + d = haversine((point1.y, point1.x), (point2.y, point2.x), unit=Unit.METERS) + return round(d, 2) + +def get_intersecting_h3_cells_for_line(coords, res): + """for coordinates of a linestring, gets all h3 cells of given resolution that intersect the line""" + cells = set() + prevCell = None + for coord in coords: + cell = h3.geo_to_h3(coord[1], coord[0], res) + cells.add(cell) + if (prevCell is None): + prevCell = cell + else: + if (prevCell != cell): + # two consecutive coordinates in the linestring may be more than one cell apart + # need to find intermediate cells between previous cell and the current one + if (not h3.h3_indexes_are_neighbors(prevCell, cell)): + intermediateCells = h3.h3_line(prevCell, cell) + for intermediateCell in intermediateCells: + cells.add(intermediateCell) + prevCell = cell + return cells + +def get_intersecting_h3_cells_for_geo_json(geometry: Any, res:int) -> Iterable[str]: + 
"""gets all h3 cells of given resolution that intersect the geometry.""" + # h3 api wants two floats for point, geojson dict for polygon and custom code is needed for line and multi* geometries + geojson = mapping(geometry) if isinstance(geometry, BaseGeometry) else geometry + geom_type = geojson["type"] + coords = geojson["coordinates"] + if (geom_type.startswith("Multi")): + sub_geom_type = geom_type.replace("Multi", "") + sub_geoms = [{"type": sub_geom_type, "coordinates": sub_geom_coords } for sub_geom_coords in coords] + sub_cells = [sub_cell for sub_geom in sub_geoms for sub_cell in get_intersecting_h3_cells_for_geo_json(sub_geom, res)] + return set(sub_cells) + if (geom_type == "Point"): + return set([h3.geo_to_h3(coords[1], coords[0], res)]) + if (geom_type == "LineString"): + return get_intersecting_h3_cells_for_line(coords, res) + if (geom_type == "Polygon"): + innerCells = h3.polyfill(geojson, res, True) # this only covers the tiles whose centers are inside the polygon + boundaryCells = get_intersecting_h3_cells_for_line(coords[0], res) + return innerCells | boundaryCells + +def matches_properties_filter(feature: Dict[str, Any], properties_filter: Dict[str, Any]) -> bool: + if properties_filter is None: + return True + feat_props = feature.get("properties") + for prop in properties_filter: + if prop == "id": + return feature.get("id") == properties_filter[prop] + + if not prop in feat_props or (properties_filter[prop] != "*" and feat_props[prop] != properties_filter[prop]): + return False + return True + +def get_matchable_feature(feature_dict: Dict[str, Any]) -> MatchableFeature: + """creates a MatchableFeature from a dict with expected keys [id, geometry, properties], which could be either a geojson or parsed from a csv file with wkt geometry""" + id = feature_dict.get("id") + geom = feature_dict.get("geometry") + if type(geom) is dict and "type" in geom and "coordinates" in geom: + # if it"s a geojson feature + s = shape(geom) + elif isinstance(geom, 
str): + # if it"s a wkt string + s = wkt.loads(geom) + props = feature_dict.get("properties") + return MatchableFeature(id, s, props) + +def get_feature_cells(geom: Any, res: int, k_rings_to_add:int=1): + """gets all h3 cells of given resolution that intersect the geometry, and also the cells that are k rings around the intersecting cells""" + h3_cells = get_intersecting_h3_cells_for_geo_json(geom, res) + if k_rings_to_add == 0: + return list(h3_cells) + + rings = [h3.k_ring(h, k_rings_to_add) for h in h3_cells] + return list(set(cell for r in rings for cell in r)) + +def parse_geojson(filename: str, is_multiline: bool) -> Iterable[Dict[str, Any]]: + with open(filename, mode="r", errors="ignore") as file: + if is_multiline: + # text file with one geojson per line + i=0 + features = [] + for line in file: + i += 1 + try: + geojson = json.loads(line.strip().rstrip(",")) + features.append(geojson) + except Exception as x: + print(fr"Line {i}: " + str(x)) + return features + else: + full_gj = json.loads(file.read()) + if full_gj.get("type") == "FeatureCollection": + return full_gj.get("features") + else: + return [full_gj] + +def get_matchable_set(features: Iterable[Dict[str, Any]], properties_filter: dict=None, res: int=12, limit_feature_count=-1) -> MatchableFeaturesSet: + features_by_id = {} + cells_by_id = {} + features_by_cell = {} + for feature_dict in features: + try: + if not matches_properties_filter(feature_dict, properties_filter): + continue + + feature = get_matchable_feature(feature_dict) + features_by_id[feature.id] = feature + cells_by_id[feature.id] = get_feature_cells(feature.geometry, res) + for cell in cells_by_id[feature.id]: + if not cell in features_by_cell: + features_by_cell[cell] = [] + features_by_cell[cell].append(feature) + except Exception as x: + print(str(x)) + + if limit_feature_count > 0 and len(features_by_id) >= limit_feature_count: + break + return MatchableFeaturesSet(features_by_id, cells_by_id, features_by_cell) + +def 
parse_csv(filename: str, delimiter: str=",") -> MatchableFeaturesSet: + features = [] + i=0 + with open(filename, mode="r", errors="ignore") as file: + reader = csv.DictReader(file, delimiter=delimiter) + for row in reader: + feat_dict = {} + feat_dict["properties"] = {} + for k, v in row.items(): + default_id = str(i) + i += 1 + key = k.lower() + if not "id" in feat_dict and "id" in key: + feat_dict["id"] = v + elif not "geometry" in feat_dict and ("geometry" in key or "wkt" in key): + feat_dict["geometry"] = v + else: + feat_dict["properties"][k] = v + + if not "id" in feat_dict: + feat_dict["id"] = default_id + + if not "geometry" in feat_dict: + if "lat" in feat_dict["properties"] and "lon" in feat_dict["properties"]: + feat_dict["geometry"] = f'POINT({feat_dict["properties"]["lon"]} {feat_dict["properties"]["lat"]})' + else: + continue + + features.append(feat_dict) + return features + +def load_matchable_set(filename: str, properties_filter: dict=None, res: int=12, limit_feature_count=-1, is_multiline: bool=False, delimiter: str=",") -> MatchableFeaturesSet: + """loads a MatchableFeaturesSet from a geojson or csv file""" + extension = filename.split(".")[-1] + match extension: + case "geojson" | "json": + features = parse_geojson(filename, is_multiline=is_multiline) + case "csv": + features = parse_csv(filename, delimiter=delimiter) + case _: + raise Exception(f"Unsupported file type: {extension}") + + s = get_matchable_set(features, properties_filter, res, limit_feature_count) + return s + +def get_features_with_cells(features_by_cell: Dict[str, Iterable[MatchableFeature]], cells_filter: Iterable[str]) -> Iterable[MatchableFeature]: + """gets all features in `features_by_cell` that intersect any of the cells in `cells_filter`""" + with_cells = [] + candidate_ids = set() + for cell in cells_filter: + if cell in features_by_cell: + for candidate in features_by_cell[cell]: + if not candidate.id in candidate_ids: + candidate_ids.add(candidate.id) + 
with_cells.append(candidate) + return with_cells + +def write_json(results_json: Any, output_file_name: str): + with open(output_file_name, "w") as f: json.dump(results_json, f, indent=4) \ No newline at end of file diff --git a/reference/counterexamples/divisions/division_area/bad-missing-is-land.yaml b/reference/counterexamples/divisions/division_area/bad-missing-is-land.yaml index 3c7018342..c7bfe5675 100644 --- a/reference/counterexamples/divisions/division_area/bad-missing-is-land.yaml +++ b/reference/counterexamples/divisions/division_area/bad-missing-is-land.yaml @@ -1,14 +1,14 @@ ---- -id: counterexample:division_area:bad-is-territorial -type: Feature -geometry: - type: LineString - coordinates: [[0, 1], [1, 2]] -properties: - theme: divisions - type: division_area - version: 0 - subtype: country - division_id: counterexample:division_boundary:some-division - is_territorial: false +--- +id: counterexample:division_area:bad-is-territorial +type: Feature +geometry: + type: LineString + coordinates: [[0, 1], [1, 2]] +properties: + theme: divisions + type: division_area + version: 0 + subtype: country + division_id: counterexample:division_boundary:some-division + is_territorial: false country: ZZ \ No newline at end of file diff --git a/reference/counterexamples/transportation/segment/road/bad-road-destinations.yaml b/reference/counterexamples/transportation/segment/road/bad-road-destinations.yaml index a52da0d0b..d30e98f59 100644 --- a/reference/counterexamples/transportation/segment/road/bad-road-destinations.yaml +++ b/reference/counterexamples/transportation/segment/road/bad-road-destinations.yaml @@ -1,52 +1,52 @@ ---- -id: overture:transportation:segment:example:destinations:1 -type: Feature -geometry: - type: LineString - coordinates: [[0, 0], [1, 1]] -properties: - theme: transportation - type: segment - subtype: road - class: secondary - version: 0 - connectors: - - connector_id: overture:transportation:connector:123 - at: 0 - - connector_id: 
overture:transportation:connector:678 - at: 1 - destinations: - - labels: - - value: Seattle - type: unknown - - value: Seattle - type: unknown - - value: Redmond - - value: - foo: bar - type: unknown - - value: Bellevue - type: other - - value: Main Street - type: street - - value: I90 - type: route - symbols: - - airport - from: - - segment_id: overture:transportation:segment:234 - - segment_id: overture:transportation:segment:567 - connector_id: overture:transportation:connector:567 - foo: bar - when: - heading: forward - to_connector_id: overture:transportation:connector:123 - final_heading: backward - - labels: - - value: Kirkland - type: unknown - from: - - segment_id: overture:transportation:segment:567 - connector_id: overture:transportation:connector:567 - ext_expected_errors: - - "items at 0 and 1 are equal" +--- +id: overture:transportation:segment:example:destinations:1 +type: Feature +geometry: + type: LineString + coordinates: [[0, 0], [1, 1]] +properties: + theme: transportation + type: segment + subtype: road + class: secondary + version: 0 + connectors: + - connector_id: overture:transportation:connector:123 + at: 0 + - connector_id: overture:transportation:connector:678 + at: 1 + destinations: + - labels: + - value: Seattle + type: unknown + - value: Seattle + type: unknown + - value: Redmond + - value: + foo: bar + type: unknown + - value: Bellevue + type: other + - value: Main Street + type: street + - value: I90 + type: route + symbols: + - airport + from: + - segment_id: overture:transportation:segment:234 + - segment_id: overture:transportation:segment:567 + connector_id: overture:transportation:connector:567 + foo: bar + when: + heading: forward + to_connector_id: overture:transportation:connector:123 + final_heading: backward + - labels: + - value: Kirkland + type: unknown + from: + - segment_id: overture:transportation:segment:567 + connector_id: overture:transportation:connector:567 + ext_expected_errors: + - "items at 0 and 1 are equal" 
From 720291d8684b7c8c26757d1849c1617405c66c02 Mon Sep 17 00:00:00 2001 From: John McCall Date: Thu, 21 May 2026 12:34:51 -0400 Subject: [PATCH 2/2] [FEATURE] Branching strategy Phase 2.A - version computing & baselining Introduce reusable GitHub Actions for CodeArtifact credentials and version computation. Baseline all packages to static versions (overture-schema at 1.17.0, others at 0.1.0). Add dry-run workflow for version visibility. Migrate callers off legacy code-artifact.sh. Closes #508 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- .github/actions/code-artifact/action.yml | 67 ++++++++++ .github/actions/compute-version/action.yml | 114 ++++++++++++++++ .../check-python-package-versions.yaml | 2 - .../workflows/compute-versions-dry-run.yaml | 124 ++++++++++++++++++ .../workflows/publish-python-packages.yaml | 12 +- ...eusable-check-python-package-versions.yaml | 17 +-- .github/workflows/scripts/code-artifact.sh | 60 --------- .github/workflows/scripts/package-versions.py | 2 +- CONTRIBUTING.md | 15 ++- .../pyproject.toml | 5 +- .../overture/schema/addresses/__about__.py | 7 +- packages/overture-schema-annex/pyproject.toml | 5 +- .../src/overture/schema/__about__.py | 7 +- .../overture-schema-base-theme/pyproject.toml | 5 +- .../src/overture/schema/base/__about__.py | 7 +- .../pyproject.toml | 5 +- .../overture/schema/buildings/__about__.py | 7 +- packages/overture-schema-cli/pyproject.toml | 5 +- .../src/overture/schema/cli/__about__.py | 7 +- .../overture-schema-codegen/pyproject.toml | 5 +- .../src/overture/schema/codegen/__about__.py | 7 +- .../overture-schema-common/pyproject.toml | 5 +- .../src/overture/schema/common/__about__.py | 7 +- .../pyproject.toml | 5 +- .../overture/schema/divisions/__about__.py | 7 +- .../pyproject.toml | 5 +- .../src/overture/schema/places/__about__.py | 7 +- .../overture-schema-system/pyproject.toml | 5 +- .../src/overture/schema/system/__about__.py | 7 +- .../pyproject.toml 
| 5 +- .../schema/transportation/__about__.py | 7 +- packages/overture-schema/pyproject.toml | 5 +- .../src/overture/schema/__about__.py | 7 +- uv.lock | 14 +- 34 files changed, 425 insertions(+), 146 deletions(-) create mode 100644 .github/actions/code-artifact/action.yml create mode 100644 .github/actions/compute-version/action.yml create mode 100644 .github/workflows/compute-versions-dry-run.yaml delete mode 100755 .github/workflows/scripts/code-artifact.sh diff --git a/.github/actions/code-artifact/action.yml b/.github/actions/code-artifact/action.yml new file mode 100644 index 000000000..d64b607c9 --- /dev/null +++ b/.github/actions/code-artifact/action.yml @@ -0,0 +1,67 @@ +name: CodeArtifact credentials +description: > + Retrieves an authorization token and constructs index/publish URLs for AWS + CodeArtifact. Assumes AWS credentials are already configured in the job. + +inputs: + aws_account_id: + description: AWS account ID that owns the CodeArtifact domain. + required: false + default: "505071440022" + aws_region: + description: AWS region where the CodeArtifact repository is hosted. + required: false + default: us-west-2 + domain: + description: CodeArtifact domain name. + required: false + default: overture-pypi + repository: + description: CodeArtifact repository name. + required: false + default: overture + +outputs: + token: + description: CodeArtifact authorization token (masked in logs). + value: ${{ steps.creds.outputs.token }} + index_url: + description: > + Full index URL with embedded credentials, suitable for + `--index-url` / `--extra-index-url` in pip/uv. + value: ${{ steps.creds.outputs.index_url }} + publish_url: + description: > + Publish endpoint URL (no credentials embedded — pass token separately). 
+ value: ${{ steps.creds.outputs.publish_url }} + +runs: + using: composite + steps: + - name: Get CodeArtifact credentials + id: creds + shell: bash + env: + AWS_ACCOUNT_ID: ${{ inputs.aws_account_id }} + AWS_REGION: ${{ inputs.aws_region }} + DOMAIN: ${{ inputs.domain }} + REPOSITORY: ${{ inputs.repository }} + run: | + set -euo pipefail + + token=$(aws codeartifact get-authorization-token \ + --region "$AWS_REGION" \ + --domain "$DOMAIN" \ + --domain-owner "$AWS_ACCOUNT_ID" \ + --query authorizationToken \ + --output text) + echo "::add-mask::${token}" + echo "token=${token}" >> "$GITHUB_OUTPUT" + + base_url="https://${DOMAIN}-${AWS_ACCOUNT_ID}.d.codeartifact.${AWS_REGION}.amazonaws.com/pypi/${REPOSITORY}" + + index_url="https://aws:${token}@${DOMAIN}-${AWS_ACCOUNT_ID}.d.codeartifact.${AWS_REGION}.amazonaws.com/pypi/${REPOSITORY}/simple/" + echo "::add-mask::${index_url}" + echo "index_url=${index_url}" >> "$GITHUB_OUTPUT" + + echo "publish_url=${base_url}" >> "$GITHUB_OUTPUT" diff --git a/.github/actions/compute-version/action.yml b/.github/actions/compute-version/action.yml new file mode 100644 index 000000000..8d639f42b --- /dev/null +++ b/.github/actions/compute-version/action.yml @@ -0,0 +1,114 @@ +name: Compute package version +description: > + Computes the version string for a package given branch context. + + Contexts: + - `vnext`: `+dev.` (PEP 440 local version). + Falls back to `..0+dev.` if never published. + Local versions are rejected by PyPI — only suitable for private indexes + like CodeArtifact. + - `main`: `..` — increments the highest + published patch for the same major.minor series. + - `main-bump`: `..0` — used when a major/minor bump commit + lands on main (patch resets to 0). + + Prerequisites: repo must be checked out and `uv` must be available. + +inputs: + package: + description: Distribution package name (e.g. overture-schema-common). + required: true + context: + description: > + Branch context controlling the version formula. 
+ Supported values: `vnext`, `main`, `main-bump`. + required: true + index_url: + description: > + PyPI simple index URL with embedded credentials for querying + CodeArtifact. Obtain via the `.github/actions/code-artifact` action's + `index_url` output after configuring AWS credentials. + required: true + +outputs: + version: + description: Computed version string (PEP 440). + value: ${{ steps.compute.outputs.version }} + +runs: + using: composite + steps: + - name: Compute version + id: compute + shell: bash + env: + PACKAGE: ${{ inputs.package }} + CONTEXT: ${{ inputs.context }} + INDEX_URL: ${{ inputs.index_url }} + RUN_NUMBER: ${{ github.run_number }} + run: | + set -euo pipefail + + # --- Read seed version from pyproject.toml --- + SEED=$(cd "packages/${PACKAGE}" && uv version --short) + MAJOR_MINOR=$(echo "$SEED" | grep -oE '^[0-9]+\.[0-9]+') + echo "Seed version for ${PACKAGE}: ${SEED} (major.minor: ${MAJOR_MINOR})" + + # --- Query CodeArtifact for the latest published version --- + # uv pip compile resolves the latest matching version from the index. + # We constrain to the current major.minor series for `main` context. + resolve_latest() { + local constraint="$1" + local output + # uv pip compile exits non-zero on network/auth errors; let those surface. + # "Could not find a version" is a normal empty-result — grep returns 1, which is OK. 
+ output=$(echo "$constraint" \ + | uv pip compile - --index-url "$INDEX_URL" --no-deps --quiet 2>&1) || { + echo "ERROR: uv pip compile failed for '${constraint}':" >&2 + echo "$output" >&2 + exit 1 + } + echo "$output" | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || true + } + + # --- Compute version based on context --- + case "$CONTEXT" in + vnext) + LATEST=$(resolve_latest "$PACKAGE") + if [ -n "$LATEST" ]; then + BASE="$LATEST" + echo "Latest published version: ${LATEST}" + else + BASE="${MAJOR_MINOR}.0" + echo "No published version found — falling back to ${BASE}" + fi + VERSION="${BASE}+dev.${RUN_NUMBER}" + ;; + + main) + # Resolve the highest patch within the current major.minor series. + LATEST_IN_SERIES=$(resolve_latest "${PACKAGE}>=${MAJOR_MINOR}.0,<${MAJOR_MINOR}.99999") + if [ -n "$LATEST_IN_SERIES" ]; then + CURRENT_PATCH=$(echo "$LATEST_IN_SERIES" | grep -oE '[0-9]+$') + NEXT_PATCH=$((CURRENT_PATCH + 1)) + echo "Latest in ${MAJOR_MINOR}.x series: ${LATEST_IN_SERIES} → next patch: ${NEXT_PATCH}" + else + NEXT_PATCH=0 + echo "No published version in ${MAJOR_MINOR}.x series — starting at patch 0" + fi + VERSION="${MAJOR_MINOR}.${NEXT_PATCH}" + ;; + + main-bump) + VERSION="${MAJOR_MINOR}.0" + echo "Major/minor bump — patch resets to 0" + ;; + + *) + echo "::error::Unknown context '${CONTEXT}'. Supported: vnext, main, main-bump." 
+ exit 1 + ;; + esac + + echo "Computed version for ${PACKAGE} (${CONTEXT}): ${VERSION}" + echo "version=${VERSION}" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/check-python-package-versions.yaml b/.github/workflows/check-python-package-versions.yaml index 0ea47f1d3..1277c9dcf 100644 --- a/.github/workflows/check-python-package-versions.yaml +++ b/.github/workflows/check-python-package-versions.yaml @@ -4,8 +4,6 @@ on: pull_request: paths: - '**/pyproject.toml' - - 'packages/**/__about__.py' - permissions: contents: read diff --git a/.github/workflows/compute-versions-dry-run.yaml b/.github/workflows/compute-versions-dry-run.yaml new file mode 100644 index 000000000..962556b67 --- /dev/null +++ b/.github/workflows/compute-versions-dry-run.yaml @@ -0,0 +1,124 @@ +name: Compute versions (dry run) + +# Runs on pushes to vnext and main. Computes and logs the version that would +# be published for each affected package — but does not build or publish. +# Remove this workflow once Phase 3 publish workflows are live. 
+ +on: + push: + branches: [main, vnext] + paths: + - '**/pyproject.toml' + workflow_dispatch: + inputs: + context: + description: "Version context to simulate" + type: choice + options: [vnext, main, main-bump] + default: vnext + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + compute-versions: + name: Compute versions + if: github.event.repository.full_name == github.repository + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write # Required for OIDC authentication to AWS + + steps: + - name: Install uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + with: + version: latest + + - name: Check out code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Determine context + id: context + env: + INPUT_CONTEXT: ${{ inputs.context }} + REF_NAME: ${{ github.ref_name }} + run: | + if [ -n "$INPUT_CONTEXT" ]; then + echo "value=$INPUT_CONTEXT" >> "$GITHUB_OUTPUT" + elif [ "$REF_NAME" = "vnext" ]; then + echo "value=vnext" >> "$GITHUB_OUTPUT" + else + echo "value=main" >> "$GITHUB_OUTPUT" + fi + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@d979d5b3a71173a29b74b5b88418bfda9437d885 # v6.1.1 + with: + aws-region: us-west-2 + role-to-assume: arn:aws:iam::505071440022:role/GithubActions_Schema_CodeArtifact_ReadOnly + role-session-name: GitHubActions_${{github.job}}_${{github.run_id}} + + - name: Get CodeArtifact credentials + id: ca + uses: ./.github/actions/code-artifact + + - name: Compute version for each package + env: + CONTEXT: ${{ steps.context.outputs.value }} + INDEX_URL: ${{ steps.ca.outputs.index_url }} + run: | + echo "## Computed versions (context: ${CONTEXT})" >> "$GITHUB_STEP_SUMMARY" + echo "" >> "$GITHUB_STEP_SUMMARY" + echo "| Package | Version |" >> "$GITHUB_STEP_SUMMARY" + echo "|---------|---------|" >> 
"$GITHUB_STEP_SUMMARY" + + for pkg_dir in packages/overture-schema*/; do + pkg=$(basename "$pkg_dir") + [ -f "${pkg_dir}/pyproject.toml" ] || continue + + SEED=$(cd "$pkg_dir" && uv version --short) + MAJOR_MINOR=$(echo "$SEED" | grep -oE '^[0-9]+\.[0-9]+') + + # Resolve latest from CA + resolve_latest() { + local output + output=$(echo "$1" \ + | uv pip compile - --index-url "$INDEX_URL" --no-deps --quiet 2>&1) || { + echo "ERROR: uv pip compile failed for '$1':" >&2 + echo "$output" >&2 + exit 1 + } + echo "$output" | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || true + } + + case "$CONTEXT" in + vnext) + LATEST=$(resolve_latest "$pkg") + BASE="${LATEST:-${MAJOR_MINOR}.0}" + VERSION="${BASE}+dev.${GITHUB_RUN_NUMBER}" + ;; + main) + LATEST_IN_SERIES=$(resolve_latest "${pkg}>=${MAJOR_MINOR}.0,<${MAJOR_MINOR}.99999") + if [ -n "$LATEST_IN_SERIES" ]; then + CURRENT_PATCH=$(echo "$LATEST_IN_SERIES" | grep -oE '[0-9]+$') + NEXT_PATCH=$((CURRENT_PATCH + 1)) + else + NEXT_PATCH=0 + fi + VERSION="${MAJOR_MINOR}.${NEXT_PATCH}" + ;; + main-bump) + VERSION="${MAJOR_MINOR}.0" + ;; + esac + + echo "| \`${pkg}\` | \`${VERSION}\` |" >> "$GITHUB_STEP_SUMMARY" + echo " ${pkg} → ${VERSION}" + done diff --git a/.github/workflows/publish-python-packages.yaml b/.github/workflows/publish-python-packages.yaml index ed2dcfd88..1246c4736 100644 --- a/.github/workflows/publish-python-packages.yaml +++ b/.github/workflows/publish-python-packages.yaml @@ -5,7 +5,6 @@ on: branches: [main] paths: - '**/pyproject.toml' - - 'packages/**/__about__.py' workflow_dispatch: inputs: aws_iam_role_name: @@ -75,16 +74,9 @@ jobs: role-to-assume: arn:aws:iam::505071440022:role/GithubActions_Schema_CodeArtifact_Publish role-session-name: GitHubActions_${{github.job}}_${{github.run_id}} - - name: Get CodeArtifact publish URL + - name: Get CodeArtifact credentials id: get-code-artifact-params - run: | - token=$(./.github/workflows/scripts/code-artifact.sh token \ - 505071440022 us-west-2 overture-pypi) - 
echo "::add-mask::${token}" - echo "token=${token}" >> $GITHUB_OUTPUT - publish_url=$(./.github/workflows/scripts/code-artifact.sh publish-url \ - 505071440022 us-west-2 overture-pypi overture) - echo "publish_url=${publish_url}" >> $GITHUB_OUTPUT + uses: ./.github/actions/code-artifact - name: Publish package ${{ matrix.package }} version ${{ matrix.after }} to PyPI env: diff --git a/.github/workflows/reusable-check-python-package-versions.yaml b/.github/workflows/reusable-check-python-package-versions.yaml index 19bf5ef30..1ce727507 100644 --- a/.github/workflows/reusable-check-python-package-versions.yaml +++ b/.github/workflows/reusable-check-python-package-versions.yaml @@ -133,17 +133,12 @@ jobs: - name: Get CodeArtifact index URL id: get-code-artifact-index-url if: steps.save-changes.outputs.num_changed_packages > 0 - env: - AWS_ACCOUNT_ID: ${{ inputs.aws_account_id }} - AWS_REGION: ${{ inputs.aws_region }} - DOMAIN: ${{ inputs.domain }} - REPOSITORY: ${{ inputs.repository }} - run: | - index_url=$(./.github/workflows/scripts/code-artifact.sh index-url \ - "$AWS_ACCOUNT_ID" "$AWS_REGION" \ - "$DOMAIN" "$REPOSITORY") - echo "::add-mask::${index_url}" - echo "index_url=${index_url}" >> $GITHUB_OUTPUT + uses: ./.github/actions/code-artifact + with: + aws_account_id: ${{ inputs.aws_account_id }} + aws_region: ${{ inputs.aws_region }} + domain: ${{ inputs.domain }} + repository: ${{ inputs.repository }} - name: Fail if any of the new versions already exist in the repo if: steps.save-changes.outputs.num_changed_packages > 0 diff --git a/.github/workflows/scripts/code-artifact.sh b/.github/workflows/scripts/code-artifact.sh deleted file mode 100755 index 488bdc5b0..000000000 --- a/.github/workflows/scripts/code-artifact.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail - -readonly subcommand="$1" - -function token() { - local -r aws_account_id="$1" - local -r aws_region="$2" - local -r domain="$3" - - aws codeartifact 
get-authorization-token \ - --region "$aws_region" \ - --domain "$domain" \ - --domain-owner "$aws_account_id" \ - --query authorizationToken \ - --output text -} - -function repo_url() { - local -r token="$1" - local -r credentials="${token:+aws:$token@}" - local -r aws_account_id="$2" - local -r aws_region="$3" - local -r domain="$4" - local -r repository="$5" - local -r suffix="$6" - - printf "https://%s%s-%s.d.codeartifact.%s.amazonaws.com/pypi/%s%s\n" \ - "$credentials" "$domain" "$aws_account_id" "$aws_region" "$repository" "$suffix" -} - -case "$subcommand" in - token) - if [ $# -ne 4 ]; then - >&2 echo "Usage: $0 token " - exit 1 - fi - token "$2" "$3" "$4" - ;; - - index-url|publish-url) - if [ $# -ne 5 ]; then - >&2 echo "Usage: $0 $subcommand " - exit 1 - fi - - if [ "$subcommand" = "index-url" ]; then - repo_url "$(token "$2" "$3" "$4")" "$2" "$3" "$4" "$5" "/simple/" - else - repo_url "" "$2" "$3" "$4" "$5" "" - fi - ;; - - *) - >&2 echo "Unknown subcommand: ${subcommand:-}" - >&2 echo "Valid subcommands: token | index-url | publish-url" - exit 1 - ;; -esac diff --git a/.github/workflows/scripts/package-versions.py b/.github/workflows/scripts/package-versions.py index c28ad8520..2be0b735a 100755 --- a/.github/workflows/scripts/package-versions.py +++ b/.github/workflows/scripts/package-versions.py @@ -20,7 +20,7 @@ def collect(): packages = sorted( d.name for d in packages_dir.iterdir() - if d.is_dir() and d.name.startswith("overture-schema") + if d.is_dir() and d.name.startswith("overture-schema") and (d / "pyproject.toml").exists() ) package_versions = [ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c5e2a9525..b0abd3f3e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -115,6 +115,12 @@ If the automatic rebase fails, a GitHub issue is opened and assigned to the auth > `git pull --rebase` (or `git fetch origin && git rebase origin/vnext`) on your branch before > pushing again. 
+### Version dry-run (informational) + +After each push to `main` or `vnext` that changes a `pyproject.toml` file (or when the workflow is started manually via `workflow_dispatch`), CI runs the `compute-versions-dry-run` workflow. It logs what package versions **would** be stamped at publish time — no artifacts are actually produced. Check the workflow's job summary for a table of computed versions. + +This workflow will be replaced by actual publish workflows in Phase 3. + ## Migration Notes When Phases 0-4 are complete, this area can be removed in favor of more permanent documentation. @@ -134,7 +140,14 @@ If your fork still references `dev` or `staging`, update your remotes accordingl - vnext compatibility check added: every PR to `main` verifies that `vnext` can rebase cleanly on top; posts exact fix commands on conflict. - Post-merge automatic rebase added: `vnext` is force-rebased onto `main` after every merge; if it fails, a GitHub issue is opened. -### [Phase 2](https://github.com/OvertureMaps/schema/issues/508) +### [Phase 2.A](https://github.com/OvertureMaps/schema/issues/508), May 2026 + +- All packages baselined with static versions in `pyproject.toml` (`overture-schema` at `1.17.1`, others at `0.1.1`). +- `compute-version` composite action added: computes PEP 440 versions for vnext (dev), main (patch), and main-bump (reset) contexts. +- `code-artifact` composite action added: replaces the legacy shell script for AWS CodeArtifact auth. +- `compute-versions-dry-run` workflow added for version visibility until Phase 3 publish workflows land. 
+ +### [Phase 2.B](https://github.com/OvertureMaps/schema/issues/533) - WIP / Pending diff --git a/packages/overture-schema-addresses-theme/pyproject.toml b/packages/overture-schema-addresses-theme/pyproject.toml index a550ec79f..9efd0ce6e 100644 --- a/packages/overture-schema-addresses-theme/pyproject.toml +++ b/packages/overture-schema-addresses-theme/pyproject.toml @@ -8,7 +8,7 @@ dependencies = [ "pydantic>=2.12.0", ] description = "Overture Maps addresses theme models and structures" -dynamic = ["version"] +version = "0.1.1" license = "MIT" name = "overture-schema-addresses-theme" readme = "README.md" @@ -28,9 +28,6 @@ overture-schema-system = { workspace = true } build-backend = "hatchling.build" requires = ["hatchling"] -[tool.hatch.version] -path = "src/overture/schema/addresses/__about__.py" - [tool.hatch.build.targets.wheel] packages = ["src/overture"] diff --git a/packages/overture-schema-addresses-theme/src/overture/schema/addresses/__about__.py b/packages/overture-schema-addresses-theme/src/overture/schema/addresses/__about__.py index 5e3bb11ca..73d661b36 100644 --- a/packages/overture-schema-addresses-theme/src/overture/schema/addresses/__about__.py +++ b/packages/overture-schema-addresses-theme/src/overture/schema/addresses/__about__.py @@ -1 +1,6 @@ -__version__ = "0.1.1.dev1" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("overture-schema-addresses-theme") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/packages/overture-schema-annex/pyproject.toml b/packages/overture-schema-annex/pyproject.toml index 95a45d362..af7ea058d 100644 --- a/packages/overture-schema-annex/pyproject.toml +++ b/packages/overture-schema-annex/pyproject.toml @@ -4,7 +4,7 @@ maintainers = [ ] dependencies = ["overture-schema-common", "overture-schema-system", "pydantic>=2.12.0"] description = "Add your description here" -dynamic = ["version"] +version = "0.1.1" license = "MIT" name = "overture-schema-annex" 
readme = "README.md" @@ -23,9 +23,6 @@ Issues = "https://github.com/OvertureMaps/schema/issues" build-backend = "hatchling.build" requires = ["hatchling"] -[tool.hatch.version] -path = "src/overture/schema/__about__.py" - [tool.hatch.build.targets.wheel] packages = ["src/overture"] diff --git a/packages/overture-schema-annex/src/overture/schema/__about__.py b/packages/overture-schema-annex/src/overture/schema/__about__.py index 5e3bb11ca..6e94b6bec 100644 --- a/packages/overture-schema-annex/src/overture/schema/__about__.py +++ b/packages/overture-schema-annex/src/overture/schema/__about__.py @@ -1 +1,6 @@ -__version__ = "0.1.1.dev1" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("overture-schema-annex") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/packages/overture-schema-base-theme/pyproject.toml b/packages/overture-schema-base-theme/pyproject.toml index c671e1d88..cd0644108 100644 --- a/packages/overture-schema-base-theme/pyproject.toml +++ b/packages/overture-schema-base-theme/pyproject.toml @@ -8,7 +8,7 @@ dependencies = [ "pydantic>=2.12.0", ] description = "Overture Maps base theme shared structures and models (bathymetry, infrastructure, land, land_cover, land_use, water)" -dynamic = ["version"] +version = "0.1.1" license = "MIT" name = "overture-schema-base-theme" readme = "README.md" @@ -28,9 +28,6 @@ overture-schema-system = { workspace = true } build-backend = "hatchling.build" requires = ["hatchling"] -[tool.hatch.version] -path = "src/overture/schema/base/__about__.py" - [tool.hatch.build.targets.wheel] packages = ["src/overture"] diff --git a/packages/overture-schema-base-theme/src/overture/schema/base/__about__.py b/packages/overture-schema-base-theme/src/overture/schema/base/__about__.py index 5e3bb11ca..89a6dd9ca 100644 --- a/packages/overture-schema-base-theme/src/overture/schema/base/__about__.py +++ b/packages/overture-schema-base-theme/src/overture/schema/base/__about__.py 
@@ -1 +1,6 @@ -__version__ = "0.1.1.dev1" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("overture-schema-base-theme") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/packages/overture-schema-buildings-theme/pyproject.toml b/packages/overture-schema-buildings-theme/pyproject.toml index 833b67a23..2ab6a3352 100644 --- a/packages/overture-schema-buildings-theme/pyproject.toml +++ b/packages/overture-schema-buildings-theme/pyproject.toml @@ -8,7 +8,7 @@ dependencies = [ "pydantic>=2.12.0", ] description = "Overture Maps buildings theme shared structures, building types, and building part types" -dynamic = ["version"] +version = "0.1.1" license = "MIT" name = "overture-schema-buildings-theme" readme = "README.md" @@ -28,9 +28,6 @@ overture-schema-system = { workspace = true } build-backend = "hatchling.build" requires = ["hatchling"] -[tool.hatch.version] -path = "src/overture/schema/buildings/__about__.py" - [tool.hatch.build.targets.wheel] packages = ["src/overture"] diff --git a/packages/overture-schema-buildings-theme/src/overture/schema/buildings/__about__.py b/packages/overture-schema-buildings-theme/src/overture/schema/buildings/__about__.py index 5e3bb11ca..6b9d493bf 100644 --- a/packages/overture-schema-buildings-theme/src/overture/schema/buildings/__about__.py +++ b/packages/overture-schema-buildings-theme/src/overture/schema/buildings/__about__.py @@ -1 +1,6 @@ -__version__ = "0.1.1.dev1" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("overture-schema-buildings-theme") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/packages/overture-schema-cli/pyproject.toml b/packages/overture-schema-cli/pyproject.toml index dd0aa1bd8..f32fd78ef 100644 --- a/packages/overture-schema-cli/pyproject.toml +++ b/packages/overture-schema-cli/pyproject.toml @@ -12,7 +12,7 @@ dependencies = [ "yamlcore>=0.0.4", ] description = "Command-line 
interface for Overture Maps schema validation and JSON Schema generation" -dynamic = ["version"] +version = "0.1.1" license = "MIT" name = "overture-schema-cli" readme = "README.md" @@ -38,9 +38,6 @@ dev = [ "mypy>=1.17.0", ] -[tool.hatch.version] -path = "src/overture/schema/cli/__about__.py" - [tool.hatch.build.targets.wheel] packages = ["src/overture"] diff --git a/packages/overture-schema-cli/src/overture/schema/cli/__about__.py b/packages/overture-schema-cli/src/overture/schema/cli/__about__.py index 5e3bb11ca..302de4ff4 100644 --- a/packages/overture-schema-cli/src/overture/schema/cli/__about__.py +++ b/packages/overture-schema-cli/src/overture/schema/cli/__about__.py @@ -1 +1,6 @@ -__version__ = "0.1.1.dev1" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("overture-schema-cli") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/packages/overture-schema-codegen/pyproject.toml b/packages/overture-schema-codegen/pyproject.toml index 3019a6a92..816223318 100644 --- a/packages/overture-schema-codegen/pyproject.toml +++ b/packages/overture-schema-codegen/pyproject.toml @@ -12,7 +12,7 @@ dependencies = [ "tomli>=2.0; python_version < '3.11'", ] description = "Code generator that produces documentation and code from Pydantic models" -dynamic = ["version"] +version = "0.1.1" license = "MIT" name = "overture-schema-codegen" @@ -24,8 +24,5 @@ overture-schema-cli = { workspace = true } overture-schema-common = { workspace = true } overture-schema-system = { workspace = true } -[tool.hatch.version] -path = "src/overture/schema/codegen/__about__.py" - [tool.hatch.build.targets.wheel] packages = ["src/overture"] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/__about__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/__about__.py index 3dc1f76bc..2fce0d82a 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/__about__.py +++ 
b/packages/overture-schema-codegen/src/overture/schema/codegen/__about__.py @@ -1 +1,6 @@ -__version__ = "0.1.0" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("overture-schema-codegen") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/packages/overture-schema-common/pyproject.toml b/packages/overture-schema-common/pyproject.toml index de356fa4a..8daaa6dac 100644 --- a/packages/overture-schema-common/pyproject.toml +++ b/packages/overture-schema-common/pyproject.toml @@ -7,7 +7,7 @@ name = "overture-schema-common" maintainers = [ {name = "Overture Maps Schema Working Group"}, ] -dynamic = ["version"] +version = "0.1.1" description = "Common components that are shared across Overture theme schemas" license = "MIT" dependencies = [ @@ -24,9 +24,6 @@ Issues = "https://github.com/OvertureMaps/schema/issues" [tool.uv.sources] overture-schema-system = { workspace = true } -[tool.hatch.version] -path = "src/overture/schema/common/__about__.py" - [tool.hatch.build.targets.wheel] packages = ["src/overture"] diff --git a/packages/overture-schema-common/src/overture/schema/common/__about__.py b/packages/overture-schema-common/src/overture/schema/common/__about__.py index 5e3bb11ca..b83d77678 100644 --- a/packages/overture-schema-common/src/overture/schema/common/__about__.py +++ b/packages/overture-schema-common/src/overture/schema/common/__about__.py @@ -1 +1,6 @@ -__version__ = "0.1.1.dev1" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("overture-schema-common") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/packages/overture-schema-divisions-theme/pyproject.toml b/packages/overture-schema-divisions-theme/pyproject.toml index 0314d8d1b..2da546006 100644 --- a/packages/overture-schema-divisions-theme/pyproject.toml +++ b/packages/overture-schema-divisions-theme/pyproject.toml @@ -8,7 +8,7 @@ dependencies = [ "pydantic>=2.12.0", ] description 
= "Overture Maps divisions theme shared structures, division, division area and division boundary types" -dynamic = ["version"] +version = "0.1.1" license = "MIT" name = "overture-schema-divisions-theme" readme = "README.md" @@ -27,9 +27,6 @@ overture-schema-system = { workspace = true } build-backend = "hatchling.build" requires = ["hatchling"] -[tool.hatch.version] -path = "src/overture/schema/divisions/__about__.py" - [tool.hatch.build.targets.wheel] packages = ["src/overture"] diff --git a/packages/overture-schema-divisions-theme/src/overture/schema/divisions/__about__.py b/packages/overture-schema-divisions-theme/src/overture/schema/divisions/__about__.py index 5e3bb11ca..f0e512b45 100644 --- a/packages/overture-schema-divisions-theme/src/overture/schema/divisions/__about__.py +++ b/packages/overture-schema-divisions-theme/src/overture/schema/divisions/__about__.py @@ -1 +1,6 @@ -__version__ = "0.1.1.dev1" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("overture-schema-divisions-theme") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/packages/overture-schema-places-theme/pyproject.toml b/packages/overture-schema-places-theme/pyproject.toml index 58fa3dd7a..995fd707a 100644 --- a/packages/overture-schema-places-theme/pyproject.toml +++ b/packages/overture-schema-places-theme/pyproject.toml @@ -8,7 +8,7 @@ dependencies = [ "pydantic[email]>=2.12.0", ] description = "Overture Maps places theme with place type models" -dynamic = ["version"] +version = "0.1.1" license = "MIT" name = "overture-schema-places-theme" readme = "README.md" @@ -27,9 +27,6 @@ overture-schema-system = { workspace = true } build-backend = "hatchling.build" requires = ["hatchling"] -[tool.hatch.version] -path = "src/overture/schema/places/__about__.py" - [tool.hatch.build.targets.wheel] packages = ["src/overture"] diff --git a/packages/overture-schema-places-theme/src/overture/schema/places/__about__.py 
b/packages/overture-schema-places-theme/src/overture/schema/places/__about__.py index 5e3bb11ca..bb205eaf6 100644 --- a/packages/overture-schema-places-theme/src/overture/schema/places/__about__.py +++ b/packages/overture-schema-places-theme/src/overture/schema/places/__about__.py @@ -1 +1,6 @@ -__version__ = "0.1.1.dev1" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("overture-schema-places-theme") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/packages/overture-schema-system/pyproject.toml b/packages/overture-schema-system/pyproject.toml index 0646d7a0a..48f01c1d3 100644 --- a/packages/overture-schema-system/pyproject.toml +++ b/packages/overture-schema-system/pyproject.toml @@ -7,7 +7,7 @@ name = "overture-schema-system" maintainers = [ {name = "Overture Maps Schema Working Group"}, ] -dynamic = ["version"] +version = "0.1.1" description = "Foundational types at the base of the Overture Maps schema system" readme = "README.md" requires-python = ">=3.10" @@ -29,9 +29,6 @@ dev = [ "mypy>=1.17.0", ] -[tool.hatch.version] -path = "src/overture/schema/system/__about__.py" - [tool.hatch.build.targets.wheel] packages = ["src/overture"] diff --git a/packages/overture-schema-system/src/overture/schema/system/__about__.py b/packages/overture-schema-system/src/overture/schema/system/__about__.py index 5e3bb11ca..459f0551b 100644 --- a/packages/overture-schema-system/src/overture/schema/system/__about__.py +++ b/packages/overture-schema-system/src/overture/schema/system/__about__.py @@ -1 +1,6 @@ -__version__ = "0.1.1.dev1" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("overture-schema-system") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/packages/overture-schema-transportation-theme/pyproject.toml b/packages/overture-schema-transportation-theme/pyproject.toml index 547b54401..50407ed4a 100644 --- 
a/packages/overture-schema-transportation-theme/pyproject.toml +++ b/packages/overture-schema-transportation-theme/pyproject.toml @@ -8,7 +8,7 @@ dependencies = [ "pydantic>=2.12.0", ] description = "Overture Maps transportation theme with shared structures and connector and segment types" -dynamic = ["version"] +version = "0.1.1" license = "MIT" name = "overture-schema-transportation-theme" readme = "README.md" @@ -28,9 +28,6 @@ overture-schema-system = { workspace = true } build-backend = "hatchling.build" requires = ["hatchling"] -[tool.hatch.version] -path = "src/overture/schema/transportation/__about__.py" - [tool.hatch.build.targets.wheel] packages = ["src/overture"] diff --git a/packages/overture-schema-transportation-theme/src/overture/schema/transportation/__about__.py b/packages/overture-schema-transportation-theme/src/overture/schema/transportation/__about__.py index 5e3bb11ca..214099d7c 100644 --- a/packages/overture-schema-transportation-theme/src/overture/schema/transportation/__about__.py +++ b/packages/overture-schema-transportation-theme/src/overture/schema/transportation/__about__.py @@ -1 +1,6 @@ -__version__ = "0.1.1.dev1" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("overture-schema-transportation-theme") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/packages/overture-schema/pyproject.toml b/packages/overture-schema/pyproject.toml index eb9bb8940..bb748c46e 100644 --- a/packages/overture-schema/pyproject.toml +++ b/packages/overture-schema/pyproject.toml @@ -15,7 +15,7 @@ dependencies = [ "overture-schema-cli", ] description = "Complete Overture Maps schema collection with all themes and types" -dynamic = ["version"] +version = "1.17.1" license = "MIT" name = "overture-schema" readme = "README.md" @@ -47,8 +47,5 @@ dev = [ "yamlcore>=0.0.4", ] -[tool.hatch.version] -path = "src/overture/schema/__about__.py" - [tool.hatch.build.targets.wheel] packages = ["src/overture"] diff 
--git a/packages/overture-schema/src/overture/schema/__about__.py b/packages/overture-schema/src/overture/schema/__about__.py index 5e3bb11ca..7f4ff3688 100644 --- a/packages/overture-schema/src/overture/schema/__about__.py +++ b/packages/overture-schema/src/overture/schema/__about__.py @@ -1 +1,6 @@ -__version__ = "0.1.1.dev1" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("overture-schema") +except PackageNotFoundError: + __version__ = "unknown" diff --git a/uv.lock b/uv.lock index 0bbdf1cda..458434c6e 100644 --- a/uv.lock +++ b/uv.lock @@ -7,7 +7,7 @@ resolution-markers = [ ] [options] -exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values. +exclude-newer = "2026-05-14T16:33:24.6787743Z" exclude-newer-span = "P1W" [manifest] @@ -650,6 +650,7 @@ wheels = [ [[package]] name = "overture-schema" +version = "1.17.0" source = { editable = "packages/overture-schema" } dependencies = [ { name = "overture-schema-addresses-theme" }, @@ -694,6 +695,7 @@ dev = [ [[package]] name = "overture-schema-addresses-theme" +version = "0.1.0" source = { editable = "packages/overture-schema-addresses-theme" } dependencies = [ { name = "overture-schema-common" }, @@ -710,6 +712,7 @@ requires-dist = [ [[package]] name = "overture-schema-annex" +version = "0.1.0" source = { editable = "packages/overture-schema-annex" } dependencies = [ { name = "overture-schema-common" }, @@ -726,6 +729,7 @@ requires-dist = [ [[package]] name = "overture-schema-base-theme" +version = "0.1.0" source = { editable = "packages/overture-schema-base-theme" } dependencies = [ { name = "overture-schema-common" }, @@ -742,6 +746,7 @@ requires-dist = [ [[package]] name = "overture-schema-buildings-theme" +version = "0.1.0" source = { editable = "packages/overture-schema-buildings-theme" } dependencies = [ { name = "overture-schema-common" }, @@ -758,6 +763,7 @@ requires-dist = [ 
[[package]] name = "overture-schema-cli" +version = "0.1.0" source = { editable = "packages/overture-schema-cli" } dependencies = [ { name = "click" }, @@ -796,6 +802,7 @@ dev = [ [[package]] name = "overture-schema-codegen" +version = "0.1.0" source = { editable = "packages/overture-schema-codegen" } dependencies = [ { name = "click" }, @@ -818,6 +825,7 @@ requires-dist = [ [[package]] name = "overture-schema-common" +version = "0.1.0" source = { editable = "packages/overture-schema-common" } dependencies = [ { name = "overture-schema-system" }, @@ -848,6 +856,7 @@ dev = [ [[package]] name = "overture-schema-divisions-theme" +version = "0.1.0" source = { editable = "packages/overture-schema-divisions-theme" } dependencies = [ { name = "overture-schema-common" }, @@ -864,6 +873,7 @@ requires-dist = [ [[package]] name = "overture-schema-places-theme" +version = "0.1.0" source = { editable = "packages/overture-schema-places-theme" } dependencies = [ { name = "overture-schema-common" }, @@ -880,6 +890,7 @@ requires-dist = [ [[package]] name = "overture-schema-system" +version = "0.1.0" source = { editable = "packages/overture-schema-system" } dependencies = [ { name = "pydantic" }, @@ -908,6 +919,7 @@ dev = [ [[package]] name = "overture-schema-transportation-theme" +version = "0.1.0" source = { editable = "packages/overture-schema-transportation-theme" } dependencies = [ { name = "overture-schema-common" },