diff --git a/bin/lib.sh b/bin/lib.sh index 5a08463..d233d53 100755 --- a/bin/lib.sh +++ b/bin/lib.sh @@ -413,6 +413,7 @@ n/tourism=hotel,motel,guest_house n/shop=gas n/cuisine n/highway=rest_area,services +w/highway=construction w/highway=motorway,motorway_link,trunk,trunk_link,rest_area,services w/amenity=fuel,restaurant,fast_food,cafe,toilets,charging_station w/tourism=hotel,motel,guest_house @@ -447,7 +448,7 @@ oi_filter_pbf() { )" if [[ "$force" != true && -s "$output_pbf" ]]; then if [[ "$(oi_state_read "$state_file" signature 2>/dev/null || true)" == "$expected_signature" ]] \ - || [[ ! "$input_pbf" -nt "$output_pbf" ]]; then + && [[ ! "$input_pbf" -nt "$output_pbf" ]]; then oi_log "Skipping canonical filter; output is current" echo " output: $output_pbf" >&2 oi_state_write "$state_file" \ @@ -485,6 +486,123 @@ oi_filter_pbf() { printf '%s\n' "$output_pbf" } +oi_safe_slug() { + local raw="${1:-}" + local slug + + slug="$( + printf '%s' "$raw" \ + | LC_ALL=C tr -cs 'A-Za-z0-9._-' '-' \ + | sed -e 's/^-*//' -e 's/-*$//' + )" + + if [[ -z "$slug" ]]; then + slug="focus" + fi + + printf '%s\n' "$slug" +} + +oi_expand_bbox() { + local bbox="$1" + local buffer_km="${2:-0}" + + python3 - "$bbox" "$buffer_km" <<'PY' +import math +import sys + +bbox = sys.argv[1] +buffer_km = float(sys.argv[2]) +parts = [float(part.strip()) for part in bbox.split(",") if part.strip()] +if len(parts) != 4: + raise SystemExit("bbox must be LONG1,LAT1,LONG2,LAT2") + +left, bottom, right, top = parts +left, right = sorted((left, right)) +bottom, top = sorted((bottom, top)) + +if buffer_km > 0: + lat_pad = buffer_km / 111.32 + mid_lat = (bottom + top) / 2.0 + lon_scale = max(math.cos(math.radians(mid_lat)) * 111.32, 0.01) + lon_pad = buffer_km / lon_scale + left -= lon_pad + right += lon_pad + bottom -= lat_pad + top += lat_pad + +print(f"{left:.7f},{bottom:.7f},{right:.7f},{top:.7f}") +PY +} + +oi_extract_region_pbf() { + local source_pbf="$1" + local output_pbf="$2" + local bbox="$3" + local buffer_km="${4:-0}" + local force="${5:-false}" + local output_tmp expanded_bbox state_file expected_signature + + source_pbf="$(oi_abs_path "$source_pbf")" + output_pbf="$(oi_managed_path "$output_pbf")" + + if [[ ! -f "$source_pbf" ]]; then + oi_die "source PBF not found: $source_pbf" + fi + + mkdir -p "$(dirname "$output_pbf")" + expanded_bbox="$(oi_expand_bbox "$bbox" "$buffer_km")" + state_file="$(oi_state_file extract "$output_pbf")" + expected_signature="$( + { + oi_file_signature "$source_pbf" + printf 'bbox=%s\n' "$expanded_bbox" + printf 'strategy=complete_ways\n' + } | oi_hash_stdin + )" + + if [[ "$force" != true && -s "$output_pbf" ]]; then + if [[ "$(oi_state_read "$state_file" signature 2>/dev/null || true)" == "$expected_signature" ]] \ + && [[ ! "$source_pbf" -nt "$output_pbf" ]]; then + oi_log "Skipping focused extract; output is current" + echo " bbox: $expanded_bbox" >&2 + echo " output: $output_pbf" >&2 + printf '%s\n' "$output_pbf" + return 0 + fi + fi + + if [[ "$output_pbf" == *.osm.pbf ]]; then + output_tmp="${output_pbf%.osm.pbf}.tmp.$$.osm.pbf" + else + output_tmp="${output_pbf}.tmp.$$" + fi + + oi_log "Extracting focused regional PBF" + echo " source: $source_pbf" >&2 + echo " bbox: $expanded_bbox" >&2 + echo " output: $output_pbf" >&2 + + oi_require_cmd osmium + osmium extract \ + --bbox "$expanded_bbox" \ + --strategy=complete_ways \ + --set-bounds \ + "$source_pbf" \ + -o "$output_tmp" \ + -O + + mv "$output_tmp" "$output_pbf" + oi_state_write "$state_file" \ + signature "$expected_signature" \ + source_pbf "$source_pbf" \ + bbox "$expanded_bbox" \ + output_pbf "$output_pbf" \ + completed_at "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" + + printf '%s\n' "$output_pbf" +} + oi_canonical_tables_exist() { local exists exists="$(oi_db_query "SELECT CASE WHEN to_regclass('public.osm2pgsql_v2_highways') IS NULL THEN 0 ELSE 1 END;" 2>/dev/null || true)" @@ -715,6 +833,7 @@ oi_import_canonical() { oi_assert_canonical_import_ready oi_state_write "$import_state_file" \ signature "$import_signature" \ + source_pbf "$source_pbf" \ import_pbf "$import_pbf" \ mode "$import_mode" \ use_flatnodes "$use_flatnodes" \ @@ -723,10 +842,55 @@ oi_import_canonical() { printf '%s\n' "$import_pbf" } +oi_extract_interstate_relation_cache() { + local import_state_file source_pbf extractor_script cache_path cache_state_file + local cache_signature stored_signature + + import_state_file="$(oi_state_file import "$OI_DATA_ROOT|$OI_DB_NAME")" + source_pbf="$(oi_state_read "$import_state_file" source_pbf 2>/dev/null || true)" + if [[ -z "$source_pbf" || ! -f "$source_pbf" ]]; then + if [[ -f "$OI_DOWNLOAD_DIR/us-latest.osm.pbf" ]]; then + source_pbf="$OI_DOWNLOAD_DIR/us-latest.osm.pbf" + else + printf '%s\n' "" + return 0 + fi + fi + + extractor_script="$REPO_ROOT/tooling/extract_interstate_relations.py" + cache_path="$OI_CACHE_DIR/interstate-relations.tsv" + cache_state_file="$(oi_state_file interstate-relations "$OI_DATA_ROOT|$OI_DB_NAME")" + cache_signature="$( + { + oi_file_signature "$source_pbf" + printf 'extractor=%s\n' "$(oi_hash_files "$extractor_script")" + } | oi_hash_stdin + )" + stored_signature="$(oi_state_read "$cache_state_file" signature 2>/dev/null || true)" + + if [[ -f "$cache_path" && "$stored_signature" == "$cache_signature" ]]; then + printf '%s\n' "$cache_path" + return 0 + fi + + oi_log "Extracting Interstate relation cache from source PBF" + python3 "$extractor_script" \ + --source-pbf "$source_pbf" \ + --output "$cache_path" + + oi_state_write "$cache_state_file" \ + signature "$cache_signature" \ + source_pbf "$source_pbf" \ + cache_path "$cache_path" \ + completed_at "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" + + printf '%s\n' "$cache_path" +} + oi_apply_derive() { local derive_file="$REPO_ROOT/schema/derive.sql" local derive_state_file import_state_file import_signature derive_signature derive_sql_signature derive_code_signature - local reachability_signature + local reachability_signature relation_cache_file relation_cache_signature local -a derive_source_files=() oi_guard_no_reachability_clears "$derive_file" @@ -749,6 +913,12 @@ oi_apply_derive() { ) derive_sql_signature="$(oi_hash_files "$derive_file")" derive_code_signature="$(oi_hash_files "${derive_source_files[@]}")" + relation_cache_file="$(oi_extract_interstate_relation_cache)" + if [[ -n "$relation_cache_file" && -f "$relation_cache_file" ]]; then + relation_cache_signature="$(oi_file_signature "$relation_cache_file")" + else + relation_cache_signature="no-relation-cache" + fi reachability_signature="$( { oi_db_query "SELECT COUNT(*), COALESCE(MAX(updated_at)::text, '') FROM exit_poi_reachability;" 2>/dev/null || true @@ -760,6 +930,7 @@ oi_apply_derive() { printf 'import=%s\n' "$import_signature" printf 'derive_sql=%s\n' "$derive_sql_signature" printf 'derive_code=%s\n' "$derive_code_signature" + printf 'relation_cache=%s\n' "$relation_cache_signature" printf 'reachability=%s\n' "$reachability_signature" } | oi_hash_stdin )" @@ -773,9 +944,16 @@ oi_apply_derive() { oi_db_exec psql -U "$OI_DB_USER" -d "$OI_DB_NAME" -v ON_ERROR_STOP=1 < "$derive_file" oi_log "Building graph, corridors, and reference routes" - oi_runner cargo run --release -p openinterstate-derive -- \ - --database-url "$PRODUCT_DB_URL" \ - all + if [[ -n "$relation_cache_file" && -f "$relation_cache_file" ]]; then + oi_runner cargo run --release -p openinterstate-derive -- \ + --database-url "$PRODUCT_DB_URL" \ + --interstate-relation-cache "$(oi_container_path "$relation_cache_file")" \ + all + else + oi_runner cargo run --release -p openinterstate-derive -- \ + --database-url "$PRODUCT_DB_URL" \ + all + fi oi_state_write "$derive_state_file" \ signature "$derive_signature" \ import_signature "$import_signature" \ diff --git a/bin/openinterstate b/bin/openinterstate index 48a3707..d7628f0 100755 --- a/bin/openinterstate +++ b/bin/openinterstate @@ -14,6 +14,7 @@ Usage: Commands: build Download or use a source PBF, import it, derive data, and export a release + focus-build Extract a bounded regional PBF and run the same import, derive, and release flow on it download Download the latest U.S. PBF into the managed data directory import Import canonical OSM into local PostGIS derive Build product tables, graph data, corridors, and reference routes @@ -33,6 +34,13 @@ External data volume with explicit release output: --data-dir /Volumes/goose-drive/openinterstate-data \ --release-dir /Volumes/goose-drive/openinterstate-releases \ build + +Focused local iteration on the George Washington Bridge I-95 area: + ./bin/openinterstate focus-build \ + --source-pbf-file /abs/path/to/us-latest.osm.pbf \ + --bbox -73.99,40.84,-73.94,40.88 \ + --buffer-km 2 \ + --extract-name i95-gwb USAGE } @@ -153,6 +161,98 @@ case "$COMMAND" in echo " release: $OUTPUT_ROOT/$RELEASE_ID" echo " archive: $OUTPUT_ROOT/openinterstate-$RELEASE_ID.tar.gz" ;; + focus-build) + SOURCE_PBF_FILE="" + BBOX="" + BUFFER_KM="0" + EXTRACT_NAME="" + RELEASE_ID="" + OUTPUT_ROOT="$OI_RELEASE_DIR" + PREFILTER=true + FORCE_PREFILTER=false + FORCE_EXTRACT=false + + while [[ $# -gt 0 ]]; do + case "$1" in + --source-pbf-file) + SOURCE_PBF_FILE="$2" + shift 2 + ;; + --bbox) + BBOX="$2" + shift 2 + ;; + --buffer-km) + BUFFER_KM="$2" + shift 2 + ;; + --extract-name) + EXTRACT_NAME="$2" + shift 2 + ;; + --release-id) + RELEASE_ID="$2" + shift 2 + ;; + --output-root) + OUTPUT_ROOT="$2" + shift 2 + ;; + --release-dir) + OUTPUT_ROOT="$2" + shift 2 + ;; + --skip-prefilter) + PREFILTER=false + shift 1 + ;; + --force-prefilter) + FORCE_PREFILTER=true + shift 1 + ;; + --force-extract) + FORCE_EXTRACT=true + shift 1 + ;; + -h|--help) + usage + exit 0 + ;; + *) + oi_die "unknown arg: $1" + ;; + esac + done + + [[ -n "$SOURCE_PBF_FILE" ]] || oi_die "--source-pbf-file is required" + [[ -n "$BBOX" ]] || oi_die "--bbox is required" + + if [[ -z "$EXTRACT_NAME" ]]; then + EXTRACT_NAME="focus-$(printf '%s|%s|%s\n' "$SOURCE_PBF_FILE" "$BBOX" "$BUFFER_KM" | oi_hash_stdin | cut -c1-12)" + fi + EXTRACT_NAME="$(oi_safe_slug "$EXTRACT_NAME")" + if [[ -z "$RELEASE_ID" ]]; then + RELEASE_ID="focus-${EXTRACT_NAME}-$(date +%F)" + fi + + EXTRACT_PBF="$OI_DOWNLOAD_DIR/${EXTRACT_NAME}.osm.pbf" + FOCUSED_SOURCE_PBF="$(oi_extract_region_pbf "$SOURCE_PBF_FILE" "$EXTRACT_PBF" "$BBOX" "$BUFFER_KM" "$FORCE_EXTRACT")" || exit $? + + export OSM2PGSQL_MODE=create + export OI_ALLOW_CANONICAL_RESET=true + + oi_db_up + oi_wait_for_db + + IMPORT_PBF="$(oi_import_canonical "$FOCUSED_SOURCE_PBF" "$PREFILTER" "$FORCE_PREFILTER")" || exit $? + oi_apply_derive + oi_export_release "$RELEASE_ID" "$FOCUSED_SOURCE_PBF" "$IMPORT_PBF" "" "$OUTPUT_ROOT" + + oi_log "Focused build complete" + echo " extract: $FOCUSED_SOURCE_PBF" + echo " release: $OUTPUT_ROOT/$RELEASE_ID" + echo " archive: $OUTPUT_ROOT/openinterstate-$RELEASE_ID.tar.gz" + ;; download) SOURCE_URL="$OI_DEFAULT_US_PBF_URL" OUTPUT="" diff --git a/crates/derive/src/canonical_types.rs b/crates/derive/src/canonical_types.rs index 14ad331..a7a9b98 100644 --- a/crates/derive/src/canonical_types.rs +++ b/crates/derive/src/canonical_types.rs @@ -6,6 +6,7 @@ pub struct ParsedExit { #[derive(Debug, Clone)] pub struct ParsedHighway { + pub way_id: i64, pub refs: Vec, pub nodes: Vec, pub geometry: Vec<(f64, f64)>, diff --git a/crates/derive/src/graph/compress.rs b/crates/derive/src/graph/compress.rs index c7b04f1..6ed1012 100644 --- a/crates/derive/src/graph/compress.rs +++ b/crates/derive/src/graph/compress.rs @@ -1,4 +1,5 @@ -use std::collections::{hash_map::Entry, BTreeSet, HashMap, HashSet, VecDeque}; +use std::cmp::Ordering; +use std::collections::{hash_map::Entry, BTreeSet, BinaryHeap, HashMap, HashSet, VecDeque}; use openinterstate_core::geo::haversine_distance; use openinterstate_core::highway_ref::is_interstate_highway_ref; @@ -20,6 +21,7 @@ pub(super) struct CompressedEdge { pub(super) min_lon: f64, pub(super) max_lon: f64, pub(super) polyline_json: String, + pub(super) source_way_ids_json: String, /// WKT LineString for PostGIS geom column. pub(super) geom_wkt: String, /// Cardinal direction of this edge's component ("N","S","E","W" or None). @@ -39,6 +41,39 @@ struct HighwayGraph { node_coords: HashMap, neighbors_directed: HashMap>, neighbors_undirected: HashMap>, + arc_way_ids: HashMap<(i64, i64), BTreeSet>, +} + +struct ConnectorGraph { + adjacency: HashMap>, +} + +#[derive(Clone, Copy)] +struct ConnectorArc { + next: i64, + weight_m: u64, + way_id: i64, +} + +#[derive(Clone, Copy, Eq, PartialEq)] +struct SearchState { + cost_m: u64, + node: i64, +} + +impl Ord for SearchState { + fn cmp(&self, other: &Self) -> Ordering { + other + .cost_m + .cmp(&self.cost_m) + .then_with(|| self.node.cmp(&other.node)) + } +} + +impl PartialOrd for SearchState { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } /// Pure function: compress highway ways into directed edges. @@ -112,102 +147,346 @@ fn build_exit_node_index(exits: &[ParsedExit]) -> (HashSet, HashMap HashMap> { - let mut ways_by_highway: HashMap> = HashMap::new(); + let mut assigned_way_ids: HashMap> = HashMap::new(); + let mut ways_by_id: HashMap = HashMap::new(); let mut refless_motorways: Vec<&ParsedHighway> = Vec::new(); + let mut blank_ref_connectors: Vec<&ParsedHighway> = Vec::new(); for way in highways { - // Skip motorway_link ways that don't carry a ref — these are exit ramps. - // Links WITH a ref (e.g. "I 40") are mainline connectors at interchanges - // where interstates merge/split. - if way.highway_type == "motorway_link" && way.refs.is_empty() { + ways_by_id.insert(way.way_id, way); + + let interstate_refs: Vec = way + .refs + .iter() + .filter(|reference| is_interstate_highway_ref(reference)) + .cloned() + .collect(); + if !interstate_refs.is_empty() { + for highway in interstate_refs { + assigned_way_ids + .entry(highway) + .or_default() + .insert(way.way_id); + } continue; } - if way.refs.is_empty() && way.highway_type == "motorway" { - // Refless motorway ways (e.g. "Sam Cooper Boulevard" in Memphis is - // physically I-40 but tagged without ref). Collect for node-based - // assignment in a second pass onto an Interstate graph. - refless_motorways.push(way); - continue; + if way.refs.is_empty() { + blank_ref_connectors.push(way); + if way.highway_type == "motorway" { + refless_motorways.push(way); + } } + } + + adopt_refless_motorways_by_shared_nodes(&mut assigned_way_ids, &refless_motorways, &ways_by_id); + + let connector_graph = build_blank_ref_connector_graph(&blank_ref_connectors); + adopt_blank_ref_connector_paths(&mut assigned_way_ids, &ways_by_id, &connector_graph); + + let mut ways_by_highway: HashMap> = HashMap::new(); + for (highway, way_ids) in assigned_way_ids { + let mut ways: Vec<&ParsedHighway> = way_ids + .into_iter() + .filter_map(|way_id| ways_by_id.get(&way_id).copied()) + .collect(); + ways.sort_by_key(|way| way.way_id); + ways_by_highway.insert(highway, ways); + } + + ways_by_highway +} - for reference in &way.refs { - if is_interstate_highway_ref(reference) { - ways_by_highway - .entry(reference.clone()) +fn adopt_refless_motorways_by_shared_nodes( + assigned_way_ids: &mut HashMap>, + refless_motorways: &[&ParsedHighway], + ways_by_id: &HashMap, +) { + if refless_motorways.is_empty() { + return; + } + + let mut node_highways: HashMap> = HashMap::new(); + for (highway, way_ids) in assigned_way_ids.iter() { + for way_id in way_ids { + let Some(way) = ways_by_id.get(way_id).copied() else { + continue; + }; + for &node_id in &way.nodes { + node_highways + .entry(node_id) .or_default() - .push(way); + .insert(highway.clone()); } } } - // Second pass: assign refless motorway ways to highways via shared OSM nodes. - // Build node → set of highways index from the first pass. - if !refless_motorways.is_empty() { - let mut node_highways: HashMap> = HashMap::new(); - for (highway, ways) in &ways_by_highway { - for way in ways { - for &node_id in &way.nodes { - node_highways - .entry(node_id) - .or_default() - .insert(highway.clone()); + let mut remaining = refless_motorways.to_vec(); + let mut total_assigned = 0usize; + loop { + let mut unmatched: Vec<&ParsedHighway> = Vec::new(); + let mut assigned_this_round = 0usize; + for way in remaining { + let mut matched: HashSet = HashSet::new(); + for &node_id in &way.nodes { + if let Some(highways) = node_highways.get(&node_id) { + matched.extend(highways.iter().cloned()); } } + if matched.is_empty() { + unmatched.push(way); + continue; + } + + for highway in &matched { + assigned_way_ids + .entry(highway.clone()) + .or_default() + .insert(way.way_id); + } + for &node_id in &way.nodes { + node_highways + .entry(node_id) + .or_default() + .extend(matched.iter().cloned()); + } + assigned_this_round += 1; } + total_assigned += assigned_this_round; + if assigned_this_round == 0 { + break; + } + remaining = unmatched; + } + + if total_assigned > 0 { + tracing::info!( + "Adopted {} refless motorway ways via shared nodes", + total_assigned + ); + } +} + +fn build_blank_ref_connector_graph(ways: &[&ParsedHighway]) -> ConnectorGraph { + let mut adjacency: HashMap> = HashMap::new(); + + for way in ways { + if way.nodes.len() < 2 || way.geometry.len() < 2 || way.nodes.len() != way.geometry.len() { + continue; + } + + for idx in 0..way.nodes.len() - 1 { + let start = way.nodes[idx]; + let end = way.nodes[idx + 1]; + let start_coord = way.geometry[idx]; + let end_coord = way.geometry[idx + 1]; + let weight_m = + haversine_distance(start_coord.0, start_coord.1, end_coord.0, end_coord.1) + .round() + .max(1.0) as u64; + + adjacency.entry(start).or_default().push(ConnectorArc { + next: end, + weight_m, + way_id: way.way_id, + }); + if !way.is_oneway { + adjacency.entry(end).or_default().push(ConnectorArc { + next: start, + weight_m, + way_id: way.way_id, + }); + } + } + } + + ConnectorGraph { adjacency } +} + +fn adopt_blank_ref_connector_paths( + assigned_way_ids: &mut HashMap>, + ways_by_id: &HashMap, + connector_graph: &ConnectorGraph, +) { + let mut highways: Vec = assigned_way_ids.keys().cloned().collect(); + highways.sort(); + + for highway in highways { + let mut total_adopted = 0usize; - // Iterate to handle chains of refless ways (A→B→C where only A touches a ref'd way). - let mut remaining = refless_motorways; - let mut total_assigned = 0usize; loop { - let mut unmatched: Vec<&ParsedHighway> = Vec::new(); - let mut assigned_this_round = 0usize; - for way in remaining { - let mut matched: HashSet = HashSet::new(); - for &node_id in &way.nodes { - if let Some(hws) = node_highways.get(&node_id) { - matched.extend(hws.iter().cloned()); - } - } - if matched.is_empty() { - unmatched.push(way); - } else { - for highway in &matched { - ways_by_highway - .entry(highway.clone()) - .or_default() - .push(way); + let Some(assigned_ids) = assigned_way_ids.get(&highway) else { + break; + }; + let assigned_ways: Vec<&ParsedHighway> = assigned_ids + .iter() + .filter_map(|way_id| ways_by_id.get(way_id).copied()) + .collect(); + let components = highway_connected_components(&assigned_ways); + if components.len() <= 1 { + break; + } + + let mut best_path: Option<(u64, i32, Vec)> = None; + for (source_comp, source_nodes, _) in &components { + let mut target_components_by_node: HashMap = HashMap::new(); + for (target_comp, target_nodes, _) in &components { + if target_comp == source_comp { + continue; } - for &node_id in &way.nodes { - node_highways - .entry(node_id) - .or_default() - .extend(matched.iter().cloned()); + for &node_id in target_nodes { + target_components_by_node.insert(node_id, *target_comp); } - assigned_this_round += 1; } + + let Some(candidate) = shortest_connector_path_to_any( + source_nodes, + &target_components_by_node, + connector_graph, + ) else { + continue; + }; + if best_path + .as_ref() + .is_none_or(|(best_cost, _, _)| candidate.0 < *best_cost) + { + best_path = Some(candidate); + } + } + + let Some((_cost_m, target_comp, path_way_ids)) = best_path else { + break; + }; + + let entry = assigned_way_ids.entry(highway.clone()).or_default(); + let mut adopted_this_round = 0usize; + for way_id in path_way_ids { + adopted_this_round += usize::from(entry.insert(way_id)); } - total_assigned += assigned_this_round; - if assigned_this_round == 0 { + + if adopted_this_round == 0 { + tracing::debug!( + "{}: connector path to component {} yielded no new ways", + highway, + target_comp + ); break; } - remaining = unmatched; + total_adopted += adopted_this_round; } - if total_assigned > 0 { + + if total_adopted > 0 { tracing::info!( - "Adopted {} refless motorway ways via shared nodes", - total_assigned + "Adopted {} blank-ref connector ways via graph search for {}", + total_adopted, + highway ); } } +} - ways_by_highway +fn highway_connected_components(ways: &[&ParsedHighway]) -> Vec<(i32, HashSet, usize)> { + let Some(graph) = build_highway_graph(ways) else { + return Vec::new(); + }; + let component_by_node = compute_components(&graph.neighbors_undirected); + let mut nodes_by_component: HashMap> = HashMap::new(); + let mut way_count_by_component: HashMap = HashMap::new(); + + for way in ways { + let Some(first_node) = way.nodes.first() else { + continue; + }; + let Some(&component) = component_by_node.get(first_node) else { + continue; + }; + *way_count_by_component.entry(component).or_default() += 1; + nodes_by_component + .entry(component) + .or_default() + .extend(way.nodes.iter().copied()); + } + + let mut components: Vec<(i32, HashSet, usize)> = nodes_by_component + .into_iter() + .map(|(component, nodes)| { + ( + component, + nodes, + way_count_by_component.get(&component).copied().unwrap_or(0), + ) + }) + .collect(); + components.sort_by_key(|(component, _, _)| *component); + components +} + +fn shortest_connector_path_to_any( + sources: &HashSet, + targets_by_node: &HashMap, + connector_graph: &ConnectorGraph, +) -> Option<(u64, i32, Vec)> { + if sources.is_empty() || targets_by_node.is_empty() { + return None; + } + + let mut heap = BinaryHeap::new(); + let mut dist: HashMap = HashMap::new(); + let mut prev: HashMap = HashMap::new(); + + for &source in sources { + dist.insert(source, 0); + heap.push(SearchState { + cost_m: 0, + node: source, + }); + } + + while let Some(SearchState { cost_m, node }) = heap.pop() { + if cost_m > dist.get(&node).copied().unwrap_or(u64::MAX) { + continue; + } + + if let Some(&target_component) = targets_by_node.get(&node) { + if !sources.contains(&node) { + let mut way_ids = Vec::new(); + let mut current = node; + while !sources.contains(¤t) { + let &(prev_node, way_id) = prev.get(¤t)?; + way_ids.push(way_id); + current = prev_node; + } + way_ids.reverse(); + way_ids.dedup(); + return Some((cost_m, target_component, way_ids)); + } + } + + let Some(arcs) = connector_graph.adjacency.get(&node) else { + continue; + }; + for arc in arcs { + let next_cost = cost_m.saturating_add(arc.weight_m); + if next_cost >= dist.get(&arc.next).copied().unwrap_or(u64::MAX) { + continue; + } + dist.insert(arc.next, next_cost); + prev.insert(arc.next, (node, arc.way_id)); + heap.push(SearchState { + cost_m: next_cost, + node: arc.next, + }); + } + } + + None } fn build_highway_graph(highway_ways: &[&ParsedHighway]) -> Option { let mut node_coords: HashMap = HashMap::new(); let mut neighbors_directed: HashMap> = HashMap::new(); let mut neighbors_undirected: HashMap> = HashMap::new(); + let mut arc_way_ids: HashMap<(i64, i64), BTreeSet> = HashMap::new(); for way in highway_ways { if way.nodes.len() < 2 || way.geometry.len() < 2 || way.nodes.len() != way.geometry.len() { @@ -223,8 +502,16 @@ fn build_highway_graph(highway_ways: &[&ParsedHighway]) -> Option let end = way.nodes[idx + 1]; neighbors_directed.entry(start).or_default().insert(end); + arc_way_ids + .entry((start, end)) + .or_default() + .insert(way.way_id); if !way.is_oneway { neighbors_directed.entry(end).or_default().insert(start); + arc_way_ids + .entry((end, start)) + .or_default() + .insert(way.way_id); } neighbors_undirected.entry(start).or_default().insert(end); @@ -239,6 +526,7 @@ fn build_highway_graph(highway_ways: &[&ParsedHighway]) -> Option node_coords, neighbors_directed, neighbors_undirected, + arc_way_ids, }) } } @@ -342,6 +630,11 @@ fn walk_compressed_edges( let mut polyline = vec![start_coord, first_coord]; let mut length_m = haversine_distance(start_coord.0, start_coord.1, first_coord.0, first_coord.1); + let mut source_way_ids: BTreeSet = graph + .arc_way_ids + .get(&first_edge) + .cloned() + .unwrap_or_default(); visited_directed.insert(first_edge); let mut prev = start_node; @@ -375,6 +668,9 @@ fn walk_compressed_edges( polyline.push(next_coord); length_m += haversine_distance(cur_coord.0, cur_coord.1, next_coord.0, next_coord.1); + if let Some(way_ids) = graph.arc_way_ids.get(&next_edge) { + source_way_ids.extend(way_ids.iter().copied()); + } visited_directed.insert(next_edge); prev = cur; @@ -402,6 +698,9 @@ fn walk_compressed_edges( .collect::>(), ) .unwrap_or_else(|_| "[]".to_string()); + let source_way_ids_json = + serde_json::to_string(&source_way_ids.into_iter().collect::>()) + .unwrap_or_else(|_| "[]".to_string()); let geom_wkt = polyline_to_linestring_wkt(&polyline); edges.push(CompressedEdge { @@ -416,6 +715,7 @@ fn walk_compressed_edges( min_lon, max_lon, polyline_json, + source_way_ids_json, geom_wkt, direction: None, }); @@ -513,17 +813,33 @@ mod tests { } fn sample_highway( - _id: &str, + id: &str, refs: &[&str], nodes: &[i64], geometry: &[(f64, f64)], + ) -> ParsedHighway { + sample_highway_kind(id, "motorway", refs, nodes, geometry, true) + } + + fn sample_highway_kind( + id: &str, + highway_type: &str, + refs: &[&str], + nodes: &[i64], + geometry: &[(f64, f64)], + is_oneway: bool, ) -> ParsedHighway { ParsedHighway { + way_id: id + .split('/') + .nth(1) + .and_then(|value| value.parse::().ok()) + .unwrap_or(0), refs: refs.iter().map(|value| (*value).to_string()).collect(), nodes: nodes.to_vec(), geometry: geometry.to_vec(), - highway_type: "motorway".to_string(), - is_oneway: true, + highway_type: highway_type.to_string(), + is_oneway, } } @@ -606,4 +922,44 @@ mod tests { assert_eq!(edges[0].highway, "I-280"); assert!(corridor_entries.is_empty()); } + + #[test] + fn adopts_blank_ref_motorway_link_connector_paths() { + let highways = vec![ + sample_highway( + "way/1", + &["I-95"], + &[1, 2], + &[(39.0, -76.0), (39.01, -76.0)], + ), + sample_highway_kind( + "way/2", + "motorway_link", + &[], + &[2, 3], + &[(39.01, -76.0), (39.02, -75.99)], + true, + ), + sample_highway( + "way/3", + &["I-95"], + &[3, 4], + &[(39.02, -75.99), (39.03, -75.98)], + ), + ]; + + let (edges, _) = compress_highway_graph(&highways, &[]); + + let i95_edges: Vec<&CompressedEdge> = + edges.iter().filter(|edge| edge.highway == "I-95").collect(); + let components: HashSet = i95_edges.iter().map(|edge| edge.component).collect(); + assert_eq!( + components.len(), + 1, + "blank-ref connector should unify the highway graph" + ); + assert!(i95_edges + .iter() + .any(|edge| edge.start_node == 1 && edge.end_node == 4)); + } } diff --git a/crates/derive/src/graph/corridors.rs b/crates/derive/src/graph/corridors.rs index 0e89b9c..f1bc8e0 100644 --- a/crates/derive/src/graph/corridors.rs +++ b/crates/derive/src/graph/corridors.rs @@ -91,6 +91,7 @@ struct CorridorResult { member_edge_ids: Vec, /// Displacement vector (delta_lat, delta_lon) for projection-based exit sorting. displacement: (f64, f64), + sample_points: Vec<(f64, f64)>, } struct CorridorExitResult { @@ -101,6 +102,8 @@ struct CorridorExitResult { lon: f64, } +const MINOR_CORRIDOR_ABSORPTION_MAX_GAP_M: f64 = 10_000.0; + // ============================================================================ // Union-Find // ============================================================================ @@ -1049,6 +1052,7 @@ fn build_highway_corridors( dlat += e.end_lat - e.start_lat; dlon += e.end_lon - e.start_lon; } + let sample_points = corridor_sample_points(&ordered_exits, &group_edges); results.push(CorridorResult { corridor_id, @@ -1057,111 +1061,171 @@ fn build_highway_corridors( exits: ordered_exits, member_edge_ids, displacement: (dlat, dlon), + sample_points, }); } - // 7. Absorb minor corridors into same-direction primary corridors. - // A corridor is "minor" if it has 0 exits OR has <5% of the exits - // of the largest same-direction corridor. These are interchange stubs, - // short carriageway fragments from the directional split, etc. - if results.len() > 2 { - // Find the largest corridor per direction by edge count - let mut max_edges_by_dir: HashMap, usize> = HashMap::new(); - for c in &results { - let entry = max_edges_by_dir - .entry(c.canonical_direction.clone()) - .or_default(); - *entry = (*entry).max(c.member_edge_ids.len()); + absorb_minor_corridors(highway, results) +} + +// ============================================================================ +// Proximity merging +// ============================================================================ + +fn absorb_minor_corridors(highway: &str, results: Vec) -> Vec { + // Keep corridor ids sparse after absorption. Reusing ids here can make a + // later highway overwrite an earlier corridor row and mix their edge sets. + if results.len() <= 2 { + return results; + } + + let mut max_edges_by_dir: HashMap, usize> = HashMap::new(); + for corridor in &results { + let entry = max_edges_by_dir + .entry(corridor.canonical_direction.clone()) + .or_default(); + *entry = (*entry).max(corridor.member_edge_ids.len()); + } + + let mut keep = Vec::new(); + let mut orphans = Vec::new(); + let mut absorbed_count = 0usize; + + for corridor in results { + let primary_edges = max_edges_by_dir + .get(&corridor.canonical_direction) + .copied() + .unwrap_or(0); + // A corridor is minor if it has <10% of the edges of the largest + // same-direction corridor, OR if it has very few exits relative to + // its edge count. + let is_edge_minor = corridor.member_edge_ids.len() * 10 < primary_edges; + let is_exit_minor = + corridor.exits.len() <= 5 && primary_edges > corridor.member_edge_ids.len(); + let is_minor = is_edge_minor || is_exit_minor; + if is_minor { + absorbed_count += 1; + orphans.push(corridor); + } else { + keep.push(corridor); } + } - let mut orphan_edges_by_dir: HashMap, Vec> = HashMap::new(); - let mut orphan_exits_by_dir: HashMap, Vec> = - HashMap::new(); - let mut keep = Vec::new(); - let mut absorbed_count = 0usize; + if absorbed_count == 0 { + return keep; + } - for c in results.into_iter() { - let primary_edges = max_edges_by_dir - .get(&c.canonical_direction) - .copied() - .unwrap_or(0); - // A corridor is minor if it has <10% of the edges of the largest - // same-direction corridor, OR if it has very few exits relative - // to its edges. - let is_edge_minor = c.member_edge_ids.len() * 10 < primary_edges; - let is_exit_minor = c.exits.len() <= 5 && primary_edges > c.member_edge_ids.len(); - let is_minor = is_edge_minor || is_exit_minor; - if is_minor { - absorbed_count += 1; - *next_id -= 1; - orphan_edges_by_dir - .entry(c.canonical_direction.clone()) - .or_default() - .extend(c.member_edge_ids); - orphan_exits_by_dir - .entry(c.canonical_direction.clone()) - .or_default() - .extend(c.exits); - } else { - keep.push(c); - } + tracing::info!( + " {}: absorbing {} minor corridors into same-direction primaries", + highway, + absorbed_count + ); + + for orphan in orphans { + let target = nearest_corridor_target(&orphan, &keep, MINOR_CORRIDOR_ABSORPTION_MAX_GAP_M); + + if let Some(target_idx) = target { + merge_corridors(&mut keep[target_idx], orphan); + } else { + keep.push(orphan); } - results = keep; + } - if absorbed_count > 0 { - tracing::info!( - " {}: absorbing {} minor corridors into same-direction primaries", - highway, - absorbed_count - ); - for (dir, edges) in orphan_edges_by_dir { - let exits = orphan_exits_by_dir.remove(&dir).unwrap_or_default(); - let target = results - .iter() - .enumerate() - .filter(|(_, t)| t.canonical_direction == dir) - .max_by_key(|(_, t)| t.member_edge_ids.len()) - .map(|(j, _)| j) - .or_else(|| { - results - .iter() - .enumerate() - .max_by_key(|(_, t)| t.member_edge_ids.len()) - .map(|(j, _)| j) - }); - if let Some(ti) = target { - results[ti].member_edge_ids.extend(edges); - // Binary-insert each absorbed exit into the already-sorted - // list at the correct projection position. - let (dlat, dlon) = results[ti].displacement; - let has_disp = dlat.abs() > 1e-6 || dlon.abs() > 1e-6; - for exit in exits { - let proj = if has_disp { - exit.lat * dlat + exit.lon * dlon - } else { - exit.lat - }; - let pos = results[ti].exits.partition_point(|e| { - let ep = if has_disp { - e.lat * dlat + e.lon * dlon - } else { - e.lat - }; - ep < proj - }); - results[ti].exits.insert(pos, exit); - } - } - } + keep.sort_by_key(|corridor| corridor.corridor_id); + keep +} + +fn corridor_sample_points( + exits: &[CorridorExitResult], + group_edges: &[&EdgeData], +) -> Vec<(f64, f64)> { + if !exits.is_empty() { + return exits.iter().map(|exit| (exit.lat, exit.lon)).collect(); + } + + let mut points = Vec::with_capacity(group_edges.len() * 2); + for edge in group_edges { + points.push((edge.start_lat, edge.start_lon)); + points.push((edge.end_lat, edge.end_lon)); + } + points +} + +fn nearest_corridor_target( + orphan: &CorridorResult, + keep: &[CorridorResult], + max_gap_m: f64, +) -> Option { + keep.iter() + .enumerate() + .filter(|(_, candidate)| candidate.canonical_direction == orphan.canonical_direction) + .filter_map(|(idx, candidate)| { + let gap_m = corridor_gap_m(orphan, candidate)?; + (gap_m <= max_gap_m).then_some((idx, gap_m, candidate.member_edge_ids.len())) + }) + .min_by(|a, b| { + a.1.partial_cmp(&b.1) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| b.2.cmp(&a.2)) + }) + .map(|(idx, _, _)| idx) + .or_else(|| { + keep.iter() + .enumerate() + .filter_map(|(idx, candidate)| { + let gap_m = corridor_gap_m(orphan, candidate)?; + (gap_m <= max_gap_m).then_some((idx, gap_m, candidate.member_edge_ids.len())) + }) + .min_by(|a, b| { + a.1.partial_cmp(&b.1) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| b.2.cmp(&a.2)) + }) + .map(|(idx, _, _)| idx) + }) +} + +fn corridor_gap_m(a: &CorridorResult, b: &CorridorResult) -> Option { + let mut best: Option = None; + + for &(alat, alon) in &a.sample_points { + for &(blat, blon) in &b.sample_points { + let gap_m = haversine_distance(alat, alon, blat, blon); + best = Some(match best { + Some(current) => current.min(gap_m), + None => gap_m, + }); } } - results + best } -// ============================================================================ -// Proximity merging -// ============================================================================ +fn merge_corridors(target: &mut CorridorResult, source: CorridorResult) { + target.member_edge_ids.extend(source.member_edge_ids); + target.sample_points.extend(source.sample_points); + + // Binary-insert each absorbed exit into the already-sorted list at the + // correct projection position. + let (dlat, dlon) = target.displacement; + let has_disp = dlat.abs() > 1e-6 || dlon.abs() > 1e-6; + for exit in source.exits { + let proj = if has_disp { + exit.lat * dlat + exit.lon * dlon + } else { + exit.lat + }; + let pos = target.exits.partition_point(|existing| { + let existing_proj = if has_disp { + existing.lat * dlat + existing.lon * dlon + } else { + existing.lat + }; + existing_proj < proj + }); + target.exits.insert(pos, exit); + } +} fn merge_components_by_proximity( edges_by_comp: &HashMap>, @@ -1684,3 +1748,100 @@ async fn write_corridors( edges_updated, }) } + +#[cfg(test)] +mod tests { + use super::{absorb_minor_corridors, CorridorExitResult, CorridorResult}; + + fn corridor( + corridor_id: i32, + direction: &str, + edge_count: usize, + exit_count: usize, + sample_points: &[(f64, f64)], + ) -> CorridorResult { + CorridorResult { + corridor_id, + highway: "I-TEST".to_string(), + canonical_direction: Some(direction.to_string()), + exits: (0..exit_count) + .map(|idx| CorridorExitResult { + exit_id: format!("exit-{corridor_id}-{idx}"), + ref_val: None, + name: None, + lat: idx as f64, + lon: idx as f64, + }) + .collect(), + member_edge_ids: (0..edge_count) + .map(|idx| format!("edge-{corridor_id}-{idx}")) + .collect(), + displacement: (1.0, 0.0), + sample_points: sample_points.to_vec(), + } + } + + #[test] + fn absorbed_corridors_do_not_force_dense_id_reuse() { + let kept = absorb_minor_corridors( + "I-TEST", + vec![ + corridor(10, "north", 100, 20, &[(45.0, -122.0)]), + corridor(11, "north", 2, 1, &[(45.001, -122.001)]), + corridor(12, "north", 90, 18, &[(41.0, -75.0)]), + ], + ); + + let ids: Vec = kept.iter().map(|corridor| corridor.corridor_id).collect(); + assert_eq!(ids, vec![10, 12]); + assert!(kept.iter().any(|corridor| { + corridor.corridor_id == 10 + && corridor + .member_edge_ids + .iter() + .any(|edge_id| edge_id == "edge-11-0") + })); + } + + #[test] + fn absorbed_corridor_chooses_nearest_same_direction_target() { + let kept = absorb_minor_corridors( + "I-TEST", + vec![ + corridor(10, "north", 100, 20, &[(45.0, -122.0)]), + corridor(11, "north", 2, 1, &[(41.424, -75.611)]), + corridor(12, "north", 90, 18, &[(41.423, -75.610)]), + ], + ); + + assert!(kept.iter().any(|corridor| { + corridor.corridor_id == 12 + && corridor + .member_edge_ids + .iter() + .any(|edge_id| edge_id == "edge-11-0") + })); + assert!(!kept.iter().any(|corridor| { + corridor.corridor_id == 10 + && corridor + .member_edge_ids + .iter() + .any(|edge_id| edge_id == "edge-11-0") + })); + } + + #[test] + fn distant_minor_corridor_is_left_separate() { + let kept = absorb_minor_corridors( + "I-TEST", + vec![ + corridor(10, "north", 100, 20, &[(45.0, -122.0)]), + corridor(11, "north", 2, 1, &[(30.0, -90.0)]), + corridor(12, "north", 90, 18, &[(41.0, -75.0)]), + ], + ); + + let ids: Vec = kept.iter().map(|corridor| corridor.corridor_id).collect(); + assert_eq!(ids, vec![10, 11, 12]); + } +} diff --git a/crates/derive/src/graph/mod.rs b/crates/derive/src/graph/mod.rs index 6666aad..6b9acb6 100644 --- a/crates/derive/src/graph/mod.rs +++ b/crates/derive/src/graph/mod.rs @@ -2,11 +2,16 @@ mod component_ids; mod compress; pub mod corridors; mod directions; +pub mod relation_corridors; + +use std::collections::HashMap; +use std::path::Path; use openinterstate_core::highway_ref::{is_interstate_highway_ref, normalize_highway_ref}; use sqlx::PgPool; use crate::canonical_types::{ParsedExit, ParsedHighway}; +use crate::interstate_relations::load_relation_refs_by_way; use component_ids::stabilize_component_ids; use compress::compress_highway_graph; @@ -15,15 +20,29 @@ use compress::compress_highway_graph; /// `highway_edges` + `exit_corridors`. /// /// 1. Load highways and exits from osm2pgsql canonical tables -/// 2. Group ways by highway ref (include motorway_link only when it carries a ref) +/// 2. Group ways by highway ref and adopt unlabeled high-class connector paths /// 3. Build directed adjacency graph per highway /// 4. Detect connected components (separates EB/WB carriageways) /// 5. Walk directed edges between stop nodes to create compressed edges /// 6. Compute cardinal direction per component /// 7. Write edges into `highway_edges` and corridor entries into `exit_corridors` -pub async fn build_graph(pool: &PgPool) -> Result { +pub async fn build_graph( + pool: &PgPool, + interstate_relation_cache: Option<&Path>, +) -> Result { + let relation_refs_by_way = if let Some(path) = interstate_relation_cache { + let refs = load_relation_refs_by_way(path)?; + tracing::info!( + "Loaded Interstate relation memberships for {} way ids", + refs.len() + ); + refs + } else { + HashMap::new() + }; + tracing::info!("Loading highways from osm2pgsql_v2_highways..."); - let highways = load_highways(pool).await?; + let highways = load_highways(pool, &relation_refs_by_way).await?; tracing::info!("Loaded {} highway ways", highways.len()); tracing::info!("Loading exits from osm2pgsql_v2_exits_nodes..."); @@ -50,9 +69,9 @@ pub async fn build_graph(pool: &PgPool) -> Result { sqlx::query( "INSERT INTO highway_edges \ (id, highway, component, start_node, end_node, length_m, \ - geom, min_lat, max_lat, min_lon, max_lon, polyline_json, direction) \ + geom, min_lat, max_lat, min_lon, max_lon, polyline_json, source_way_ids_json, direction) \ VALUES ($1, $2, $3, $4, $5, $6, \ - ST_GeomFromText($7, 4326), $8, $9, $10, $11, $12, $13) \ + ST_GeomFromText($7, 4326), $8, $9, $10, $11, $12, $13, $14) \ ON CONFLICT (id) DO NOTHING", ) .bind(&edge.id) @@ -67,6 +86,7 @@ pub async fn build_graph(pool: &PgPool) -> Result { .bind(edge.min_lon) .bind(edge.max_lon) .bind(&edge.polyline_json) + .bind(&edge.source_way_ids_json) .bind(&edge.direction) .execute(&mut *tx) .await?; @@ -117,58 +137,69 @@ pub async fn build_graph(pool: &PgPool) -> Result { type HighwayRow = ( i64, // way_id String, // highway type - Option, // ref + Option, // ref / int_ref Option, // oneway Vec, // node_ids String, // geom as GeoJSON ); -async fn load_highways(pool: &PgPool) -> Result, anyhow::Error> { - let rows: Vec = sqlx::query_as( - "SELECT way_id, highway, ref, oneway, node_ids, \ - ST_AsGeoJSON(geom)::text \ - FROM osm2pgsql_v2_highways \ - WHERE highway IN ('motorway', 'trunk') \ - AND node_ids IS NOT NULL \ - AND array_length(node_ids, 1) >= 2 \ - AND ( \ - highway = 'motorway' \ - OR COALESCE(NULLIF(BTRIM(ref), ''), NULLIF(BTRIM(tags ->> 'ref'), '')) IS NOT NULL \ - )", - ) - .fetch_all(pool) - .await?; +async fn load_highways( + pool: &PgPool, + relation_refs_by_way: &HashMap>, +) -> Result, anyhow::Error> { + let relation_way_ids: Vec = relation_refs_by_way.keys().copied().collect(); + let rows: Vec = if relation_way_ids.is_empty() { + sqlx::query_as( + "SELECT way_id, highway, \ + NULLIF(TRIM(BOTH ';' FROM CONCAT_WS(';', NULLIF(BTRIM(ref), ''), NULLIF(BTRIM(tags ->> 'int_ref'), ''))), '') AS ref_text, \ + oneway, node_ids, \ + ST_AsGeoJSON(geom)::text \ + FROM osm2pgsql_v2_highways \ + WHERE highway IN ('motorway', 'motorway_link', 'trunk', 'trunk_link') \ + AND node_ids IS NOT NULL \ + AND array_length(node_ids, 1) >= 2", + ) + .fetch_all(pool) + .await? + } else { + sqlx::query_as( + "SELECT way_id, highway, \ + NULLIF(TRIM(BOTH ';' FROM CONCAT_WS(';', NULLIF(BTRIM(ref), ''), NULLIF(BTRIM(tags ->> 'int_ref'), ''))), '') AS ref_text, \ + oneway, node_ids, \ + ST_AsGeoJSON(geom)::text \ + FROM osm2pgsql_v2_highways \ + WHERE (highway IN ('motorway', 'motorway_link', 'trunk', 'trunk_link') \ + OR way_id = ANY($1)) \ + AND node_ids IS NOT NULL \ + AND array_length(node_ids, 1) >= 2", + ) + .bind(&relation_way_ids) + .fetch_all(pool) + .await? + }; let mut highways = Vec::with_capacity(rows.len()); for row in rows { - let (_way_id, highway_type, ref_raw, oneway_raw, node_ids, geojson) = row; - - // NOTE: Do NOT add motorway_link here. Ref'd motorway_links at - // interchanges bridge EB/WB carriageways, creating merged components - // that the directional split shatters into many fragments. Instead, - // use corridor-level bridge merge on non-terminal nodes (corridors.rs). - let refs: Vec = if highway_type == "motorway" || highway_type == "trunk" { - ref_raw - .as_deref() - .unwrap_or("") - .split(';') - .filter_map(|r| normalize_highway_ref(r.trim())) - .collect() - } else { - Vec::new() - }; + let (way_id, highway_type, ref_raw, oneway_raw, node_ids, geojson) = row; + + let mut refs: Vec = ref_raw + .as_deref() + .unwrap_or("") + .split(';') + .filter_map(|r| normalize_highway_ref(r.trim())) + .collect(); + if let Some(relation_refs) = relation_refs_by_way.get(&way_id) { + refs.extend(relation_refs.iter().cloned()); + } + refs.sort(); + refs.dedup(); let has_interstate_ref = refs .iter() .any(|reference| is_interstate_highway_ref(reference)); - let has_explicit_ref = ref_raw - .as_deref() - .map(str::trim) - .is_some_and(|value| !value.is_empty()); + let has_explicit_ref = !refs.is_empty(); - match highway_type.as_str() { - "motorway" if !has_interstate_ref && has_explicit_ref => continue, - "trunk" if !has_interstate_ref => continue, - _ => {} + if !has_interstate_ref && has_explicit_ref { + continue; } let geometry = parse_geojson_coords(&geojson); @@ -190,6 +221,7 @@ async fn load_highways(pool: &PgPool) -> Result, anyhow::Erro } highways.push(ParsedHighway { + way_id, refs, nodes, geometry: geom, diff --git a/crates/derive/src/graph/relation_corridors.rs b/crates/derive/src/graph/relation_corridors.rs new file mode 100644 index 0000000..9a3040a --- /dev/null +++ b/crates/derive/src/graph/relation_corridors.rs @@ -0,0 +1,2203 @@ +use std::cmp::Ordering; +use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet}; +use std::path::Path; + +use openinterstate_core::geo::haversine_distance; +use openinterstate_core::highway_ref::{is_interstate_highway_ref, normalize_highway_ref}; +use sqlx::PgPool; + +use crate::interstate_relations::{ + group_relation_members, load_interstate_relation_members, normalize_direction, + InterstateRouteGroup, +}; + +use super::corridors::BuildCorridorsStats; + +const CONNECTOR_HIGHWAY_TYPES: &[&str] = &[ + "motorway", + "motorway_link", + "trunk", + "trunk_link", + "primary", + "primary_link", + "secondary", + "secondary_link", + "tertiary", + "tertiary_link", + "service", +]; +const MIN_ROUTE_LENGTH_M: f64 = 50_000.0; +const SHORT_FALLBACK_CONNECTOR_MAX_COST_M: u64 = 10_000; +const DISCONNECTED_MICRO_SEGMENT_MAX_LENGTH_M: f64 = 2_000.0; +const DISCONNECTED_MICRO_SEGMENT_MAX_SHARE: f64 = 0.05; + +type HighwayRow = ( + i64, // way_id + String, // highway type + Option, // ref text + Option, // oneway + Vec, // node_ids + String, // geom as GeoJSON +); + +#[derive(Debug, Clone)] +struct RouteWay { + way_id: i64, + refs: Vec, + nodes: Vec, + geometry: Vec<(f64, f64)>, + highway_type: String, + is_oneway: bool, +} + +#[derive(Debug, Clone)] +struct ConnectorGraph { + adjacency: HashMap>, + node_coords: HashMap, + arc_coords: HashMap<(i64, i64), Vec<[f64; 2]>>, +} + +#[derive(Debug, Clone, Copy)] +struct ConnectorArc { + next: i64, + weight_m: u64, + way_id: i64, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +struct SearchState { + cost_m: u64, + node: i64, +} + +impl Ord for SearchState { + fn cmp(&self, other: &Self) -> Ordering { + other + .cost_m + .cmp(&self.cost_m) + .then_with(|| self.node.cmp(&other.node)) + } +} + +impl PartialOrd for SearchState { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +#[derive(Debug, Clone)] +struct ExitRow { + exit_id: String, + highway: String, + graph_node: i64, + ref_val: Option, + name: Option, + lat: f64, + lon: f64, +} + +#[derive(Debug, Clone)] +struct HighwayEdgeRow { + edge_id: String, + highway: String, + direction: Option, + source_way_ids: Vec, +} + +#[derive(Debug, Clone)] +struct CorridorDraft { + corridor_id: i32, + highway: String, + canonical_direction: String, + root_relation_id: i64, + geometry_json: String, + source_way_ids: Vec, + edge_ids: Vec, + exits: Vec, +} + +#[derive(Debug, Clone)] +struct CorridorExitDraft { + exit_id: String, + ref_val: Option, + name: Option, + lat: f64, + lon: f64, + sort_key_m: f64, +} + +#[derive(Debug, Default, Clone)] +struct RouteAssembly { + segments: Vec>, + source_way_ids: BTreeSet, +} + +#[derive(Debug, Clone)] +struct OrientedWay { + way_id: i64, + nodes: Vec, + geometry: Vec<[f64; 2]>, + preference_penalty: u8, +} + +#[derive(Debug, Clone)] +struct MemberAttachment { + points: Vec<[f64; 2]>, + tail_node: i64, + source_way_ids: Vec, + cost_m: u64, + preference_penalty: u8, +} + +#[derive(Debug, Clone)] +struct ConnectorPath { + target_node: i64, + points: Vec<[f64; 2]>, + way_ids: Vec, + cost_m: u64, +} + +pub async fn build_corridors( + pool: &PgPool, + interstate_relation_cache: Option<&Path>, +) -> Result { + let Some(cache_path) = interstate_relation_cache else { + tracing::warn!( + "No Interstate relation cache provided; falling back to legacy corridor builder" + ); + return super::corridors::build_corridors(pool).await; + }; + + let relation_members = load_interstate_relation_members(cache_path)?; + let route_groups = filter_route_groups(group_relation_members(&relation_members)); + if route_groups.is_empty() { + tracing::warn!( + "No Interstate relation groups available; falling back to legacy corridor builder" + ); + return super::corridors::build_corridors(pool).await; + } + + tracing::info!( + "Building relation-backed Interstate corridors from {} official route groups", + route_groups.len() + ); + + let ways_by_id = load_route_ways(pool, &relation_members).await?; + let connector_graph = build_connector_graph(ways_by_id.values().collect::>().as_slice()); + let highway_edges = load_highway_edges(pool).await?; + let exits = load_exit_rows(pool).await?; + + let mut exits_by_highway: HashMap> = HashMap::new(); + for exit in exits { + exits_by_highway + .entry(exit.highway.clone()) + .or_default() + .push(exit); + } + + let mut edge_rows_by_highway: HashMap> = HashMap::new(); + for edge in highway_edges { + edge_rows_by_highway + .entry(edge.highway.clone()) + .or_default() + .push(edge); + } + + let mut drafts = Vec::new(); + let mut next_corridor_id: i32 = 1; + for group in route_groups { + let Some(draft) = build_corridor_draft( + &group, + &ways_by_id, + &connector_graph, + edge_rows_by_highway + .get(&group.highway) + .map(Vec::as_slice) + .unwrap_or(&[]), + exits_by_highway + .get(&group.highway) + .map(Vec::as_slice) + .unwrap_or(&[]), + next_corridor_id, + )? + else { + continue; + }; + next_corridor_id += 1; + drafts.push(draft); + } + + write_corridors(pool, &drafts).await +} + +fn filter_route_groups(groups: Vec) -> Vec { + let directional_roots: HashSet<(String, i64)> = groups + .iter() + .filter(|group| group.direction.is_some()) + .map(|group| (group.highway.clone(), group.root_relation_id)) + .collect(); + + groups + .into_iter() + .filter(|group| { + group.direction.is_some() + || !directional_roots.contains(&(group.highway.clone(), group.root_relation_id)) + }) + .collect() +} + +async fn load_route_ways( + pool: &PgPool, + relation_members: &[crate::interstate_relations::InterstateRelationMember], +) -> Result, anyhow::Error> { + let relation_way_ids: Vec = relation_members + .iter() + .map(|member| member.way_id) + .collect(); + + let rows: Vec = sqlx::query_as( + "SELECT way_id, highway, \ + NULLIF(TRIM(BOTH ';' FROM CONCAT_WS(';', NULLIF(BTRIM(ref), ''), NULLIF(BTRIM(tags ->> 'int_ref'), ''))), '') AS ref_text, \ + oneway, node_ids, \ + ST_AsGeoJSON(geom)::text \ + FROM osm2pgsql_v2_highways \ + WHERE (highway IN ('motorway', 'motorway_link', 'trunk', 'trunk_link', 'primary', 'primary_link', \ + 'secondary', 'secondary_link', 'tertiary', 'tertiary_link', 'service') \ + OR way_id = ANY($1)) \ + AND node_ids IS NOT NULL \ + AND array_length(node_ids, 1) >= 2", + ) + .bind(&relation_way_ids) + .fetch_all(pool) + .await?; + + let mut ways_by_id = HashMap::with_capacity(rows.len()); + for (way_id, highway_type, ref_raw, oneway_raw, node_ids, geojson) in rows { + let refs: Vec = ref_raw + .as_deref() + .unwrap_or("") + .split(';') + .filter_map(|value| normalize_highway_ref(value.trim())) + .collect(); + let geometry = parse_geojson_coords(&geojson); + if geometry.len() < 2 || geometry.len() != node_ids.len() { + continue; + } + + let oneway_tag = oneway_raw.as_deref().unwrap_or(""); + let mut nodes = node_ids; + let mut geom = geometry; + let is_oneway = match oneway_tag { + "no" => false, + "-1" | "yes" | "1" => true, + _ => highway_type == "motorway" || highway_type == "motorway_link", + }; + if oneway_tag == "-1" { + nodes.reverse(); + geom.reverse(); + } + + ways_by_id.insert( + way_id, + RouteWay { + way_id, + refs, + nodes, + geometry: geom, + highway_type, + is_oneway, + }, + ); + } + + Ok(ways_by_id) +} + +async fn load_highway_edges(pool: &PgPool) -> Result, anyhow::Error> { + let rows: Vec<(String, String, Option, String)> = sqlx::query_as( + "SELECT id, highway, direction, source_way_ids_json \ + FROM highway_edges \ + WHERE highway LIKE 'I-%'", + ) + .fetch_all(pool) + .await?; + + Ok(rows + .into_iter() + .map( + |(edge_id, highway, direction, source_way_ids_json)| HighwayEdgeRow { + edge_id, + highway, + direction: direction.and_then(|value| normalize_direction(&value)), + source_way_ids: parse_way_ids_json(&source_way_ids_json), + }, + ) + .collect()) +} + +async fn load_exit_rows(pool: &PgPool) -> Result, anyhow::Error> { + let rows: Vec<( + String, + String, + i64, + Option, + Option, + f64, + f64, + )> = sqlx::query_as( + "SELECT ec.exit_id, ec.highway, ec.graph_node, e.ref, e.name, \ + ST_Y(e.geom) AS lat, ST_X(e.geom) AS lon \ + FROM exit_corridors ec \ + JOIN exits e ON e.id = ec.exit_id \ + WHERE ec.highway LIKE 'I-%'", + ) + .fetch_all(pool) + .await?; + + Ok(rows + .into_iter() + .map( + |(exit_id, highway, graph_node, ref_val, name, lat, lon)| ExitRow { + exit_id, + highway, + graph_node, + ref_val, + name, + lat, + lon, + }, + ) + .collect()) +} + +fn build_corridor_draft( + group: &InterstateRouteGroup, + ways_by_id: &HashMap, + connector_graph: &ConnectorGraph, + edge_rows: &[HighwayEdgeRow], + exit_rows: &[ExitRow], + corridor_id: i32, +) -> Result, anyhow::Error> { + let canonical_direction = group + .direction + .clone() + .unwrap_or_else(|| "north".to_string()); + + let ordered_members = ordered_group_members(group, ways_by_id); + let mut assigned_way_ids: BTreeSet = group + .members + .iter() + .map(|member| member.way_id) + .filter(|way_id| ways_by_id.contains_key(way_id)) + .collect(); + if assigned_way_ids.is_empty() { + return Ok(None); + } + + adopt_relation_connector_paths( + &ordered_members, + &mut assigned_way_ids, + ways_by_id, + connector_graph, + ); + adopt_connector_paths( + &mut assigned_way_ids, + ways_by_id, + connector_graph, + &group.highway, + ); + + let assigned_ways: Vec<&RouteWay> = assigned_way_ids + .iter() + .filter_map(|way_id| ways_by_id.get(way_id)) + .collect(); + let route_segments = + prune_micro_route_segments(build_route_segments(&assigned_ways, &canonical_direction)); + + let total_length_m: f64 = route_segments + .iter() + .map(|segment| segment_length_m(segment)) + .sum(); + if total_length_m < MIN_ROUTE_LENGTH_M { + return Ok(None); + } + + let geometry_json = serialize_geometry_geojson(&route_segments)?; + let assigned_nodes: HashSet = assigned_ways + .iter() + .flat_map(|way| way.nodes.iter().copied()) + .collect(); + let exits = order_exits_along_route( + exit_rows + .iter() + .filter(|exit| assigned_nodes.contains(&exit.graph_node)) + .cloned() + .collect(), + &route_segments, + ); + let edge_ids = matched_edge_ids(edge_rows, &assigned_way_ids, Some(&canonical_direction)); + + let source_way_ids: Vec = assigned_way_ids.into_iter().collect(); + if route_segments.len() > 1 { + tracing::warn!( + "{} {} (relation {}) still exports as {} disconnected corridor segments", + group.highway, + canonical_direction, + group.root_relation_id, + route_segments.len() + ); + } else { + tracing::info!( + "{} {} (relation {}) resolved to a continuous corridor using {} ways", + group.highway, + canonical_direction, + group.root_relation_id, + source_way_ids.len() + ); + } + + Ok(Some(CorridorDraft { + corridor_id, + highway: group.highway.clone(), + canonical_direction, + root_relation_id: group.root_relation_id, + geometry_json, + source_way_ids, + edge_ids, + exits, + })) +} + +fn assemble_relation_geometry( + group: &InterstateRouteGroup, + ways_by_id: &HashMap, + connector_graph: &ConnectorGraph, +) -> RouteAssembly { + let mut assembly = RouteAssembly::default(); + let mut current_segment = Vec::new(); + let mut current_tail = None; + let canonical_direction = group.direction.as_deref().unwrap_or("north"); + + let ordered_members: Vec<_> = group + .members + .iter() + .filter(|member| ways_by_id.contains_key(&member.way_id)) + .collect(); + + for (idx, member) in ordered_members.iter().enumerate() { + let next_member = ordered_members.get(idx + 1).copied(); + let attachment = if let Some(tail_node) = current_tail { + choose_member_attachment( + tail_node, + member, + ways_by_id, + connector_graph, + canonical_direction, + ) + } else { + choose_start_attachment( + member, + next_member, + ways_by_id, + connector_graph, + canonical_direction, + ) + }; + + let Some((attachment, starts_new_segment)) = attachment else { + continue; + }; + + if starts_new_segment && current_segment.len() >= 2 { + assembly.segments.push(current_segment); + current_segment = Vec::new(); + } + + append_points_dedup(&mut current_segment, &attachment.points); + assembly + .source_way_ids + .extend(attachment.source_way_ids.into_iter()); + current_tail = Some(attachment.tail_node); + } + + if current_segment.len() >= 2 { + assembly.segments.push(current_segment); + } + + assembly +} + +fn choose_start_attachment( + member: &crate::interstate_relations::InterstateRelationMember, + next_member: Option<&crate::interstate_relations::InterstateRelationMember>, + ways_by_id: &HashMap, + connector_graph: &ConnectorGraph, + canonical_direction: &str, +) -> Option<(MemberAttachment, bool)> { + let way = ways_by_id.get(&member.way_id)?; + let candidates = oriented_way_candidates(way, member.role.as_deref(), canonical_direction); + + let best = candidates.into_iter().min_by(|a, b| { + let a_next_cost = next_member + .and_then(|next| { + estimate_transition_cost( + a.tail_node(), + next, + ways_by_id, + connector_graph, + canonical_direction, + ) + }) + .unwrap_or(0); + let b_next_cost = next_member + .and_then(|next| { + estimate_transition_cost( + b.tail_node(), + next, + ways_by_id, + connector_graph, + canonical_direction, + ) + }) + .unwrap_or(0); + + a_next_cost + .cmp(&b_next_cost) + .then_with(|| a.preference_penalty.cmp(&b.preference_penalty)) + .then_with(|| { + direction_match_rank(&a.geometry, canonical_direction) + .cmp(&direction_match_rank(&b.geometry, canonical_direction)) + }) + })?; + + Some(( + MemberAttachment { + points: best.geometry.clone(), + tail_node: best.tail_node(), + source_way_ids: vec![best.way_id], + cost_m: 0, + preference_penalty: best.preference_penalty, + }, + false, + )) +} + +fn choose_member_attachment( + current_tail: i64, + member: &crate::interstate_relations::InterstateRelationMember, + ways_by_id: &HashMap, + connector_graph: &ConnectorGraph, + canonical_direction: &str, +) -> Option<(MemberAttachment, bool)> { + let way = ways_by_id.get(&member.way_id)?; + let candidates = oriented_way_candidates(way, member.role.as_deref(), canonical_direction); + if candidates.is_empty() { + return None; + } + + let best_connected = candidates + .iter() + .filter_map(|candidate| connect_candidate(current_tail, candidate, connector_graph)) + .min_by(|a, b| { + a.cost_m + .cmp(&b.cost_m) + .then_with(|| a.preference_penalty.cmp(&b.preference_penalty)) + }); + + if let Some(best) = best_connected { + return Some((best, false)); + } + + let best_standalone = candidates.into_iter().min_by(|a, b| { + a.preference_penalty + .cmp(&b.preference_penalty) + .then_with(|| { + direction_match_rank(&a.geometry, canonical_direction) + .cmp(&direction_match_rank(&b.geometry, canonical_direction)) + }) + })?; + + let tail_node = best_standalone.tail_node(); + let way_id = best_standalone.way_id; + let preference_penalty = best_standalone.preference_penalty; + Some(( + MemberAttachment { + points: best_standalone.geometry, + tail_node, + source_way_ids: vec![way_id], + cost_m: u64::MAX / 4, + preference_penalty, + }, + true, + )) +} + +fn estimate_transition_cost( + current_tail: i64, + member: &crate::interstate_relations::InterstateRelationMember, + ways_by_id: &HashMap, + connector_graph: &ConnectorGraph, + canonical_direction: &str, +) -> Option { + let way = ways_by_id.get(&member.way_id)?; + oriented_way_candidates(way, member.role.as_deref(), canonical_direction) + .into_iter() + .filter_map(|candidate| { + if candidate.nodes.contains(¤t_tail) { + return Some(0); + } + shortest_connector_path_to_nodes(current_tail, &candidate.nodes, connector_graph) + .map(|path| path.cost_m) + }) + .min() +} + +fn connect_candidate( + current_tail: i64, + candidate: &OrientedWay, + connector_graph: &ConnectorGraph, +) -> Option { + if let Some(entry_idx) = find_node_index(&candidate.nodes, current_tail) { + let points = candidate.geometry.get(entry_idx..)?.to_vec(); + return Some(MemberAttachment { + points, + tail_node: candidate.tail_node(), + source_way_ids: vec![candidate.way_id], + cost_m: 0, + preference_penalty: candidate.preference_penalty, + }); + } + + let path = shortest_connector_path_to_nodes(current_tail, &candidate.nodes, connector_graph)?; + let entry_idx = find_node_index(&candidate.nodes, path.target_node)?; + let mut points = path.points; + points.extend(candidate.geometry.iter().skip(entry_idx).copied()); + let mut source_way_ids = path.way_ids; + source_way_ids.push(candidate.way_id); + + Some(MemberAttachment { + points, + tail_node: candidate.tail_node(), + source_way_ids, + cost_m: path.cost_m, + preference_penalty: candidate.preference_penalty, + }) +} + +fn oriented_way_candidates( + way: &RouteWay, + role: Option<&str>, + canonical_direction: &str, +) -> Vec { + if way.nodes.len() < 2 || way.nodes.len() != way.geometry.len() { + return Vec::new(); + } + + let role_hint = member_role_hint(role); + let forward_geometry: Vec<[f64; 2]> = + way.geometry.iter().map(|&(lat, lon)| [lat, lon]).collect(); + let mut candidates = vec![OrientedWay { + way_id: way.way_id, + nodes: way.nodes.clone(), + geometry: forward_geometry.clone(), + preference_penalty: orientation_preference_penalty( + &forward_geometry, + OrientationState::Forward, + role_hint, + canonical_direction, + ), + }]; + + if !way.is_oneway { + let mut reverse_nodes = way.nodes.clone(); + reverse_nodes.reverse(); + let mut reverse_geometry = forward_geometry; + reverse_geometry.reverse(); + candidates.push(OrientedWay { + way_id: way.way_id, + nodes: reverse_nodes, + geometry: reverse_geometry.clone(), + preference_penalty: orientation_preference_penalty( + &reverse_geometry, + OrientationState::Reverse, + role_hint, + canonical_direction, + ), + }); + } + + candidates +} + +fn append_points_dedup(segment: &mut Vec<[f64; 2]>, points: &[[f64; 2]]) { + let skip_first = usize::from( + !segment.is_empty() + && !points.is_empty() + && segment.last().copied() == points.first().copied(), + ); + segment.extend(points.iter().skip(skip_first).copied()); +} + +fn find_node_index(nodes: &[i64], node_id: i64) -> Option { + nodes.iter().position(|&candidate| candidate == node_id) +} + +fn build_connector_graph(ways: &[&RouteWay]) -> ConnectorGraph { + let mut adjacency: HashMap> = HashMap::new(); + let mut node_coords: HashMap = HashMap::new(); + let mut arc_coords: HashMap<(i64, i64), Vec<[f64; 2]>> = HashMap::new(); + for way in ways { + if !CONNECTOR_HIGHWAY_TYPES.contains(&way.highway_type.as_str()) { + continue; + } + if way.nodes.len() < 2 || way.nodes.len() != way.geometry.len() { + continue; + } + + for (idx, &node_id) in way.nodes.iter().enumerate() { + node_coords + .entry(node_id) + .or_insert([way.geometry[idx].0, way.geometry[idx].1]); + } + + for idx in 0..way.nodes.len() - 1 { + let start = way.nodes[idx]; + let end = way.nodes[idx + 1]; + let start_coord = [way.geometry[idx].0, way.geometry[idx].1]; + let end_coord = [way.geometry[idx + 1].0, way.geometry[idx + 1].1]; + let weight_m = + haversine_distance(start_coord[0], start_coord[1], end_coord[0], end_coord[1]) + .round() + .max(1.0) as u64; + + adjacency.entry(start).or_default().push(ConnectorArc { + next: end, + weight_m, + way_id: way.way_id, + }); + arc_coords + .entry((start, end)) + .or_insert_with(|| vec![start_coord, end_coord]); + if !way.is_oneway { + adjacency.entry(end).or_default().push(ConnectorArc { + next: start, + weight_m, + way_id: way.way_id, + }); + arc_coords + .entry((end, start)) + .or_insert_with(|| vec![end_coord, start_coord]); + } + } + } + + ConnectorGraph { + adjacency, + node_coords, + arc_coords, + } +} + +fn shortest_connector_path_to_nodes( + start: i64, + targets: &[i64], + connector_graph: &ConnectorGraph, +) -> Option { + if targets.is_empty() { + return None; + } + + let target_set: HashSet = targets.iter().copied().collect(); + let mut heap = BinaryHeap::new(); + let mut dist: HashMap = HashMap::new(); + let mut prev: HashMap = HashMap::new(); + + dist.insert(start, 0); + heap.push(SearchState { + cost_m: 0, + node: start, + }); + + while let Some(SearchState { cost_m, node }) = heap.pop() { + if cost_m > dist.get(&node).copied().unwrap_or(u64::MAX) { + continue; + } + if node != start && target_set.contains(&node) { + return reconstruct_connector_path(start, node, cost_m, &prev, connector_graph); + } + + let Some(arcs) = connector_graph.adjacency.get(&node) else { + continue; + }; + for arc in arcs { + let next_cost = cost_m.saturating_add(arc.weight_m); + if next_cost >= dist.get(&arc.next).copied().unwrap_or(u64::MAX) { + continue; + } + dist.insert(arc.next, next_cost); + prev.insert(arc.next, (node, arc.way_id)); + heap.push(SearchState { + cost_m: next_cost, + node: arc.next, + }); + } + } + + None +} + +fn reconstruct_connector_path( + start: i64, + target: i64, + cost_m: u64, + prev: &HashMap, + connector_graph: &ConnectorGraph, +) -> Option { + let mut node_path = vec![target]; + let mut way_ids = Vec::new(); + let mut current = target; + while current != start { + let &(prev_node, way_id) = prev.get(¤t)?; + node_path.push(prev_node); + way_ids.push(way_id); + current = prev_node; + } + node_path.reverse(); + way_ids.reverse(); + way_ids.dedup(); + + let mut points = Vec::new(); + for pair in node_path.windows(2) { + let coords = connector_graph + .arc_coords + .get(&(pair[0], pair[1])) + .cloned() + .or_else(|| { + connector_graph + .arc_coords + .get(&(pair[1], pair[0])) + .map(|coords| { + let mut reversed = coords.clone(); + reversed.reverse(); + reversed + }) + }) + .or_else(|| { + Some(vec![ + *connector_graph.node_coords.get(&pair[0])?, + *connector_graph.node_coords.get(&pair[1])?, + ]) + })?; + append_points_dedup(&mut points, &coords); + } + + Some(ConnectorPath { + target_node: target, + points, + way_ids, + cost_m, + }) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum OrientationState { + Forward, + Reverse, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum MemberRoleHint { + Forward, + Backward, + Cardinal, +} + +fn member_role_hint(role: Option<&str>) -> Option { + let role = role?.trim().to_ascii_lowercase(); + if role.is_empty() { + return None; + } + if role.contains("forward") { + return Some(MemberRoleHint::Forward); + } + if role.contains("backward") { + return Some(MemberRoleHint::Backward); + } + if role + .split(|ch: char| !ch.is_ascii_alphabetic()) + .any(|token| matches!(token, "north" | "south" | "east" | "west")) + { + return Some(MemberRoleHint::Cardinal); + } + None +} + +fn orientation_preference_penalty( + geometry: &[[f64; 2]], + orientation: OrientationState, + role_hint: Option, + canonical_direction: &str, +) -> u8 { + let mut penalty = 0; + match role_hint { + Some(MemberRoleHint::Forward) if orientation == OrientationState::Reverse => penalty += 1, + Some(MemberRoleHint::Backward) if orientation == OrientationState::Forward => penalty += 1, + Some(MemberRoleHint::Cardinal) + if !polyline_matches_direction(geometry, canonical_direction) => + { + penalty += 1; + } + _ => {} + } + if !polyline_matches_direction(geometry, canonical_direction) { + penalty += 1; + } + penalty +} + +fn direction_match_rank(geometry: &[[f64; 2]], canonical_direction: &str) -> u8 { + u8::from(!polyline_matches_direction(geometry, canonical_direction)) +} + +impl OrientedWay { + fn tail_node(&self) -> i64 { + *self.nodes.last().unwrap_or(&0) + } +} + +fn ordered_group_members<'a>( + group: &'a InterstateRouteGroup, + ways_by_id: &HashMap, +) -> Vec<&'a crate::interstate_relations::InterstateRelationMember> { + let members: Vec<_> = group + .members + .iter() + .filter(|member| ways_by_id.contains_key(&member.way_id)) + .collect(); + let reversed: Vec<_> = members.iter().rev().copied().collect(); + if sequence_break_score(&reversed, ways_by_id) < sequence_break_score(&members, ways_by_id) { + reversed + } else { + members + } +} + +fn sequence_break_score( + members: &[&crate::interstate_relations::InterstateRelationMember], + ways_by_id: &HashMap, +) -> usize { + let mut breaks = 0usize; + let mut prev_tail = None; + for member in members { + let Some(way) = ways_by_id.get(&member.way_id) else { + continue; + }; + if let Some(tail_node) = prev_tail { + if !way.nodes.contains(&tail_node) { + breaks += 1; + } + } + prev_tail = way.nodes.last().copied(); + } + breaks +} + +fn adopt_relation_connector_paths( + ordered_members: &[&crate::interstate_relations::InterstateRelationMember], + assigned_way_ids: &mut BTreeSet, + ways_by_id: &HashMap, + connector_graph: &ConnectorGraph, +) { + loop { + let assigned_ways: Vec<&RouteWay> = assigned_way_ids + .iter() + .filter_map(|way_id| ways_by_id.get(way_id)) + .collect(); + let components = way_connected_components(&assigned_ways); + if components.len() <= 1 { + break; + } + + let mut component_by_node: HashMap = HashMap::new(); + for (component_id, nodes) in &components { + for &node_id in nodes { + component_by_node.insert(node_id, *component_id); + } + } + + let mut adopted_this_round = 0usize; + for pair in ordered_members.windows(2) { + let Some(prev_way) = ways_by_id.get(&pair[0].way_id) else { + continue; + }; + let Some(next_way) = ways_by_id.get(&pair[1].way_id) else { + continue; + }; + let allowed_refs = allowed_refs_for_pair(prev_way, next_way); + + let prev_components: HashSet = prev_way + .nodes + .iter() + .filter_map(|node_id| component_by_node.get(node_id).copied()) + .collect(); + let next_components: HashSet = next_way + .nodes + .iter() + .filter_map(|node_id| component_by_node.get(node_id).copied()) + .collect(); + if !prev_components.is_empty() + && !next_components.is_empty() + && prev_components + .iter() + .any(|component_id| next_components.contains(component_id)) + { + continue; + } + + let source_nodes: HashSet = prev_way.nodes.iter().copied().collect(); + let target_nodes: HashMap = next_way + .nodes + .iter() + .copied() + .map(|node_id| (node_id, 0)) + .collect(); + let Some((_cost_m, _target_component, path_way_ids)) = shortest_connector_path_to_any( + &source_nodes, + &target_nodes, + connector_graph, + ways_by_id, + &allowed_refs, + false, + None, + ) + .or_else(|| { + shortest_connector_path_to_any( + &source_nodes, + &target_nodes, + connector_graph, + ways_by_id, + &allowed_refs, + true, + Some(SHORT_FALLBACK_CONNECTOR_MAX_COST_M), + ) + }) else { + continue; + }; + + for way_id in path_way_ids { + adopted_this_round += usize::from(assigned_way_ids.insert(way_id)); + } + } + + if adopted_this_round == 0 { + break; + } + } +} + +fn adopt_connector_paths( + assigned_way_ids: &mut BTreeSet, + ways_by_id: &HashMap, + connector_graph: &ConnectorGraph, + route_highway: &str, +) { + let allowed_refs = allowed_refs_for_route(route_highway, assigned_way_ids, ways_by_id); + + loop { + let assigned_ways: Vec<&RouteWay> = assigned_way_ids + .iter() + .filter_map(|way_id| ways_by_id.get(way_id)) + .collect(); + let components = way_connected_components(&assigned_ways); + if components.len() <= 1 { + break; + } + + let mut best_path: Option<(u64, Vec)> = None; + for (source_component, source_nodes) in &components { + let mut target_nodes: HashMap = HashMap::new(); + for (target_component, nodes) in &components { + if target_component == source_component { + continue; + } + for &node_id in nodes { + target_nodes.insert(node_id, *target_component); + } + } + + let Some(candidate) = shortest_connector_path_to_any( + source_nodes, + &target_nodes, + connector_graph, + ways_by_id, + &allowed_refs, + false, + None, + ) + .or_else(|| { + shortest_connector_path_to_any( + source_nodes, + &target_nodes, + connector_graph, + ways_by_id, + &allowed_refs, + true, + Some(SHORT_FALLBACK_CONNECTOR_MAX_COST_M), + ) + }) else { + continue; + }; + if best_path + .as_ref() + .is_none_or(|(best_cost, _)| candidate.0 < *best_cost) + { + best_path = Some((candidate.0, candidate.2)); + } + } + + let Some((_cost_m, path_way_ids)) = best_path else { + break; + }; + + let mut adopted_this_round = 0usize; + for way_id in path_way_ids { + adopted_this_round += usize::from(assigned_way_ids.insert(way_id)); + } + if adopted_this_round == 0 { + break; + } + } +} + +fn shortest_connector_path_to_any( + sources: &HashSet, + targets_by_node: &HashMap, + connector_graph: &ConnectorGraph, + ways_by_id: &HashMap, + allowed_interstate_refs: &HashSet, + allow_short_high_class_fallback: bool, + max_cost_m: Option, +) -> Option<(u64, i32, Vec)> { + if sources.is_empty() || targets_by_node.is_empty() { + return None; + } + + let mut heap = BinaryHeap::new(); + let mut dist: HashMap = HashMap::new(); + let mut prev: HashMap = HashMap::new(); + + for &source in sources { + dist.insert(source, 0); + heap.push(SearchState { + cost_m: 0, + node: source, + }); + } + + while let Some(SearchState { cost_m, node }) = heap.pop() { + if cost_m > dist.get(&node).copied().unwrap_or(u64::MAX) { + continue; + } + if let Some(&target_component) = targets_by_node.get(&node) { + if !sources.contains(&node) { + let mut way_ids = Vec::new(); + let mut current = node; + while !sources.contains(¤t) { + let &(prev_node, way_id) = prev.get(¤t)?; + way_ids.push(way_id); + current = prev_node; + } + way_ids.reverse(); + way_ids.dedup(); + return Some((cost_m, target_component, way_ids)); + } + } + + let Some(arcs) = connector_graph.adjacency.get(&node) else { + continue; + }; + for arc in arcs { + let Some(way) = ways_by_id.get(&arc.way_id) else { + continue; + }; + if !connector_way_allowed_for_refs( + way, + allowed_interstate_refs, + allow_short_high_class_fallback, + ) { + continue; + } + let next_cost = cost_m.saturating_add(arc.weight_m); + if max_cost_m.is_some_and(|limit| next_cost > limit) { + continue; + } + if next_cost >= dist.get(&arc.next).copied().unwrap_or(u64::MAX) { + continue; + } + dist.insert(arc.next, next_cost); + prev.insert(arc.next, (node, arc.way_id)); + heap.push(SearchState { + cost_m: next_cost, + node: arc.next, + }); + } + } + + None +} + +fn way_connected_components(ways: &[&RouteWay]) -> Vec<(i32, HashSet)> { + let mut adjacency: HashMap> = HashMap::new(); + for way in ways { + for pair in way.nodes.windows(2) { + adjacency.entry(pair[0]).or_default().insert(pair[1]); + adjacency.entry(pair[1]).or_default().insert(pair[0]); + } + } + + let mut components = Vec::new(); + let mut component_idx = 0_i32; + let mut visited: HashSet = HashSet::new(); + let mut sorted_nodes: Vec = adjacency.keys().copied().collect(); + sorted_nodes.sort_unstable(); + + for seed in sorted_nodes { + if !visited.insert(seed) { + continue; + } + + let mut frontier = vec![seed]; + let mut component_nodes = HashSet::from([seed]); + while let Some(node) = frontier.pop() { + let Some(neighbors) = adjacency.get(&node) else { + continue; + }; + for &next in neighbors { + if visited.insert(next) { + component_nodes.insert(next); + frontier.push(next); + } + } + } + + components.push((component_idx, component_nodes)); + component_idx += 1; + } + + components +} + +fn build_route_segments(ways: &[&RouteWay], canonical_direction: &str) -> Vec> { + if ways.is_empty() { + return Vec::new(); + } + + let mut node_coords: HashMap = HashMap::new(); + let mut directed_adjacency: HashMap> = HashMap::new(); + let mut undirected_adjacency: HashMap> = HashMap::new(); + let mut arc_coords: HashMap<(i64, i64), Vec<[f64; 2]>> = HashMap::new(); + + for way in ways { + if way.nodes.len() < 2 || way.nodes.len() != way.geometry.len() { + continue; + } + + for (idx, &node_id) in way.nodes.iter().enumerate() { + let coord = [way.geometry[idx].0, way.geometry[idx].1]; + node_coords.entry(node_id).or_insert(coord); + } + + for idx in 0..way.nodes.len() - 1 { + let start = way.nodes[idx]; + let end = way.nodes[idx + 1]; + let start_coord = [way.geometry[idx].0, way.geometry[idx].1]; + let end_coord = [way.geometry[idx + 1].0, way.geometry[idx + 1].1]; + let weight_m = + haversine_distance(start_coord[0], start_coord[1], end_coord[0], end_coord[1]); + + directed_adjacency + .entry(start) + .or_default() + .push((end, weight_m)); + arc_coords + .entry((start, end)) + .or_insert_with(|| vec![start_coord, end_coord]); + if !way.is_oneway { + directed_adjacency + .entry(end) + .or_default() + .push((start, weight_m)); + arc_coords + .entry((end, start)) + .or_insert_with(|| vec![end_coord, start_coord]); + } + + undirected_adjacency.entry(start).or_default().insert(end); + undirected_adjacency.entry(end).or_default().insert(start); + } + } + + let component_by_node = compute_components(&undirected_adjacency); + let mut nodes_by_component: HashMap> = HashMap::new(); + for (&node_id, &component) in &component_by_node { + nodes_by_component + .entry(component) + .or_default() + .push(node_id); + } + + let mut segments = Vec::new(); + let mut component_keys: Vec = nodes_by_component.keys().copied().collect(); + component_keys.sort_unstable(); + for component in component_keys { + let Some(nodes) = nodes_by_component.get(&component) else { + continue; + }; + let Some(path_nodes) = best_component_path( + nodes, + &node_coords, + &directed_adjacency, + &undirected_adjacency, + canonical_direction, + ) else { + continue; + }; + let mut points = Vec::new(); + for pair in path_nodes.windows(2) { + let coords = arc_coords.get(&(pair[0], pair[1])).cloned().or_else(|| { + arc_coords.get(&(pair[1], pair[0])).map(|coords| { + let mut reversed = coords.clone(); + reversed.reverse(); + reversed + }) + }); + let Some(coords) = coords else { + continue; + }; + append_points_dedup(&mut points, &coords); + } + if points.len() >= 2 { + if !polyline_matches_direction(&points, canonical_direction) { + points.reverse(); + } + segments.push(points); + } + } + + segments.sort_by(|a, b| { + let a_key = segment_sort_key(a, canonical_direction); + let b_key = segment_sort_key(b, canonical_direction); + if matches!(canonical_direction, "south" | "west") { + b_key.partial_cmp(&a_key).unwrap_or(Ordering::Equal) + } else { + a_key.partial_cmp(&b_key).unwrap_or(Ordering::Equal) + } + }); + segments +} + +fn prune_micro_route_segments(segments: Vec>) -> Vec> { + if segments.len() <= 1 { + return segments; + } + + let longest_segment_m = segments + .iter() + .map(|segment| segment_length_m(segment)) + .fold(0.0_f64, f64::max); + if longest_segment_m <= 0.0 { + return segments; + } + + segments + .into_iter() + .filter(|segment| { + let length_m = segment_length_m(segment); + length_m >= DISCONNECTED_MICRO_SEGMENT_MAX_LENGTH_M + || length_m >= longest_segment_m * DISCONNECTED_MICRO_SEGMENT_MAX_SHARE + }) + .collect() +} + +fn best_component_path( + nodes: &[i64], + node_coords: &HashMap, + directed_adjacency: &HashMap>, + undirected_adjacency: &HashMap>, + canonical_direction: &str, +) -> Option> { + if nodes.len() < 2 { + return None; + } + + let mut terminal_nodes: Vec = nodes + .iter() + .copied() + .filter(|node| undirected_adjacency.get(node).map_or(0, HashSet::len) <= 1) + .collect(); + if terminal_nodes.len() < 2 { + terminal_nodes = nodes.to_vec(); + } + + let low_node = *terminal_nodes.iter().min_by(|a, b| { + projection_for_direction( + node_coords.get(a).copied().unwrap_or([0.0, 0.0]), + canonical_direction, + ) + .partial_cmp(&projection_for_direction( + node_coords.get(b).copied().unwrap_or([0.0, 0.0]), + canonical_direction, + )) + .unwrap_or(Ordering::Equal) + })?; + let high_node = *terminal_nodes.iter().max_by(|a, b| { + projection_for_direction( + node_coords.get(a).copied().unwrap_or([0.0, 0.0]), + canonical_direction, + ) + .partial_cmp(&projection_for_direction( + node_coords.get(b).copied().unwrap_or([0.0, 0.0]), + canonical_direction, + )) + .unwrap_or(Ordering::Equal) + })?; + let (start_node, end_node) = if matches!(canonical_direction, "south" | "west") { + (high_node, low_node) + } else { + (low_node, high_node) + }; + if start_node == end_node { + return None; + } + + shortest_path(start_node, end_node, directed_adjacency).or_else(|| { + shortest_path_undirected(start_node, end_node, undirected_adjacency, node_coords) + }) +} + +fn shortest_path( + start: i64, + end: i64, + adjacency: &HashMap>, +) -> Option> { + let mut heap = BinaryHeap::new(); + let mut dist: HashMap = HashMap::new(); + let mut prev: HashMap = HashMap::new(); + + dist.insert(start, 0); + heap.push(SearchState { + cost_m: 0, + node: start, + }); + + while let Some(SearchState { cost_m, node }) = heap.pop() { + if node == end { + break; + } + if cost_m > dist.get(&node).copied().unwrap_or(u64::MAX) { + continue; + } + let Some(neighbors) = adjacency.get(&node) else { + continue; + }; + for &(next, weight_m) in neighbors { + let next_cost = cost_m.saturating_add(weight_m.round().max(1.0) as u64); + if next_cost >= dist.get(&next).copied().unwrap_or(u64::MAX) { + continue; + } + dist.insert(next, next_cost); + prev.insert(next, node); + heap.push(SearchState { + cost_m: next_cost, + node: next, + }); + } + } + + if !dist.contains_key(&end) { + return None; + } + + let mut path = vec![end]; + let mut current = end; + while current != start { + current = *prev.get(¤t)?; + path.push(current); + } + path.reverse(); + Some(path) +} + +fn shortest_path_undirected( + start: i64, + end: i64, + adjacency: &HashMap>, + node_coords: &HashMap, +) -> Option> { + let mut weighted: HashMap> = HashMap::new(); + for (&node, neighbors) in adjacency { + for &next in neighbors { + let coord_a = node_coords.get(&node).copied().unwrap_or([0.0, 0.0]); + let coord_b = node_coords.get(&next).copied().unwrap_or([0.0, 0.0]); + let weight_m = haversine_distance(coord_a[0], coord_a[1], coord_b[0], coord_b[1]); + weighted.entry(node).or_default().push((next, weight_m)); + } + } + shortest_path(start, end, &weighted) +} + +fn compute_components(adjacency: &HashMap>) -> HashMap { + let mut component_by_node = HashMap::new(); + let mut sorted_nodes: Vec = adjacency.keys().copied().collect(); + sorted_nodes.sort_unstable(); + let mut component_idx = 0_i32; + + for seed in sorted_nodes { + if component_by_node.contains_key(&seed) { + continue; + } + + let mut frontier = vec![seed]; + component_by_node.insert(seed, component_idx); + while let Some(node) = frontier.pop() { + let Some(neighbors) = adjacency.get(&node) else { + continue; + }; + for &next in neighbors { + if component_by_node.insert(next, component_idx).is_none() { + frontier.push(next); + } + } + } + component_idx += 1; + } + + component_by_node +} + +fn order_exits_along_route( + exits: Vec, + segments: &[Vec<[f64; 2]>], +) -> Vec { + let cumulative_points = cumulative_segment_points(segments); + let mut ordered: Vec = exits + .into_iter() + .map(|exit| CorridorExitDraft { + exit_id: exit.exit_id, + ref_val: exit.ref_val, + name: exit.name, + lat: exit.lat, + lon: exit.lon, + sort_key_m: closest_distance_along_route(&cumulative_points, [exit.lat, exit.lon]), + }) + .collect(); + + ordered.sort_by(|a, b| { + a.sort_key_m + .partial_cmp(&b.sort_key_m) + .unwrap_or(Ordering::Equal) + .then_with(|| a.exit_id.cmp(&b.exit_id)) + }); + ordered +} + +fn cumulative_segment_points(segments: &[Vec<[f64; 2]>]) -> Vec<([f64; 2], f64)> { + let mut result = Vec::new(); + let mut cumulative_m = 0.0; + for segment in segments { + for (idx, &point) in segment.iter().enumerate() { + if idx > 0 { + let prev = segment[idx - 1]; + cumulative_m += haversine_distance(prev[0], prev[1], point[0], point[1]); + } + result.push((point, cumulative_m)); + } + } + result +} + +fn closest_distance_along_route(route_points: &[([f64; 2], f64)], target: [f64; 2]) -> f64 { + route_points + .iter() + .map(|(point, distance_m)| { + ( + haversine_distance(point[0], point[1], target[0], target[1]), + *distance_m, + ) + }) + .min_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal)) + .map(|(_, distance_m)| distance_m) + .unwrap_or(0.0) +} + +fn matched_edge_ids( + edge_rows: &[HighwayEdgeRow], + assigned_way_ids: &BTreeSet, + canonical_direction: Option<&str>, +) -> Vec { + let wanted_direction = canonical_direction.and_then(normalize_direction); + let mut edge_ids = Vec::new(); + for edge in edge_rows { + if let Some(direction) = &wanted_direction { + if edge.direction.as_deref() != Some(direction.as_str()) { + continue; + } + } + if edge + .source_way_ids + .iter() + .any(|way_id| assigned_way_ids.contains(way_id)) + { + edge_ids.push(edge.edge_id.clone()); + } + } + edge_ids.sort(); + edge_ids.dedup(); + edge_ids +} + +async fn write_corridors( + pool: &PgPool, + drafts: &[CorridorDraft], +) -> Result { + let mut tx = pool.begin().await?; + sqlx::query("DELETE FROM corridor_exits") + .execute(&mut *tx) + .await?; + sqlx::query("DELETE FROM corridors") + .execute(&mut *tx) + .await?; + sqlx::query("UPDATE highway_edges SET corridor_id = NULL") + .execute(&mut *tx) + .await?; + + for draft in drafts { + sqlx::query( + "INSERT INTO corridors \ + (corridor_id, highway, canonical_direction, root_relation_id, geometry_json, source_way_ids_json) \ + VALUES ($1, $2, $3, $4, $5, $6)", + ) + .bind(draft.corridor_id) + .bind(&draft.highway) + .bind(&draft.canonical_direction) + .bind(draft.root_relation_id) + .bind(&draft.geometry_json) + .bind(serde_json::to_string(&draft.source_way_ids).unwrap_or_else(|_| "[]".to_string())) + .execute(&mut *tx) + .await?; + + for (idx, exit) in draft.exits.iter().enumerate() { + sqlx::query( + "INSERT INTO corridor_exits \ + (corridor_id, corridor_index, exit_id, ref, name, lat, lon) \ + VALUES ($1, $2, $3, $4, $5, $6, $7)", + ) + .bind(draft.corridor_id) + .bind(idx as i32) + .bind(&exit.exit_id) + .bind(&exit.ref_val) + .bind(&exit.name) + .bind(exit.lat) + .bind(exit.lon) + .execute(&mut *tx) + .await?; + } + } + + let mut edges_updated = 0usize; + for draft in drafts { + for edge_id in &draft.edge_ids { + let result = sqlx::query("UPDATE highway_edges SET corridor_id = $2 WHERE id = $1") + .bind(edge_id) + .bind(draft.corridor_id) + .execute(&mut *tx) + .await?; + edges_updated += result.rows_affected() as usize; + } + } + + tx.commit().await?; + + Ok(BuildCorridorsStats { + corridors_created: drafts.len(), + corridor_exits_created: drafts.iter().map(|draft| draft.exits.len()).sum(), + edges_updated, + }) +} + +fn serialize_geometry_geojson(segments: &[Vec<[f64; 2]>]) -> Result { + if segments.is_empty() { + return Ok("{\"type\":\"LineString\",\"coordinates\":[]}".to_string()); + } + + if segments.len() == 1 { + return Ok(serde_json::to_string(&serde_json::json!({ + "type": "LineString", + "coordinates": segments[0] + .iter() + .map(|point| vec![point[1], point[0]]) + .collect::>(), + }))?); + } + + Ok(serde_json::to_string(&serde_json::json!({ + "type": "MultiLineString", + "coordinates": segments + .iter() + .map(|segment| { + segment + .iter() + .map(|point| vec![point[1], point[0]]) + .collect::>() + }) + .collect::>(), + }))?) +} + +fn segment_length_m(segment: &[[f64; 2]]) -> f64 { + segment + .windows(2) + .map(|pair| haversine_distance(pair[0][0], pair[0][1], pair[1][0], pair[1][1])) + .sum() +} + +fn segment_sort_key(segment: &[[f64; 2]], canonical_direction: &str) -> f64 { + let first = segment.first().copied().unwrap_or([0.0, 0.0]); + let last = segment.last().copied().unwrap_or(first); + match canonical_direction { + "east" | "west" => first[1].min(last[1]), + _ => first[0].min(last[0]), + } +} + +fn projection_for_direction(point: [f64; 2], canonical_direction: &str) -> f64 { + match canonical_direction { + "east" | "west" => point[1], + _ => point[0], + } +} + +fn polyline_matches_direction(polyline: &[[f64; 2]], canonical_direction: &str) -> bool { + if polyline.len() < 2 { + return true; + } + let first = polyline[0]; + let last = polyline[polyline.len() - 1]; + match canonical_direction { + "north" => last[0] > first[0], + "south" => last[0] < first[0], + "east" => last[1] > first[1], + "west" => last[1] < first[1], + _ => true, + } +} + +fn parse_geojson_coords(raw: &str) -> Vec<(f64, f64)> { + serde_json::from_str::(raw) + .ok() + .and_then(|value| value.get("coordinates").cloned()) + .and_then(|coords| serde_json::from_value::>>(coords).ok()) + .unwrap_or_default() + .into_iter() + .filter_map(|pair| { + if pair.len() >= 2 { + Some((pair[1], pair[0])) + } else { + None + } + }) + .collect() +} + +fn parse_way_ids_json(raw: &str) -> Vec { + serde_json::from_str::>(raw).unwrap_or_default() +} + +impl RouteWay { + fn has_interstate_ref(&self) -> bool { + self.refs + .iter() + .any(|reference| is_interstate_highway_ref(reference)) + } +} + +fn allowed_refs_for_pair(prev_way: &RouteWay, next_way: &RouteWay) -> HashSet { + prev_way + .refs + .iter() + .chain(next_way.refs.iter()) + .cloned() + .collect() +} + +fn allowed_refs_for_route( + route_highway: &str, + assigned_way_ids: &BTreeSet, + ways_by_id: &HashMap, +) -> HashSet { + let mut allowed_refs = HashSet::from([route_highway.to_string()]); + for way_id in assigned_way_ids { + let Some(way) = ways_by_id.get(way_id) else { + continue; + }; + allowed_refs.extend(way.refs.iter().cloned()); + } + allowed_refs +} + +fn connector_way_allowed_for_refs( + way: &RouteWay, + allowed_refs: &HashSet, + allow_short_high_class_fallback: bool, +) -> bool { + way.refs.is_empty() + || way + .refs + .iter() + .any(|reference| allowed_refs.contains(reference)) + || (allow_short_high_class_fallback + && matches!( + way.highway_type.as_str(), + "motorway" | "motorway_link" | "trunk" | "trunk_link" + )) +} + +#[cfg(test)] +mod tests { + use std::collections::{BTreeSet, HashMap, HashSet}; + + use super::{ + allowed_refs_for_pair, allowed_refs_for_route, build_connector_graph, + connector_way_allowed_for_refs, prune_micro_route_segments, shortest_connector_path_to_any, + RouteWay, SHORT_FALLBACK_CONNECTOR_MAX_COST_M, + }; + + fn route_way( + way_id: i64, + refs: &[&str], + nodes: &[i64], + geometry: &[(f64, f64)], + highway_type: &str, + is_oneway: bool, + ) -> RouteWay { + RouteWay { + way_id, + refs: refs.iter().map(|value| (*value).to_string()).collect(), + nodes: nodes.to_vec(), + geometry: geometry.to_vec(), + highway_type: highway_type.to_string(), + is_oneway, + } + } + + #[test] + fn connector_policy_allows_same_highway_interstate_ways() { + let way = route_way( + 10, + &["I-96"], + &[1, 2, 3], + &[(42.0, -83.0), (42.0, -82.999), (42.0, -82.998)], + "motorway", + true, + ); + + assert!(connector_way_allowed_for_refs( + &way, + &HashSet::from(["I-96".to_string()]), + false, + )); + } + + #[test] + fn connector_policy_rejects_other_interstate_ways() { + let way = route_way( + 11, + &["I-69"], + &[1, 2], + &[(42.0, -83.0), (42.0, -82.999)], + "motorway", + true, + ); + + assert!(!connector_way_allowed_for_refs( + &way, + &HashSet::from(["I-96".to_string()]), + false, + )); + } + + #[test] + fn pairwise_policy_inherits_official_concurrency_refs() { + let prev_way = route_way( + 20, + &["I-20", "I-55"], + &[1, 2], + &[(32.0, -90.2), (32.0, -90.19)], + "motorway", + true, + ); + let next_way = route_way( + 21, + &["I-20", "I-55"], + &[3, 4], + &[(32.0, -90.18), (32.0, -90.17)], + "motorway", + true, + ); + let bridge_way = route_way( + 22, + &["I-55"], + &[2, 3], + &[(32.0, -90.19), (32.0, -90.18)], + "motorway", + true, + ); + + let allowed_refs = allowed_refs_for_pair(&prev_way, &next_way); + + assert!(allowed_refs.contains("I-20")); + assert!(allowed_refs.contains("I-55")); + assert!(connector_way_allowed_for_refs( + &bridge_way, + &allowed_refs, + false, + )); + } + + #[test] + fn pairwise_policy_allows_shared_non_interstate_concurrency_refs() { + let prev_way = route_way( + 30, + &["I-99", "US-220"], + &[1, 2], + &[(41.0, -77.6), (41.01, -77.59)], + "motorway", + true, + ); + let next_way = route_way( + 31, + &["I-99", "US-220"], + &[3, 4], + &[(41.02, -77.58), (41.03, -77.57)], + "motorway", + true, + ); + let bridge_way = route_way( + 32, + &["I-80", "US-220"], + &[2, 5, 3], + &[(41.01, -77.59), (41.015, -77.585), (41.02, -77.58)], + "motorway", + true, + ); + + let allowed_refs = allowed_refs_for_pair(&prev_way, &next_way); + + assert!(allowed_refs.contains("I-99")); + assert!(allowed_refs.contains("US-220")); + assert!(connector_way_allowed_for_refs( + &bridge_way, + &allowed_refs, + false, + )); + } + + #[test] + fn route_policy_collects_all_assigned_refs() { + let ways_by_id = HashMap::from([ + ( + 40_i64, + route_way( + 40, + &["I-99", "US-220"], + &[1, 2], + &[(41.0, -77.6), (41.01, -77.59)], + "motorway", + true, + ), + ), + ( + 41_i64, + route_way( + 41, + &["I-99", "US-15"], + &[3, 4], + &[(41.2, -77.2), (41.21, -77.19)], + "motorway", + true, + ), + ), + ]); + let assigned = BTreeSet::from([40_i64, 41_i64]); + + let allowed_refs = allowed_refs_for_route("I-99", &assigned, &ways_by_id); + + assert!(allowed_refs.contains("I-99")); + assert!(allowed_refs.contains("US-220")); + assert!(allowed_refs.contains("US-15")); + } + + #[test] + fn shortest_connector_path_can_use_same_highway_interstate_gap_fill() { + let bridge_way = route_way( + 12, + &["I-96"], + &[1, 2, 3], + &[(42.0, -83.0), (42.0, -82.999), (42.0, -82.998)], + "motorway", + true, + ); + let ways_by_id = HashMap::from([(bridge_way.way_id, bridge_way.clone())]); + let graph = build_connector_graph(&[ways_by_id.get(&bridge_way.way_id).unwrap()]); + let sources = HashSet::from([1_i64]); + let targets = HashMap::from([(3_i64, 1_i32)]); + + let result = shortest_connector_path_to_any( + &sources, + &targets, + &graph, + &ways_by_id, + &HashSet::from(["I-96".to_string()]), + false, + None, + ); + + assert_eq!(result.map(|(_, _, way_ids)| way_ids), Some(vec![12])); + } + + #[test] + fn short_fallback_allows_short_high_class_connector_with_other_ref() { + let source_way = route_way( + 1, + &["I-12"], + &[1, 2], + &[(30.0, -91.11), (30.0, -91.10)], + "motorway", + true, + ); + let target_way = route_way( + 2, + &["I-12"], + &[5, 6], + &[(30.0, -91.09), (30.0, -91.08)], + "motorway", + true, + ); + let connector_way = route_way( + 3, + &["I-10"], + &[2, 3, 4, 5], + &[ + (30.0, -91.10), + (30.0, -91.097), + (30.0, -91.094), + (30.0, -91.09), + ], + "motorway", + true, + ); + let ways_by_id = HashMap::from([ + (source_way.way_id, source_way.clone()), + (target_way.way_id, target_way.clone()), + (connector_way.way_id, connector_way.clone()), + ]); + let graph = build_connector_graph(&[ + ways_by_id.get(&source_way.way_id).unwrap(), + ways_by_id.get(&target_way.way_id).unwrap(), + ways_by_id.get(&connector_way.way_id).unwrap(), + ]); + let sources = HashSet::from([2_i64]); + let targets = HashMap::from([(5_i64, 0_i32)]); + let allowed_refs = HashSet::from(["I-12".to_string()]); + + assert!(shortest_connector_path_to_any( + &sources, + &targets, + &graph, + &ways_by_id, + &allowed_refs, + false, + None, + ) + .is_none()); + + let result = shortest_connector_path_to_any( + &sources, + &targets, + &graph, + &ways_by_id, + &allowed_refs, + true, + Some(SHORT_FALLBACK_CONNECTOR_MAX_COST_M), + ) + .expect("short fallback path should be available"); + assert_eq!(result.2, vec![3]); + } + + #[test] + fn short_fallback_rejects_long_high_class_connector() { + let source_way = route_way( + 1, + &["I-12"], + &[1, 2], + &[(30.0, -91.20), (30.0, -91.19)], + "motorway", + true, + ); + let target_way = route_way( + 2, + &["I-12"], + &[5, 6], + &[(30.0, -91.00), (30.0, -90.99)], + "motorway", + true, + ); + let connector_way = route_way( + 3, + &["I-10"], + &[2, 3, 4, 5], + &[ + (30.0, -91.19), + (30.0, -91.13), + (30.0, -91.06), + (30.0, -91.00), + ], + "motorway", + true, + ); + let ways_by_id = HashMap::from([ + (source_way.way_id, source_way.clone()), + (target_way.way_id, target_way.clone()), + (connector_way.way_id, connector_way.clone()), + ]); + let graph = build_connector_graph(&[ + ways_by_id.get(&source_way.way_id).unwrap(), + ways_by_id.get(&target_way.way_id).unwrap(), + ways_by_id.get(&connector_way.way_id).unwrap(), + ]); + let sources = HashSet::from([2_i64]); + let targets = HashMap::from([(5_i64, 0_i32)]); + let allowed_refs = HashSet::from(["I-12".to_string()]); + + assert!(shortest_connector_path_to_any( + &sources, + &targets, + &graph, + &ways_by_id, + &allowed_refs, + true, + Some(SHORT_FALLBACK_CONNECTOR_MAX_COST_M), + ) + .is_none()); + } + + #[test] + fn prune_micro_route_segments_drops_tiny_detached_component() { + let kept = prune_micro_route_segments(vec![ + vec![[42.0, -79.88], [42.0, -79.0]], + vec![[42.15, -79.386], [42.153, -79.375]], + ]); + + assert_eq!(kept.len(), 1); + } +} diff --git a/crates/derive/src/interstate_relations.rs b/crates/derive/src/interstate_relations.rs new file mode 100644 index 0000000..8b8927c --- /dev/null +++ b/crates/derive/src/interstate_relations.rs @@ -0,0 +1,224 @@ +use std::collections::HashMap; +use std::fs; +use std::path::Path; + +use anyhow::Context; +use openinterstate_core::highway_ref::{is_interstate_highway_ref, normalize_highway_ref}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct InterstateRelationMember { + pub way_id: i64, + pub highway: String, + pub root_relation_id: i64, + pub leaf_relation_id: i64, + pub direction: Option, + pub role: Option, + pub sequence_index: usize, +} + +#[derive(Debug, Clone)] +pub struct InterstateRouteGroup { + pub highway: String, + pub root_relation_id: i64, + pub direction: Option, + pub members: Vec, +} + +pub fn load_interstate_relation_members( + path: &Path, +) -> Result, anyhow::Error> { + let contents = fs::read_to_string(path) + .with_context(|| format!("reading Interstate relation cache {}", path.display()))?; + + let mut members = Vec::new(); + for (line_index, line) in contents.lines().enumerate() { + if line_index == 0 && line.starts_with("way_id\t") { + continue; + } + if line.trim().is_empty() { + continue; + } + + let fields: Vec<&str> = line.split('\t').collect(); + if fields.len() < 5 { + continue; + } + + let Ok(way_id) = fields[0].parse::() else { + continue; + }; + let Some(highway) = normalize_highway_ref(fields[1].trim()) else { + continue; + }; + if !is_interstate_highway_ref(&highway) { + continue; + } + let Ok(root_relation_id) = fields[2].parse::() else { + continue; + }; + let Ok(leaf_relation_id) = fields[3].parse::() else { + continue; + }; + let direction = normalize_direction(fields.get(4).copied().unwrap_or_default()); + let role = fields + .get(5) + .map(|value| value.trim()) + .filter(|value| !value.is_empty()) + .map(ToString::to_string); + let sequence_index = fields + .get(6) + .and_then(|value| value.parse::().ok()) + .unwrap_or(0); + + members.push(InterstateRelationMember { + way_id, + highway, + root_relation_id, + leaf_relation_id, + direction, + role, + sequence_index, + }); + } + + Ok(members) +} + +pub fn load_relation_refs_by_way(path: &Path) -> Result>, anyhow::Error> { + let members = load_interstate_relation_members(path)?; + Ok(relation_refs_by_way(&members)) +} + +pub fn relation_refs_by_way(members: &[InterstateRelationMember]) -> HashMap> { + let mut refs_by_way: HashMap> = HashMap::new(); + for member in members { + refs_by_way + .entry(member.way_id) + .or_default() + .push(member.highway.clone()); + } + + for refs in refs_by_way.values_mut() { + refs.sort(); + refs.dedup(); + } + + refs_by_way +} + +pub fn group_relation_members(members: &[InterstateRelationMember]) -> Vec { + let mut members_by_group: HashMap< + (String, i64, Option), + Vec, + > = HashMap::new(); + + for member in members { + members_by_group + .entry(( + member.highway.clone(), + member.root_relation_id, + member.direction.clone(), + )) + .or_default() + .push(member.clone()); + } + + let mut groups: Vec = members_by_group + .into_iter() + .map( + |((highway, root_relation_id, direction), mut group_members)| { + group_members.sort_by(|a, b| { + a.sequence_index + .cmp(&b.sequence_index) + .then_with(|| a.leaf_relation_id.cmp(&b.leaf_relation_id)) + .then_with(|| a.way_id.cmp(&b.way_id)) + }); + InterstateRouteGroup { + highway, + root_relation_id, + direction, + members: group_members, + } + }, + ) + .collect(); + + groups.sort_by(|a, b| { + interstate_number(&a.highway) + .cmp(&interstate_number(&b.highway)) + .then_with(|| a.highway.cmp(&b.highway)) + .then_with(|| a.root_relation_id.cmp(&b.root_relation_id)) + .then_with(|| a.direction.cmp(&b.direction)) + }); + groups +} + +pub fn normalize_direction(raw: &str) -> Option { + let value = raw.trim().to_ascii_lowercase(); + match value.as_str() { + "" => None, + "north" | "northbound" | "n" => Some("north".to_string()), + "south" | "southbound" | "s" => Some("south".to_string()), + "east" | "eastbound" | "e" => Some("east".to_string()), + "west" | "westbound" | "w" => Some("west".to_string()), + _ => None, + } +} + +fn interstate_number(highway: &str) -> i32 { + highway + .strip_prefix("I-") + .and_then(|value| value.parse::().ok()) + .unwrap_or(0) +} + +#[cfg(test)] +mod tests { + use super::{group_relation_members, normalize_direction, InterstateRelationMember}; + + #[test] + fn normalizes_compass_direction_variants() { + assert_eq!(normalize_direction("north"), Some("north".to_string())); + assert_eq!(normalize_direction("Northbound"), Some("north".to_string())); + assert_eq!(normalize_direction("S"), Some("south".to_string())); + assert_eq!(normalize_direction(""), None); + } + + #[test] + fn groups_members_by_root_and_direction() { + let groups = group_relation_members(&[ + InterstateRelationMember { + way_id: 2, + highway: "I-95".to_string(), + root_relation_id: 10, + leaf_relation_id: 10, + direction: Some("north".to_string()), + role: None, + sequence_index: 1, + }, + InterstateRelationMember { + way_id: 1, + highway: "I-95".to_string(), + root_relation_id: 10, + leaf_relation_id: 10, + direction: Some("north".to_string()), + role: None, + sequence_index: 0, + }, + InterstateRelationMember { + way_id: 3, + highway: "I-95".to_string(), + root_relation_id: 10, + leaf_relation_id: 11, + direction: Some("south".to_string()), + role: None, + sequence_index: 0, + }, + ]); + + assert_eq!(groups.len(), 2); + assert_eq!(groups[0].members[0].way_id, 1); + assert_eq!(groups[0].members[1].way_id, 2); + assert_eq!(groups[1].members[0].way_id, 3); + } +} diff --git a/crates/derive/src/main.rs b/crates/derive/src/main.rs index a8a61a9..33ea9f7 100644 --- a/crates/derive/src/main.rs +++ b/crates/derive/src/main.rs @@ -1,7 +1,10 @@ mod canonical_types; mod graph; +mod interstate_relations; mod routes; +use std::path::{Path, PathBuf}; + use clap::{Parser, Subcommand}; use sqlx::PgPool; @@ -16,6 +19,10 @@ struct Cli { #[arg(long, env = "DATABASE_URL")] database_url: String, + /// Optional cached Interstate route relation membership file. + #[arg(long, env = "INTERSTATE_RELATION_CACHE")] + interstate_relation_cache: Option, + #[command(subcommand)] command: Command, } @@ -26,9 +33,9 @@ enum Command { All, /// Rebuild highway_edges and exit_corridors from canonical osm2pgsql tables. Graph, - /// Rebuild corridors and corridor_exits from highway_edges. + /// Rebuild official Interstate corridors and corridor_exits. Corridors, - /// Rebuild reference_routes and reference_route_anchors from corridors. + /// Rebuild reference_routes and reference_route_anchors from official corridors. Routes, } @@ -48,14 +55,19 @@ fn init_tracing() { } async fn run(cli: Cli) -> anyhow::Result<()> { - let pool = connect_pool(&cli.database_url).await?; + let Cli { + database_url, + interstate_relation_cache, + command, + } = cli; + let pool = connect_pool(&database_url).await?; tracing::info!("Connected to database"); - match cli.command { - Command::All => run_all(&pool).await, - Command::Graph => run_graph(&pool).await, - Command::Corridors => run_corridors(&pool).await, - Command::Routes => run_routes(&pool).await, + match command { + Command::All => run_all(&pool, interstate_relation_cache.as_deref()).await, + Command::Graph => run_graph(&pool, interstate_relation_cache.as_deref()).await, + Command::Corridors => run_corridors(&pool, interstate_relation_cache.as_deref()).await, + Command::Routes => run_routes(&pool, interstate_relation_cache.as_deref()).await, } } @@ -66,22 +78,25 @@ async fn connect_pool(database_url: &str) -> anyhow::Result { .await?) } -async fn run_all(pool: &PgPool) -> anyhow::Result<()> { - run_graph(pool).await?; - run_corridors(pool).await?; - run_routes(pool).await +async fn run_all(pool: &PgPool, interstate_relation_cache: Option<&Path>) -> anyhow::Result<()> { + run_graph(pool, interstate_relation_cache).await?; + run_corridors(pool, interstate_relation_cache).await?; + run_routes(pool, interstate_relation_cache).await } -async fn run_graph(pool: &PgPool) -> anyhow::Result<()> { +async fn run_graph(pool: &PgPool, interstate_relation_cache: Option<&Path>) -> anyhow::Result<()> { tracing::info!("Building highway graph from canonical osm2pgsql tables"); - let edge_count = graph::build_graph(pool).await?; + let edge_count = graph::build_graph(pool, interstate_relation_cache).await?; tracing::info!("Graph build complete: {} edges", edge_count); Ok(()) } -async fn run_corridors(pool: &PgPool) -> anyhow::Result<()> { - tracing::info!("Building corridors from highway_edges"); - let stats = graph::corridors::build_corridors(pool).await?; +async fn run_corridors( + pool: &PgPool, + interstate_relation_cache: Option<&Path>, +) -> anyhow::Result<()> { + tracing::info!("Building Interstate corridors"); + let stats = graph::relation_corridors::build_corridors(pool, interstate_relation_cache).await?; tracing::info!( "Corridor build complete: {} corridors, {} exits, {} edges updated", stats.corridors_created, @@ -91,9 +106,9 @@ async fn run_corridors(pool: &PgPool) -> anyhow::Result<()> { Ok(()) } -async fn run_routes(pool: &PgPool) -> anyhow::Result<()> { +async fn run_routes(pool: &PgPool, interstate_relation_cache: Option<&Path>) -> anyhow::Result<()> { tracing::info!("Building reference routes from corridors"); - routes::build_reference_routes(pool).await?; + routes::build_reference_routes(pool, interstate_relation_cache).await?; tracing::info!("Reference route build complete"); Ok(()) } diff --git a/crates/derive/src/routes.rs b/crates/derive/src/routes.rs index 8f9b416..9c6e75a 100644 --- a/crates/derive/src/routes.rs +++ b/crates/derive/src/routes.rs @@ -1,5 +1,6 @@ use std::cmp::Ordering; use std::collections::{HashMap, HashSet, VecDeque}; +use std::path::Path; use openinterstate_core::geo::{bearing, haversine_distance, to_degrees, to_radians, EARTH_RADIUS}; use sha2::{Digest, Sha256}; @@ -12,6 +13,9 @@ const INTERVAL_S: f64 = 5.0; const STEP_M: f64 = SPEED_MPS * INTERVAL_S; const LANE_OFFSET_M: f64 = 3.5; const ANCHOR_STEP_POINTS: usize = 40; +const ROUTE_GAP_BREAK_M: f64 = 10_000.0; +const DISCONNECTED_MICRO_SEGMENT_MAX_LENGTH_M: f64 = 2_000.0; +const DISCONNECTED_MICRO_SEGMENT_MAX_SHARE: f64 = 0.05; #[derive(Debug, Clone)] struct CorridorEdge { @@ -28,6 +32,12 @@ struct CorridorInfo { canonical_direction: String, } +#[derive(Debug, Clone)] +struct CorridorSegment { + points: Vec<[f64; 2]>, + length_m: f64, +} + #[derive(Debug, Clone)] struct RouteRow { id: String, @@ -61,11 +71,14 @@ struct AnchorRow { lon: f64, } -pub async fn build_reference_routes(pool: &PgPool) -> anyhow::Result<()> { - // Load interstate corridors - let corridor_rows: Vec<(i32, String, Option)> = sqlx::query_as( - "SELECT corridor_id, highway, canonical_direction \ - FROM corridors WHERE highway LIKE 'I-%'", +pub async fn build_reference_routes( + pool: &PgPool, + _interstate_relation_cache: Option<&Path>, +) -> anyhow::Result<()> { + let corridor_rows: Vec<(i32, String, Option, String)> = sqlx::query_as( + "SELECT corridor_id, highway, canonical_direction, geometry_json \ + FROM corridors \ + WHERE highway LIKE 'I-%'", ) .fetch_all(pool) .await?; @@ -76,45 +89,20 @@ pub async fn build_reference_routes(pool: &PgPool) -> anyhow::Result<()> { return Ok(()); } - let corridors: Vec = corridor_rows + let corridor_geometry_rows: Vec<(CorridorInfo, String)> = corridor_rows .into_iter() - .filter_map(|(id, highway, dir)| { - Some(CorridorInfo { - corridor_id: id, - highway, - canonical_direction: dir?, - }) + .filter_map(|(id, highway, dir, geometry_json)| { + Some(( + CorridorInfo { + corridor_id: id, + highway, + canonical_direction: dir?, + }, + geometry_json, + )) }) .collect(); - - // Load edges for all interstate corridors - let edge_rows: Vec<(i32, i64, i64, i32, String)> = sqlx::query_as( - "SELECT corridor_id, start_node, end_node, length_m, polyline_json \ - FROM highway_edges \ - WHERE corridor_id IS NOT NULL AND highway LIKE 'I-%'", - ) - .fetch_all(pool) - .await?; - - let mut edges_by_corridor: HashMap> = HashMap::new(); - for (corridor_id, start_node, end_node, length_m, polyline_json) in edge_rows { - let polyline = parse_polyline_json(&polyline_json); - if polyline.len() < 2 { - continue; - } - edges_by_corridor - .entry(corridor_id) - .or_default() - .push(CorridorEdge { - start_node, - end_node, - polyline, - length_m: length_m as f64, - }); - } - - let mut routes = build_corridor_routes(&corridors, &edges_by_corridor)?; - collapse_same_highway_corridors(&mut routes)?; + let mut routes = build_geometry_backed_routes(&corridor_geometry_rows)?; // Assign variant ranks let mut rank_map: HashMap<(String, String), i32> = HashMap::new(); @@ -197,6 +185,182 @@ pub async fn build_reference_routes(pool: &PgPool) -> anyhow::Result<()> { Ok(()) } +fn build_geometry_backed_routes( + corridors: &[(CorridorInfo, String)], +) -> anyhow::Result> { + let mut routes = Vec::new(); + + for (corridor, geometry_json) in corridors { + let direction_code = canonical_to_direction_code(&corridor.canonical_direction); + let mut route_segments = parse_geometry_json_segments(geometry_json); + route_segments.retain(|segment| segment.len() >= 2); + if route_segments.is_empty() { + continue; + } + + let mut sampled_segments = Vec::new(); + for mut segment in route_segments { + if !polyline_matches_direction(&segment, direction_code) { + segment.reverse(); + } + let sampled = resample(&segment, STEP_M); + let offset = apply_lane_offset(&sampled, LANE_OFFSET_M); + if offset.len() >= 2 { + sampled_segments.push(offset); + } + } + if sampled_segments.is_empty() { + continue; + } + sampled_segments = prune_geometry_micro_segments(sampled_segments); + if sampled_segments.is_empty() { + continue; + } + + let distance_m: f64 = sampled_segments + .iter() + .map(|segment| segment_length_m(segment)) + .sum(); + if distance_m < MIN_LENGTH_M { + continue; + } + + let waypoints: Vec<[f64; 2]> = sampled_segments + .iter() + .flat_map(|segment| segment.iter().copied()) + .collect(); + let bounds = bounds_for_points(&waypoints); + let start = waypoints.first().copied().unwrap_or([0.0, 0.0]); + let end = waypoints.last().copied().unwrap_or([0.0, 0.0]); + let waypoints_json = serialize_route_waypoints(&sampled_segments)?; + let label = direction_label(direction_code).to_string(); + + let route_id = deterministic_route_id( + &corridor.highway, + direction_code, + corridor.corridor_id, + &waypoints_json, + ) + .to_string(); + + routes.push(RouteRow { + id: route_id, + highway: corridor.highway.clone(), + direction_code: direction_code.to_string(), + direction_label: label.clone(), + display_name: format!("{} {}", corridor.highway, label), + corridor_id: corridor.corridor_id, + variant_rank: 0, + distance_m, + duration_s: waypoints.len() as f64 * INTERVAL_S, + interval_s: INTERVAL_S, + point_count: waypoints.len() as i32, + start_lat: start[0], + start_lon: start[1], + end_lat: end[0], + end_lon: end[1], + min_lat: bounds.0, + max_lat: bounds.1, + min_lon: bounds.2, + max_lon: bounds.3, + waypoints_json, + waypoints, + }); + } + + routes.sort_by(|a, b| { + let num_a = interstate_number(&a.highway); + let num_b = interstate_number(&b.highway); + num_a + .cmp(&num_b) + .then_with(|| a.highway.cmp(&b.highway)) + .then_with(|| a.direction_code.cmp(&b.direction_code)) + .then_with(|| { + b.distance_m + .partial_cmp(&a.distance_m) + .unwrap_or(Ordering::Equal) + }) + }); + + Ok(routes) +} + +fn parse_geometry_json_segments(raw: &str) -> Vec> { + let Ok(value) = serde_json::from_str::(raw) else { + return Vec::new(); + }; + let Some(geometry_type) = value.get("type").and_then(|value| value.as_str()) else { + return Vec::new(); + }; + + match geometry_type { + "LineString" => value + .get("coordinates") + .and_then(parse_linestring_coords) + .map(|segment| vec![segment]) + .unwrap_or_default(), + "MultiLineString" => value + .get("coordinates") + .and_then(|coords| coords.as_array()) + .map(|segments| { + segments + .iter() + .filter_map(parse_linestring_coords) + .collect::>() + }) + .unwrap_or_default(), + _ => Vec::new(), + } +} + +fn parse_linestring_coords(value: &serde_json::Value) -> Option> { + let coords = value.as_array()?; + let segment: Vec<[f64; 2]> = coords + .iter() + .filter_map(|pair| { + let pair = pair.as_array()?; + let lon = pair.first()?.as_f64()?; + let lat = pair.get(1)?.as_f64()?; + Some([lat, lon]) + }) + .collect(); + if segment.len() >= 2 { + Some(segment) + } else { + None + } +} + +fn segment_length_m(segment: &[[f64; 2]]) -> f64 { + segment + .windows(2) + .map(|pair| haversine_distance(pair[0][0], pair[0][1], pair[1][0], pair[1][1])) + .sum() +} + +fn prune_geometry_micro_segments(segments: Vec>) -> Vec> { + if segments.len() <= 1 { + return segments; + } + + let longest_segment_m = segments + .iter() + .map(|segment| segment_length_m(segment)) + .fold(0.0_f64, f64::max); + if longest_segment_m <= 0.0 { + return segments; + } + + segments + .into_iter() + .filter(|segment| { + let length_m = segment_length_m(segment); + length_m >= DISCONNECTED_MICRO_SEGMENT_MAX_LENGTH_M + || length_m >= longest_segment_m * DISCONNECTED_MICRO_SEGMENT_MAX_SHARE + }) + .collect() +} + async fn clear_tables(pool: &PgPool) -> anyhow::Result<()> { let mut tx = pool.begin().await?; sqlx::query("DELETE FROM reference_route_anchors") @@ -232,16 +396,27 @@ fn build_corridor_routes( let direction_code = canonical_to_direction_code(&corridor.canonical_direction); - let segments = walk_corridor_segments(edges, direction_code); + let segments = prune_micro_segments(walk_corridor_segments(edges, direction_code)); if segments.is_empty() { continue; } + let original_segment_count = segments.len(); + if original_segment_count > 1 { + tracing::info!( + "Preserving disconnected reference route {} {} (corridor {}) as {} segments", + corridor.highway, + direction_code, + corridor.corridor_id, + original_segment_count + ); + } // Resample + lane offset each segment independently, then concatenate. // This avoids interpolating across geographic gaps between disconnected // corridor components (which would create points over water/land). - let mut waypoints: Vec<[f64; 2]> = Vec::new(); - for mut seg in segments { + let mut route_segments: Vec> = Vec::new(); + for segment in segments { + let mut seg = segment.points; if seg.len() < 2 { continue; } @@ -251,14 +426,43 @@ fn build_corridor_routes( } let sampled = resample(&seg, STEP_M); let offset = apply_lane_offset(&sampled, LANE_OFFSET_M); - waypoints.extend(offset); + if offset.len() >= 2 { + route_segments.push(offset); + } } + let original_point_count: usize = route_segments.iter().map(Vec::len).sum(); + route_segments = sanitize_route_segments(route_segments, direction_code); + let sanitized_point_count: usize = route_segments.iter().map(Vec::len).sum(); + if route_segments.is_empty() { + continue; + } + if route_segments.len() != original_segment_count + || sanitized_point_count < original_point_count + { + tracing::warn!( + "Sanitized {} {} reference route (corridor {}) from {} segments / {} points to {} segments / {} points after removing large gaps", + corridor.highway, + direction_code, + corridor.corridor_id, + original_segment_count, + original_point_count, + route_segments.len(), + sanitized_point_count, + ); + } + let waypoints: Vec<[f64; 2]> = route_segments + .iter() + .flat_map(|segment| segment.iter().copied()) + .collect(); if waypoints.len() < 2 { continue; } - let distance_m = *cumulative_distances(&waypoints).last().unwrap_or(&0.0); + let distance_m: f64 = route_segments + .iter() + .map(|segment| *cumulative_distances(segment).last().unwrap_or(&0.0)) + .sum(); if distance_m < 1.0 { continue; } @@ -266,7 +470,7 @@ fn build_corridor_routes( let bounds = bounds_for_points(&waypoints); let start = waypoints.first().copied().unwrap_or([0.0, 0.0]); let end = waypoints.last().copied().unwrap_or([0.0, 0.0]); - let waypoints_json = serde_json::to_string(&waypoints)?; + let waypoints_json = serialize_route_waypoints(&route_segments)?; let label = direction_label(&direction_code).to_string(); let route_id = deterministic_route_id( @@ -323,7 +527,7 @@ fn build_corridor_routes( /// /// Returns one polyline per connected component, sorted along the travel /// axis. Each segment is independently valid — no interpolation across gaps. -fn walk_corridor_segments(edges: &[CorridorEdge], direction_code: &str) -> Vec> { +fn walk_corridor_segments(edges: &[CorridorEdge], direction_code: &str) -> Vec { if edges.is_empty() { return Vec::new(); } @@ -364,7 +568,7 @@ fn walk_corridor_segments(edges: &[CorridorEdge], direction_code: &str) -> Vec)> = Vec::new(); + let mut component_polylines: Vec<(f64, CorridorSegment)> = Vec::new(); for comp_edges in &components { if comp_edges.is_empty() { @@ -438,6 +642,7 @@ fn walk_corridor_segments(edges: &[CorridorEdge], direction_code: &str) -> Vec = comp_edges.iter().map(|&i| &edges[i]).collect(); + let component_length_m: f64 = comp_edge_list.iter().map(|edge| edge.length_m).sum(); let mut points: Vec<[f64; 2]> = Vec::new(); for &(from, to, local_idx) in &steps { let edge = comp_edge_list[local_idx]; @@ -457,7 +662,13 @@ fn walk_corridor_segments(edges: &[CorridorEdge], direction_code: &str) -> Vec= 2 { let sort_key = projection(&points[0]).min(projection(points.last().unwrap())); - component_polylines.push((sort_key, points)); + component_polylines.push(( + sort_key, + CorridorSegment { + points, + length_m: component_length_m, + }, + )); } } @@ -465,17 +676,126 @@ fn walk_corridor_segments(edges: &[CorridorEdge], direction_code: &str) -> Vec= 2) + .map(|(_, segment)| segment) + .filter(|segment| segment.points.len() >= 2) .collect() } +fn prune_micro_segments(segments: Vec) -> Vec { + if segments.len() <= 1 { + return segments; + } + + let longest_segment_m = segments + .iter() + .map(|segment| segment.length_m) + .fold(0.0_f64, f64::max); + if longest_segment_m <= 0.0 { + return segments; + } + + segments + .into_iter() + .filter(|segment| { + segment.length_m >= DISCONNECTED_MICRO_SEGMENT_MAX_LENGTH_M + || segment.length_m >= longest_segment_m * DISCONNECTED_MICRO_SEGMENT_MAX_SHARE + }) + .collect() +} + +fn sanitize_route_segments( + segments: Vec>, + direction_code: &str, +) -> Vec> { + let mut chunks: Vec> = segments + .into_iter() + .flat_map(|segment| split_waypoints_on_large_gaps(&segment, ROUTE_GAP_BREAK_M)) + .filter(|segment| segment.len() >= 2) + .collect(); + if chunks.is_empty() { + return Vec::new(); + } + + for chunk in &mut chunks { + if !polyline_matches_direction(chunk, direction_code) { + chunk.reverse(); + } + } + + let descending = matches!(direction_code, "WB" | "SB"); + chunks.sort_by(|a, b| { + let a_key = chunk_sort_key(a, direction_code); + let b_key = chunk_sort_key(b, direction_code); + if descending { + b_key.partial_cmp(&a_key).unwrap_or(Ordering::Equal) + } else { + a_key.partial_cmp(&b_key).unwrap_or(Ordering::Equal) + } + }); + + chunks +} + +fn serialize_route_waypoints(segments: &[Vec<[f64; 2]>]) -> anyhow::Result { + match segments { + [] => Ok("[]".to_string()), + [segment] => Ok(serde_json::to_string(segment)?), + _ => Ok(serde_json::to_string(segments)?), + } +} + +fn split_waypoints_on_large_gaps(waypoints: &[[f64; 2]], gap_m: f64) -> Vec> { + if waypoints.len() < 2 { + return Vec::new(); + } + + let mut chunks: Vec> = Vec::new(); + let mut current = vec![waypoints[0]]; + + for &point in &waypoints[1..] { + let prev = *current.last().unwrap(); + let gap = haversine_distance(prev[0], prev[1], point[0], point[1]); + if gap > gap_m { + if current.len() >= 2 { + chunks.push(current); + } + current = vec![point]; + continue; + } + current.push(point); + } + + if current.len() >= 2 { + chunks.push(current); + } + + chunks +} + +fn chunk_sort_key(chunk: &[[f64; 2]], direction_code: &str) -> f64 { + let first = chunk.first().copied().unwrap_or([0.0, 0.0]); + let last = chunk.last().copied().unwrap_or(first); + + match direction_code { + "EB" | "WB" => first[1].min(last[1]), + _ => first[0].min(last[0]), + } +} + /// Find connected components in the edge graph (undirected). /// Returns Vec> where each inner vec is a set of edge indices. fn connected_edge_components(edges: &[CorridorEdge]) -> Vec> { @@ -557,119 +877,6 @@ fn bfs_shortest_path( None } -/// Merge corridors for the same (highway, direction_code) into a single route. -/// -/// Geographically disconnected interstates (I-76 PA vs CO) will have separate -/// corridors. We merge them by sorting along the travel axis and concatenating. -fn collapse_same_highway_corridors(routes: &mut Vec) -> anyhow::Result<()> { - let mut grouped: HashMap<(String, String), Vec> = HashMap::new(); - for route in std::mem::take(routes) { - grouped - .entry((route.highway.clone(), route.direction_code.clone())) - .or_default() - .push(route); - } - - let mut collapsed: Vec = Vec::new(); - for ((highway, direction_code), mut group) in grouped { - if group.len() == 1 { - collapsed.push(group.remove(0)); - continue; - } - - // Sort corridors by geographic position along the travel axis - let descending = matches!(direction_code.as_str(), "WB" | "SB"); - group.sort_by(|a, b| { - let a_key = axis_sort_key(&direction_code, a); - let b_key = axis_sort_key(&direction_code, b); - if descending { - b_key.partial_cmp(&a_key).unwrap_or(Ordering::Equal) - } else { - a_key.partial_cmp(&b_key).unwrap_or(Ordering::Equal) - } - }); - - let mut merged_waypoints: Vec<[f64; 2]> = Vec::new(); - for route in &group { - if route.waypoints.is_empty() { - continue; - } - if merged_waypoints.is_empty() { - merged_waypoints.extend(route.waypoints.iter().copied()); - continue; - } - // Skip first point if very close to last merged point (avoid duplicate) - let append_from = match ( - merged_waypoints.last().copied(), - route.waypoints.first().copied(), - ) { - (Some(last), Some(first)) => { - if haversine_distance(last[0], last[1], first[0], first[1]) < 35.0 { - 1 - } else { - 0 - } - } - _ => 0, - }; - merged_waypoints.extend(route.waypoints.iter().skip(append_from).copied()); - } - - if merged_waypoints.len() < 2 { - continue; - } - - let distance_m = *cumulative_distances(&merged_waypoints) - .last() - .unwrap_or(&0.0); - if distance_m < 1.0 { - continue; - } - - let bounds = bounds_for_points(&merged_waypoints); - let start = merged_waypoints.first().copied().unwrap_or([0.0, 0.0]); - let end = merged_waypoints.last().copied().unwrap_or([0.0, 0.0]); - let waypoints_json = serde_json::to_string(&merged_waypoints)?; - let label = direction_label(&direction_code).to_string(); - - collapsed.push(RouteRow { - id: deterministic_route_id(&highway, &direction_code, 0, &waypoints_json).to_string(), - highway: highway.clone(), - direction_code: direction_code.clone(), - direction_label: label.clone(), - display_name: format!("{} {}", highway, label), - corridor_id: 0, - variant_rank: 0, - distance_m, - duration_s: merged_waypoints.len() as f64 * INTERVAL_S, - interval_s: INTERVAL_S, - point_count: merged_waypoints.len() as i32, - start_lat: start[0], - start_lon: start[1], - end_lat: end[0], - end_lon: end[1], - min_lat: bounds.0, - max_lat: bounds.1, - min_lon: bounds.2, - max_lon: bounds.3, - waypoints_json, - waypoints: merged_waypoints, - }); - } - - collapsed.sort_by(|a, b| { - let num_a = interstate_number(&a.highway); - let num_b = interstate_number(&b.highway); - num_a - .cmp(&num_b) - .then_with(|| a.highway.cmp(&b.highway)) - .then_with(|| a.direction_code.cmp(&b.direction_code)) - }); - - *routes = collapsed; - Ok(()) -} - // ============================================================================ // Helpers // ============================================================================ @@ -701,15 +908,6 @@ fn interstate_number(highway: &str) -> i32 { .unwrap_or(0) } -fn axis_sort_key(direction_code: &str, route: &RouteRow) -> f64 { - match direction_code { - "WB" => route.start_lon.max(route.end_lon), - "EB" => route.start_lon.min(route.end_lon), - "SB" => route.start_lat.max(route.end_lat), - _ => route.start_lat.min(route.end_lat), // NB - } -} - /// Check whether the polyline's overall heading is consistent with the /// direction code. Uses the corridor's dominant axis: NB/SB check /// latitude increase/decrease, EB/WB check longitude increase/decrease. @@ -744,6 +942,31 @@ fn parse_polyline_json(raw: &str) -> Vec<[f64; 2]> { .collect() } +fn corridor_edge_from_row( + corridor_highway: &str, + edge_highway: &str, + start_node: i64, + end_node: i64, + length_m: i32, + polyline_json: &str, +) -> Option { + if corridor_highway != edge_highway { + return None; + } + + let polyline = parse_polyline_json(polyline_json); + if polyline.len() < 2 { + return None; + } + + Some(CorridorEdge { + start_node, + end_node, + polyline, + length_m: length_m as f64, + }) +} + fn cumulative_distances(points: &[[f64; 2]]) -> Vec { let mut dists = Vec::with_capacity(points.len()); dists.push(0.0); @@ -969,7 +1192,7 @@ mod tests { ]; let segments = walk_corridor_segments(&edges, "NB"); assert_eq!(segments.len(), 1); - let poly = &segments[0]; + let poly = &segments[0].points; assert_eq!(poly.len(), 3); assert_eq!(poly[0], [30.0, -87.0]); assert_eq!(poly[2], [31.0, -87.0]); @@ -1008,8 +1231,144 @@ mod tests { // NB axis extremes: min lat 30.0 (node 1) → max lat 31.5 (node 5) let segments = walk_corridor_segments(&edges, "NB"); assert_eq!(segments.len(), 1); - let poly = &segments[0]; + let poly = &segments[0].points; assert_eq!(poly.first().unwrap()[0], 30.0); assert_eq!(poly.last().unwrap()[0], 31.5); } + + #[test] + fn prune_micro_segments_drops_tiny_detached_component() { + let segments = vec![ + CorridorSegment { + points: vec![[32.0, -117.0], [49.0, -122.0]], + length_m: 2_200_000.0, + }, + CorridorSegment { + points: vec![[32.54, -117.03], [32.543, -117.031]], + length_m: 641.0, + }, + ]; + + let kept = prune_micro_segments(segments); + + assert_eq!(kept.len(), 1); + assert_eq!(kept[0].length_m, 2_200_000.0); + } + + #[test] + fn prune_micro_segments_keeps_legitimate_disjoint_mainlines() { + let segments = vec![ + CorridorSegment { + points: vec![[41.0, -122.0], [44.0, -119.0]], + length_m: 372_000.0, + }, + CorridorSegment { + points: vec![[44.0, -119.0], [46.0, -111.0]], + length_m: 1_237_000.0, + }, + ]; + + let kept = prune_micro_segments(segments); + + assert_eq!(kept.len(), 2); + } + + #[test] + fn build_corridor_routes_preserves_disconnected_routes_as_segments() { + let corridors = vec![CorridorInfo { + corridor_id: 1, + highway: "I-84".to_string(), + canonical_direction: "east".to_string(), + }]; + let edges = vec![ + CorridorEdge { + start_node: 1, + end_node: 2, + polyline: vec![[41.0, -122.0], [42.0, -121.0]], + length_m: 120_000.0, + }, + CorridorEdge { + start_node: 3, + end_node: 4, + polyline: vec![[45.0, -75.0], [46.0, -74.0]], + length_m: 120_000.0, + }, + ]; + let edges_by_corridor = HashMap::from([(1, edges)]); + + let routes = build_corridor_routes(&corridors, &edges_by_corridor).unwrap(); + + assert_eq!(routes.len(), 1); + let route = &routes[0]; + let segments: Vec> = serde_json::from_str(&route.waypoints_json).unwrap(); + assert_eq!(segments.len(), 2); + assert_eq!(route.direction_code, "EB"); + assert!(route.distance_m > 200_000.0); + } + + #[test] + fn sanitize_route_segments_preserves_far_chunks_as_distinct_segments() { + let segments = vec![vec![ + [25.0, -80.0], + [25.05, -80.01], + [25.2, -80.2], + [25.25, -80.21], + ]]; + + let cleaned = sanitize_route_segments(segments, "NB"); + + assert_eq!( + cleaned, + vec![ + vec![[25.0, -80.0], [25.05, -80.01]], + vec![[25.2, -80.2], [25.25, -80.21]], + ] + ); + } + + #[test] + fn sanitize_route_segments_reorders_southbound_chunks() { + let segments = vec![vec![ + [35.0, -80.0], + [34.95, -80.0], + [35.12, -80.0], + [35.07, -80.0], + ]]; + + let cleaned = sanitize_route_segments(segments, "SB"); + + assert_eq!( + cleaned, + vec![ + vec![[35.12, -80.0], [35.07, -80.0]], + vec![[35.0, -80.0], [34.95, -80.0]], + ] + ); + } + + #[test] + fn split_waypoints_on_large_gaps_breaks_far_apart_chunks() { + let waypoints = vec![ + [25.0, -80.0], + [25.05, -80.01], + [25.2, -80.2], + [25.25, -80.21], + ]; + + let chunks = split_waypoints_on_large_gaps(&waypoints, ROUTE_GAP_BREAK_M); + + assert_eq!(chunks.len(), 2); + assert_eq!(chunks[0], vec![[25.0, -80.0], [25.05, -80.01]]); + assert_eq!(chunks[1], vec![[25.2, -80.2], [25.25, -80.21]]); + } + + #[test] + fn corridor_edge_loader_skips_mismatched_highways() { + let edge = + corridor_edge_from_row("I-8", "I-795", 1, 2, 100, "[[32.0,-117.0],[32.1,-117.1]]"); + assert!(edge.is_none()); + + let edge = corridor_edge_from_row("I-8", "I-8", 1, 2, 100, "[[32.0,-117.0],[32.1,-117.1]]"); + assert!(edge.is_some()); + } } diff --git a/schema/bootstrap.sql b/schema/bootstrap.sql index c8ecd3c..4c2f14b 100644 --- a/schema/bootstrap.sql +++ b/schema/bootstrap.sql @@ -45,6 +45,7 @@ CREATE TABLE IF NOT EXISTS highway_edges ( min_lon DOUBLE PRECISION NOT NULL, max_lon DOUBLE PRECISION NOT NULL, polyline_json TEXT NOT NULL, + source_way_ids_json TEXT NOT NULL DEFAULT '[]', direction TEXT, corridor_id INTEGER ); @@ -52,14 +53,25 @@ CREATE INDEX IF NOT EXISTS highway_edges_geom_idx ON highway_edges USING GIST (g CREATE INDEX IF NOT EXISTS highway_edges_corridor_idx ON highway_edges (highway, component); CREATE INDEX IF NOT EXISTS highway_edges_start_node_idx ON highway_edges (start_node); CREATE INDEX IF NOT EXISTS highway_edges_end_node_idx ON highway_edges (end_node); +ALTER TABLE highway_edges + ADD COLUMN IF NOT EXISTS source_way_ids_json TEXT NOT NULL DEFAULT '[]'; CREATE INDEX IF NOT EXISTS highway_edges_corridor_id_idx ON highway_edges (corridor_id); CREATE TABLE IF NOT EXISTS corridors ( corridor_id INTEGER PRIMARY KEY, highway TEXT NOT NULL, - canonical_direction TEXT + canonical_direction TEXT, + root_relation_id BIGINT, + geometry_json TEXT NOT NULL DEFAULT '[]', + source_way_ids_json TEXT NOT NULL DEFAULT '[]' ); CREATE INDEX IF NOT EXISTS corridors_highway_idx ON corridors (highway); +ALTER TABLE corridors + ADD COLUMN IF NOT EXISTS root_relation_id BIGINT; +ALTER TABLE corridors + ADD COLUMN IF NOT EXISTS geometry_json TEXT NOT NULL DEFAULT '[]'; +ALTER TABLE corridors + ADD COLUMN IF NOT EXISTS source_way_ids_json TEXT NOT NULL DEFAULT '[]'; CREATE TABLE IF NOT EXISTS corridor_exits ( corridor_id INTEGER NOT NULL, diff --git a/schema/derive.sql b/schema/derive.sql index 2cec995..ee89d7d 100644 --- a/schema/derive.sql +++ b/schema/derive.sql @@ -53,12 +53,15 @@ CREATE TABLE IF NOT EXISTS highway_edges ( min_lon DOUBLE PRECISION NOT NULL, max_lon DOUBLE PRECISION NOT NULL, polyline_json TEXT NOT NULL, + source_way_ids_json TEXT NOT NULL DEFAULT '[]', direction TEXT ); CREATE INDEX IF NOT EXISTS highway_edges_geom_idx ON highway_edges USING GIST (geom); CREATE INDEX IF NOT EXISTS highway_edges_corridor_idx ON highway_edges (highway, component); CREATE INDEX IF NOT EXISTS highway_edges_start_node_idx ON highway_edges (start_node); CREATE INDEX IF NOT EXISTS highway_edges_end_node_idx ON highway_edges (end_node); +ALTER TABLE highway_edges + ADD COLUMN IF NOT EXISTS source_way_ids_json TEXT NOT NULL DEFAULT '[]'; -- Corridor tables (populated by openinterstate-derive after this SQL) ALTER TABLE highway_edges ADD COLUMN IF NOT EXISTS corridor_id INTEGER; @@ -67,9 +70,18 @@ CREATE INDEX IF NOT EXISTS highway_edges_corridor_id_idx ON highway_edges (corri CREATE TABLE IF NOT EXISTS corridors ( corridor_id INTEGER PRIMARY KEY, highway TEXT NOT NULL, - canonical_direction TEXT + canonical_direction TEXT, + root_relation_id BIGINT, + geometry_json TEXT NOT NULL DEFAULT '[]', + source_way_ids_json TEXT NOT NULL DEFAULT '[]' ); CREATE INDEX IF NOT EXISTS corridors_highway_idx ON corridors (highway); +ALTER TABLE corridors + ADD COLUMN IF NOT EXISTS root_relation_id BIGINT; +ALTER TABLE corridors + ADD COLUMN IF NOT EXISTS geometry_json TEXT NOT NULL DEFAULT '[]'; +ALTER TABLE corridors + ADD COLUMN IF NOT EXISTS source_way_ids_json TEXT NOT NULL DEFAULT '[]'; CREATE TABLE IF NOT EXISTS corridor_exits ( corridor_id INTEGER NOT NULL, diff --git a/schema/osm2pgsql/openinterstate.lua b/schema/osm2pgsql/openinterstate.lua index ece155f..2481563 100644 --- a/schema/osm2pgsql/openinterstate.lua +++ b/schema/osm2pgsql/openinterstate.lua @@ -78,6 +78,39 @@ local function is_highway_class(v) or v == "trunk_link" end +local function has_interstate_ref(tags) + local ref = nil + if tags.ref ~= nil and tags.ref ~= "" then + ref = tags.ref + elseif tags.int_ref ~= nil and tags.int_ref ~= "" then + ref = tags.int_ref + else + ref = "" + end + + for candidate in string.gmatch(ref, "([^;]+)") do + local trimmed = candidate:gsub("^%s+", ""):gsub("%s+$", "") + if string.match(trimmed, "^[Ii][%s%-]*%d+%a?$") ~= nil then + return true + end + end + return false +end + +local function effective_highway_class(tags) + local highway = tags.highway + if is_highway_class(highway) then + return highway + end + if highway == "construction" and is_highway_class(tags.construction) then + return tags.construction + end + if highway == "construction" and has_interstate_ref(tags) then + return "motorway" + end + return nil +end + function osm2pgsql.process_node(object) local tags = object.tags @@ -106,9 +139,9 @@ end function osm2pgsql.process_way(object) local tags = object.tags - local hwy = tags.highway + local hwy = effective_highway_class(tags) - if hwy ~= nil and is_highway_class(hwy) then + if hwy ~= nil then local geom = object:as_linestring() if geom ~= nil then -- Format node IDs as PostgreSQL int8[] literal: {id1,id2,...} diff --git a/tooling/export_release.py b/tooling/export_release.py index b920c96..0adbf25 100755 --- a/tooling/export_release.py +++ b/tooling/export_release.py @@ -5,6 +5,7 @@ import csv import hashlib import json +import math import re from dataclasses import dataclass from datetime import date, datetime, timezone @@ -21,6 +22,7 @@ INTERSTATE_FILTER = r"^(?:I-?[0-9]+|I-?35[EW]|I-?69[CEW])$" INTERSTATE_NAME_RE = re.compile(INTERSTATE_FILTER) SHA256_RE = re.compile(r"^[0-9a-f]{64}$") +ROUTE_GAP_BREAK_METERS = 10_000 @dataclass(frozen=True) @@ -199,22 +201,111 @@ def write_checksums( writer.writerow([sha256_file(file_path, hash_cache), file_path.relative_to(output_path.parent).as_posix()]) +def distance_meters(a: list[float], b: list[float]) -> float: + lat1, lon1 = a + lat2, lon2 = b + lat1_rad = math.radians(lat1) + lat2_rad = math.radians(lat2) + dlat = math.radians(lat2 - lat1) + dlon = math.radians(lon2 - lon1) + hav = ( + math.sin(dlat / 2) ** 2 + + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon / 2) ** 2 + ) + return 2 * 6_371_000 * math.asin(min(1.0, math.sqrt(hav))) + + +def split_route_waypoints( + waypoints: list[list[float]], gap_meters: float = ROUTE_GAP_BREAK_METERS +) -> list[list[list[float]]]: + if len(waypoints) < 2: + return [] + + segments: list[list[list[float]]] = [] + current = [waypoints[0]] + + for point in waypoints[1:]: + if distance_meters(current[-1], point) > gap_meters: + if len(current) >= 2: + segments.append(current) + current = [point] + continue + current.append(point) + + if len(current) >= 2: + segments.append(current) + + return segments + + +def is_waypoint_pair(value: Any) -> bool: + return ( + isinstance(value, list) + and len(value) >= 2 + and isinstance(value[0], (int, float)) + and isinstance(value[1], (int, float)) + ) + + +def route_waypoint_segments(route: dict[str, Any]) -> list[list[list[float]]]: + raw = json.loads(route["waypoints_json"]) + if not isinstance(raw, list) or not raw: + return [] + + if is_waypoint_pair(raw[0]): + waypoints = [[float(pair[0]), float(pair[1])] for pair in raw if is_waypoint_pair(pair)] + segments = split_route_waypoints(waypoints) + if not segments and len(waypoints) >= 2: + segments = [waypoints] + return segments + + segments: list[list[list[float]]] = [] + for segment in raw: + if not isinstance(segment, list): + continue + points = [[float(pair[0]), float(pair[1])] for pair in segment if is_waypoint_pair(pair)] + if len(points) >= 2: + segments.append(points) + return segments + + +def route_geometry_geojson(route: dict[str, Any]) -> dict[str, Any]: + segments = route_waypoint_segments(route) + + if len(segments) <= 1: + coords = [[pair[1], pair[0]] for pair in (segments[0] if segments else [])] + return {"type": "LineString", "coordinates": coords} + + return { + "type": "MultiLineString", + "coordinates": [ + [[pair[1], pair[0]] for pair in segment] + for segment in segments + ], + } + + def route_waypoints_to_gpx(route: dict[str, Any]) -> str: - waypoints = json.loads(route["waypoints_json"]) - points = [] - for idx, pair in enumerate(waypoints): - lat, lon = pair - points.append( - f' {route["reference_route_id"]}-{idx}' - ) + segments = route_waypoint_segments(route) + + track_segments = [] + for segment_idx, segment in enumerate(segments): + track_segments.append(" ") + for point_idx, pair in enumerate(segment): + lat, lon = pair + track_segments.append( + f' {route["reference_route_id"]}-{segment_idx}-{point_idx}' + ) + track_segments.append(" ") + display_name = route["display_name"] or route["reference_route_id"] return "\n".join( [ '', '', - f" {xml_escape(display_name)}", - *points, - " ", + f" {xml_escape(display_name)}", + *track_segments, + " ", "", ] ) @@ -312,10 +403,10 @@ def main() -> None: c.highway AS interstate_name, c.canonical_direction AS direction_code, initcap(c.canonical_direction) AS direction_label, - ST_AsGeoJSON(ST_LineMerge(ST_Collect(he.geom))) AS geometry_geojson, + c.geometry_json AS geometry_geojson, COUNT(he.id) AS edge_count FROM corridors c - JOIN highway_edges he ON he.corridor_id = c.corridor_id + LEFT JOIN highway_edges he ON he.corridor_id = c.corridor_id WHERE c.highway ~ '{INTERSTATE_FILTER}' GROUP BY c.corridor_id, c.highway, c.canonical_direction ORDER BY c.highway, c.canonical_direction, c.corridor_id @@ -461,13 +552,7 @@ def main() -> None: if spec.name == "reference_routes": for row in rows: - waypoints = json.loads(row["waypoints_json"]) - row["geometry_geojson"] = json.dumps( - { - "type": "LineString", - "coordinates": [[pair[1], pair[0]] for pair in waypoints], - } - ) + row["geometry_geojson"] = json.dumps(route_geometry_geojson(row)) parquet_rows = [ {key: value for key, value in row.items() if key != "waypoints_json"} for row in rows ] diff --git a/tooling/extract_interstate_relations.py b/tooling/extract_interstate_relations.py new file mode 100644 index 0000000..0826026 --- /dev/null +++ b/tooling/extract_interstate_relations.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import csv +from collections import defaultdict +import re +import subprocess +import urllib.parse +from dataclasses import dataclass +from pathlib import Path + + +INTERSTATE_NETWORK = "US:I" +INTERSTATE_REF_RE = re.compile(r"^I?[\s-]*(\d+[A-Z]?)$") +CARDINAL_DIRECTIONS = {"north", "south", "east", "west"} + + +@dataclass(frozen=True) +class RelationMember: + member_type: str + member_id: int + role: str + + +@dataclass +class InterstateRelation: + relation_id: int + ref: str + direction: str | None + members: list[RelationMember] + + +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Extract cached Interstate route relation memberships from a source PBF." + ) + parser.add_argument("--source-pbf", required=True) + parser.add_argument("--output", required=True) + return parser.parse_args(argv) + + +def normalize_interstate_ref(raw: str | None) -> str | None: + if raw is None: + return None + match = INTERSTATE_REF_RE.fullmatch(raw.strip().upper()) + if not match: + return None + return f"I-{match.group(1)}" + + +def normalize_direction(raw: str | None) -> str | None: + if raw is None: + return None + value = urllib.parse.unquote(raw).strip().lower() + if value in CARDINAL_DIRECTIONS: + return value + return None + + +def parse_tags(raw: str) -> dict[str, str]: + if not raw: + return {} + + tags: dict[str, str] = {} + for entry in raw.split(","): + key, sep, value = entry.partition("=") + if not sep: + continue + tags[key] = urllib.parse.unquote(value) + return tags + + +def parse_members(raw: str) -> list[RelationMember]: + if not raw: + return [] + + members: list[RelationMember] = [] + for entry in raw.split(","): + if not entry: + continue + + member_type = entry[0] + if member_type not in {"w", "r"}: + continue + + body = entry[1:] + member_id_raw, _, role = body.partition("@") + if not member_id_raw.isdigit(): + continue + + members.append( + RelationMember( + member_type=member_type, + member_id=int(member_id_raw), + role=urllib.parse.unquote(role), + ) + ) + return members + + +def parse_relation_line(line: str) -> InterstateRelation | None: + if not line.startswith("r"): + return None + + prefix, _, remainder = line.partition(" T") + if not remainder: + return None + + relation_token = prefix.split(" ", 1)[0] + if not relation_token[1:].isdigit(): + return None + relation_id = int(relation_token[1:]) + + tags_raw, _, members_raw = remainder.partition(" M") + tags = parse_tags(tags_raw) + if tags.get("network") != INTERSTATE_NETWORK or tags.get("route") != "road": + return None + + ref = normalize_interstate_ref(tags.get("ref")) + if ref is None: + return None + + return InterstateRelation( + relation_id=relation_id, + ref=ref, + direction=normalize_direction(tags.get("direction")), + members=parse_members(members_raw), + ) + + +def load_interstate_relations(source_pbf: Path) -> dict[int, InterstateRelation]: + relations: dict[int, InterstateRelation] = {} + process = subprocess.Popen( + ["osmium", "cat", "-t", "relation", str(source_pbf), "-f", "opl"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + assert process.stdout is not None + + try: + for line in process.stdout: + relation = parse_relation_line(line.rstrip("\n")) + if relation is None: + continue + relations[relation.relation_id] = relation + finally: + process.stdout.close() + + stderr = "" + if process.stderr is not None: + stderr = process.stderr.read() + process.stderr.close() + + return_code = process.wait() + if return_code != 0: + raise RuntimeError(f"osmium cat failed with exit code {return_code}: {stderr.strip()}") + + return relations + + +def referenced_relation_ids(relations: dict[int, InterstateRelation]) -> set[int]: + referenced: set[int] = set() + for relation in relations.values(): + for member in relation.members: + if member.member_type == "r" and member.member_id in relations: + referenced.add(member.member_id) + return referenced + + +def effective_direction( + relation_direction: str | None, + member_role: str, + inherited_direction: str | None, +) -> str | None: + role_direction = normalize_direction(member_role) + if role_direction is not None: + return role_direction + if relation_direction is not None: + return relation_direction + return inherited_direction + + +def flatten_relation_memberships( + relations: dict[int, InterstateRelation], +) -> list[tuple[int, str, int, int, str, str, int]]: + rows: list[tuple[int, str, int, int, str, str, int]] = [] + roots = sorted(set(relations) - referenced_relation_ids(relations)) + visited_relations: set[int] = set() + sequence_by_group: dict[tuple[int, str], int] = defaultdict(int) + + def visit( + relation_id: int, + root_relation_id: int, + inherited_direction: str | None, + stack: set[int], + ) -> None: + if relation_id in stack: + return + + relation = relations[relation_id] + visited_relations.add(relation_id) + stack = set(stack) + stack.add(relation_id) + + for member in relation.members: + direction = effective_direction(relation.direction, member.role, inherited_direction) + if member.member_type == "w": + direction_key = direction or "" + sequence_index = sequence_by_group[(root_relation_id, direction_key)] + sequence_by_group[(root_relation_id, direction_key)] += 1 + rows.append( + ( + member.member_id, + relation.ref, + root_relation_id, + relation_id, + direction_key, + member.role, + sequence_index, + ) + ) + continue + + if member.member_type == "r" and member.member_id in relations: + child = relations[member.member_id] + if child.ref != relation.ref: + continue + visit(member.member_id, root_relation_id, direction, stack) + + for root_relation_id in roots: + visit(root_relation_id, root_relation_id, relations[root_relation_id].direction, set()) + + for relation_id in sorted(relations): + if relation_id not in visited_relations: + visit(relation_id, relation_id, relations[relation_id].direction, set()) + + return rows + + +def write_rows(rows: list[tuple[int, str, int, int, str, str, int]], output: Path) -> None: + output.parent.mkdir(parents=True, exist_ok=True) + with output.open("w", encoding="utf-8", newline="") as fh: + writer = csv.writer(fh, delimiter="\t") + writer.writerow( + [ + "way_id", + "ref", + "root_relation_id", + "leaf_relation_id", + "direction", + "role", + "sequence_index", + ] + ) + writer.writerows(rows) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(argv) + source_pbf = Path(args.source_pbf).resolve() + output = Path(args.output).resolve() + + relations = load_interstate_relations(source_pbf) + rows = flatten_relation_memberships(relations) + write_rows(rows, output) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tooling/tests/test_export_release.py b/tooling/tests/test_export_release.py index e62446c..0be48ea 100644 --- a/tooling/tests/test_export_release.py +++ b/tooling/tests/test_export_release.py @@ -109,6 +109,77 @@ def test_load_source_file_metadata_rejects_bad_sha256(self) -> None: with self.assertRaisesRegex(ValueError, "sha256"): MODULE.load_source_file_metadata(metadata_path, "source_pbf") +class RouteGeometryTests(unittest.TestCase): + def test_route_geometry_splits_large_gaps_into_multilinestring(self) -> None: + route = { + "waypoints_json": json.dumps( + [ + [32.0, -117.0], + [32.01, -117.01], + [40.0, -75.0], + [40.01, -75.01], + ] + ) + } + + geometry = MODULE.route_geometry_geojson(route) + + self.assertEqual(geometry["type"], "MultiLineString") + self.assertEqual(len(geometry["coordinates"]), 2) + + def test_route_geometry_honors_explicit_segmented_waypoints(self) -> None: + route = { + "waypoints_json": json.dumps( + [ + [[32.0, -117.0], [32.01, -117.01]], + [[40.0, -75.0], [40.01, -75.01]], + ] + ) + } + + geometry = MODULE.route_geometry_geojson(route) + + self.assertEqual(geometry["type"], "MultiLineString") + self.assertEqual(len(geometry["coordinates"]), 2) + + def test_route_waypoints_to_gpx_emits_multiple_track_segments(self) -> None: + route = { + "reference_route_id": "route-1", + "display_name": "I-TEST Northbound", + "waypoints_json": json.dumps( + [ + [32.0, -117.0], + [32.01, -117.01], + [40.0, -75.0], + [40.01, -75.01], + ] + ), + } + + gpx = MODULE.route_waypoints_to_gpx(route) + + self.assertEqual(gpx.count(""), 2) + self.assertIn("route-1-0-0", gpx) + self.assertIn("route-1-1-1", gpx) + + def test_route_waypoints_to_gpx_preserves_explicit_segments(self) -> None: + route = { + "reference_route_id": "route-2", + "display_name": "I-TEST Southbound", + "waypoints_json": json.dumps( + [ + [[32.0, -117.0], [32.01, -117.01]], + [[40.0, -75.0], [40.01, -75.01]], + ] + ), + } + + gpx = MODULE.route_waypoints_to_gpx(route) + + self.assertEqual(gpx.count(""), 2) + self.assertIn("route-2-0-0", gpx) + self.assertIn("route-2-1-1", gpx) + if __name__ == "__main__": unittest.main() diff --git a/tooling/tests/test_extract_interstate_relations.py b/tooling/tests/test_extract_interstate_relations.py new file mode 100644 index 0000000..35df807 --- /dev/null +++ b/tooling/tests/test_extract_interstate_relations.py @@ -0,0 +1,89 @@ +import importlib.util +import sys +import unittest +from pathlib import Path + + +MODULE_PATH = Path(__file__).resolve().parents[1] / "extract_interstate_relations.py" +SPEC = importlib.util.spec_from_file_location("extract_interstate_relations", MODULE_PATH) +assert SPEC and SPEC.loader +MODULE = importlib.util.module_from_spec(SPEC) +sys.modules[SPEC.name] = MODULE +SPEC.loader.exec_module(MODULE) + + +class ExtractInterstateRelationsTests(unittest.TestCase): + def test_parse_relation_line_accepts_interstate_route(self) -> None: + relation = MODULE.parse_relation_line( + "r331325 v18 dV c0 t2021-10-01T02:32:58Z i0 u " + "Tname=I%20%95%20%(super),network=US:I,ref=95,route=road,type=route " + "Mr317707@,r338257@south,r338258@north" + ) + + self.assertIsNotNone(relation) + assert relation is not None + self.assertEqual(relation.relation_id, 331325) + self.assertEqual(relation.ref, "I-95") + self.assertEqual(relation.members[0].member_type, "r") + self.assertEqual(relation.members[1].role, "south") + + def test_parse_relation_line_rejects_non_interstate_network(self) -> None: + relation = MODULE.parse_relation_line( + "r23147 v393 dV c0 t2026-03-05T11:41:12Z i0 u " + "Tnetwork=US:US,ref=64,route=road,type=route Mw1@forward" + ) + + self.assertIsNone(relation) + + def test_flatten_relation_memberships_inherits_direction_from_children(self) -> None: + relations = { + 1: MODULE.InterstateRelation( + relation_id=1, + ref="I-95", + direction=None, + members=[ + MODULE.RelationMember("r", 2, "north"), + MODULE.RelationMember("r", 3, "south"), + ], + ), + 2: MODULE.InterstateRelation( + relation_id=2, + ref="I-95", + direction="north", + members=[MODULE.RelationMember("w", 101, "")], + ), + 3: MODULE.InterstateRelation( + relation_id=3, + ref="I-95", + direction="south", + members=[MODULE.RelationMember("w", 202, "")], + ), + } + + rows = MODULE.flatten_relation_memberships(relations) + + self.assertEqual( + rows, + [ + (101, "I-95", 1, 2, "north", "", 0), + (202, "I-95", 1, 3, "south", "", 0), + ], + ) + + def test_flatten_relation_memberships_keeps_relation_root_for_direct_way_members(self) -> None: + relations = { + 10: MODULE.InterstateRelation( + relation_id=10, + ref="I-10", + direction="west", + members=[MODULE.RelationMember("w", 999, "")], + ) + } + + rows = MODULE.flatten_relation_memberships(relations) + + self.assertEqual(rows, [(999, "I-10", 10, 10, "west", "", 0)]) + + +if __name__ == "__main__": + unittest.main()