diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml index b221aed4..e6dff4c1 100644 --- a/.github/workflows/doc.yml +++ b/.github/workflows/doc.yml @@ -1,14 +1,33 @@ -name: Deploy Docs to GitHub Pages +name: Documentation on: push: branches: - main + pull_request: jobs: - release: + doc-check: + name: Rustdoc + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v6 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Build documentation with warnings denied + run: cargo doc --all --no-deps + env: + RUSTDOCFLAGS: -D warnings + + deploy: name: GitHub Pages runs-on: ubuntu-latest + needs: doc-check + if: github.event_name == 'push' && github.ref == 'refs/heads/main' steps: - name: Checkout Repository @@ -19,6 +38,8 @@ jobs: - name: Build Documentation run: cargo doc --all --no-deps + env: + RUSTDOCFLAGS: -D warnings - name: Create index run: echo '' > target/doc/index.html diff --git a/CHANGELOG.md b/CHANGELOG.md index 57ba8677..69d6489f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Change Log +## 0.29.0 - TBD + +- Breaking: `Metadata::build_time()` now returns + `Result` instead of panicking on + unrepresentable timestamps, and databases whose `build_epoch` cannot be + represented as `SystemTime` are rejected as invalid during open and verify. +- Breaking: `Reader::metadata` is now private. Use `Reader::metadata()` to + access validated metadata by shared reference. +- Breaking: Opening a database now rejects unsupported major format versions, + unsupported IP versions, and zero-node search trees. Minor format versions + are still accepted for forward compatibility. +- Fixed: `within()` and `networks()` now handle IPv6 databases without an + IPv4 subtree without reading terminal data nodes as tree nodes or + panicking while formatting low IPv6 networks. +- Fixed: Skipping ignored or unknown fields now enforces data-section + bounds and the maximum nesting depth instead of accepting truncated + values or overflowing the stack on deeply nested corrupt data. +- Internal: Added focused benchmarks for network iteration and serde + unknown-field skipping, plus metadata build-time conversion, to guard + performance-sensitive fixes. +- Documentation: Fixed intra-doc links so docs build with warnings denied, + and added a CI check to keep them passing. + ## 0.28.1 - 2026-04-26 - Fixed: Databases with an impossible declared search tree size are now diff --git a/Cargo.toml b/Cargo.toml index 337e5ce5..3eedd639 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "maxminddb" -version = "0.28.1" +version = "0.29.0" authors = [ "Gregory J. Oschwald " ] description = "Library for reading MaxMind DB format used by GeoIP2 and GeoLite2" readme = "README.md" @@ -47,3 +47,11 @@ harness = false [[bench]] name = "serde_usage" harness = false + +[[bench]] +name = "within" +harness = false + +[[bench]] +name = "metadata" +harness = false diff --git a/README.md b/README.md index 507c17fe..0356c72b 100644 --- a/README.md +++ b/README.md @@ -28,14 +28,14 @@ Add this to your `Cargo.toml`: ```toml [dependencies] -maxminddb = "0.28" +maxminddb = "0.29" ``` Enable optional features as needed: ```toml [dependencies] -maxminddb = { version = "0.28", features = ["mmap"] } +maxminddb = { version = "0.29", features = ["mmap"] } ``` ## Example @@ -113,7 +113,7 @@ Enable in `Cargo.toml`: ```toml [dependencies] -maxminddb = { version = "0.28", features = ["mmap"] } +maxminddb = { version = "0.29", features = ["mmap"] } ``` Note: `simdutf8` and `unsafe-str-decode` are mutually exclusive. diff --git a/UPGRADING.md b/UPGRADING.md index 4043d847..94b9de9b 100644 --- a/UPGRADING.md +++ b/UPGRADING.md @@ -1,5 +1,49 @@ # Upgrading Guide +## 0.28 to 0.29 + +### Reader Metadata + +`Reader::metadata` is now private. Use `Reader::metadata()` to access +validated metadata by shared reference. + +**Before (0.28):** + +```rust +let database_type = &reader.metadata.database_type; +``` + +**After (0.29):** + +```rust +let database_type = &reader.metadata().database_type; +``` + +### Metadata Build Time + +`Metadata::build_time()` now returns `Result`. +Databases whose `build_epoch` cannot be represented as `SystemTime` are +rejected as invalid when opened or verified. + +**Before (0.28):** + +```rust +let build_time = reader.metadata.build_time(); +``` + +**After (0.29):** + +```rust +let build_time = reader.metadata().build_time()?; +``` + +### Metadata Validation + +Opening a database now rejects unsupported major format versions, unsupported +IP versions, zero-node search trees, and unrepresentable build epochs as +invalid databases. Minor format versions are still accepted for forward +compatibility. + ## 0.26 to 0.27 This release includes significant API changes to improve ergonomics and enable diff --git a/benches/metadata.rs b/benches/metadata.rs new file mode 100644 index 00000000..ecff14a0 --- /dev/null +++ b/benches/metadata.rs @@ -0,0 +1,20 @@ +use std::hint::black_box; + +use criterion::{criterion_group, criterion_main, Criterion}; + +const DB_FILE: &str = "GeoLite2-City.mmdb"; + +pub fn metadata_benchmark(c: &mut Criterion) { + let reader = maxminddb::Reader::open_readfile(DB_FILE).unwrap(); + + c.bench_function("metadata/build_time", |b| { + b.iter(|| black_box(reader.metadata().build_time().unwrap())) + }); +} + +criterion_group! { + name = benches; + config = Criterion::default().sample_size(20); + targets = metadata_benchmark +} +criterion_main!(benches); diff --git a/benches/serde_usage.rs b/benches/serde_usage.rs index 7d6be999..5a269fb7 100644 --- a/benches/serde_usage.rs +++ b/benches/serde_usage.rs @@ -10,6 +10,9 @@ use common::{generate_ipv4, open_reader}; const DB_FILE: &str = "GeoLite2-City.mmdb"; +#[derive(serde::Deserialize)] +struct EmptyRecord {} + fn cache_lookups<'a, T>(ips: &[IpAddr], reader: &'a Reader) -> Vec> where T: AsRef<[u8]>, @@ -50,6 +53,16 @@ where } } +fn bench_decode_empty_record(results: &[LookupResult<'_, T>]) +where + T: AsRef<[u8]>, +{ + for result in results { + let empty: Option = result.decode().unwrap(); + black_box(empty); + } +} + fn bench_decode_path_country_iso(results: &[LookupResult<'_, T>]) where T: AsRef<[u8]>, @@ -91,6 +104,9 @@ pub fn serde_usage_benchmark(c: &mut Criterion) { c.bench_function("serde_usage/decode_country_only", |b| { b.iter(|| bench_decode_country_only(&cached_results)) }); + c.bench_function("serde_usage/decode_empty_record", |b| { + b.iter(|| bench_decode_empty_record(&cached_results)) + }); c.bench_function("serde_usage/decode_path_country_iso", |b| { b.iter(|| bench_decode_path_country_iso(&cached_results)) }); diff --git a/benches/within.rs b/benches/within.rs new file mode 100644 index 00000000..ef3207be --- /dev/null +++ b/benches/within.rs @@ -0,0 +1,61 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use maxminddb::{geoip2, Reader}; +use std::hint::black_box; + +fn bench_networks_city_test(c: &mut Criterion) { + let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + + c.bench_function("within/networks_city_test", |b| { + b.iter(|| { + let mut count = 0usize; + for result in reader.networks(Default::default()).unwrap() { + let lookup = result.unwrap(); + black_box(lookup.network().unwrap()); + count += 1; + } + black_box(count); + }); + }); +} + +fn bench_within_city_subnet(c: &mut Criterion) { + let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + let cidr = "81.2.69.0/24".parse().unwrap(); + + c.bench_function("within/city_subnet", |b| { + b.iter(|| { + let mut count = 0usize; + for result in reader.within(cidr, Default::default()).unwrap() { + let lookup = result.unwrap(); + let city: Option> = lookup.decode().unwrap(); + black_box(city); + count += 1; + } + black_box(count); + }); + }); +} + +fn bench_networks_mixed_test(c: &mut Criterion) { + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap(); + + c.bench_function("within/networks_mixed_test", |b| { + b.iter(|| { + let mut count = 0usize; + for result in reader.networks(Default::default()).unwrap() { + let lookup = result.unwrap(); + black_box(lookup.network().unwrap()); + count += 1; + } + black_box(count); + }); + }); +} + +criterion_group! { + name = benches; + config = Criterion::default().sample_size(20); + targets = bench_networks_city_test, bench_within_city_subnet, bench_networks_mixed_test +} +criterion_main!(benches); diff --git a/src/decoder.rs b/src/decoder.rs index 351457b0..81bb2c11 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -34,6 +34,11 @@ const TYPE_FLOAT: u8 = 15; /// This matches the value used in libmaxminddb and the Go reader. const MAXIMUM_DATA_STRUCTURE_DEPTH: u16 = 512; +/// Lower limit for values skipped through unknown fields or IgnoredAny. +/// Skipping is recursive and can be reached by corrupt data that callers did +/// not explicitly request, so keep the limit below small default thread stacks. +const MAXIMUM_SKIPPED_DATA_STRUCTURE_DEPTH: u16 = 128; + #[inline(always)] fn to_usize(base: u8, bytes: &[u8]) -> usize { bytes @@ -49,11 +54,12 @@ macro_rules! decode_int_like { let new_offset = self .current_ptr .checked_add(size) - .filter(|&offset| offset <= self.buf.len()) + .filter(|&offset| offset <= self.limit) .ok_or_else(|| { self.invalid_db_error(&format!("{} of size {}", $label, size)) })?; - let value = self.buf[self.current_ptr..new_offset] + let value = self + .slice(self.current_ptr, new_offset) .iter() .fold($zero, |acc, &b| (acc << 8) | <$ty>::from(b)); self.current_ptr = new_offset; @@ -102,14 +108,21 @@ enum Value<'a, 'de> { #[derive(Debug)] pub(crate) struct Decoder<'de> { buf: &'de [u8], + limit: usize, current_ptr: usize, depth: u16, } impl<'de> Decoder<'de> { pub(crate) fn new(buf: &'de [u8], start_ptr: usize) -> Decoder<'de> { + Decoder::new_with_limit(buf, start_ptr, buf.len()) + } + + pub(crate) fn new_with_limit(buf: &'de [u8], start_ptr: usize, limit: usize) -> Decoder<'de> { + debug_assert!(limit <= buf.len()); Decoder { buf, + limit, current_ptr: start_ptr, depth: 0, } @@ -158,18 +171,40 @@ impl<'de> Decoder<'de> { #[inline(always)] fn checked_offset(&self, size: usize, label: &str) -> DecodeResult { let new_offset = self.current_ptr.wrapping_add(size); - if new_offset < self.current_ptr || new_offset > self.buf.len() { + if new_offset < self.current_ptr || new_offset > self.limit { return Err(self.invalid_db_error(&format!("{label} of size {size}"))); } Ok(new_offset) } + #[inline(always)] + fn slice(&self, start: usize, end: usize) -> &'de [u8] { + debug_assert!(start <= end); + debug_assert!(end <= self.limit); + debug_assert!(self.limit <= self.buf.len()); + // SAFETY: Decoder constructors ensure `limit <= buf.len()`, and all + // callers reach this helper only after checking `end <= limit`. + unsafe { self.buf.get_unchecked(start..end) } + } + + #[inline(always)] + fn skip_bytes(&mut self, size: usize, label: &str) -> DecodeResult<()> { + if self.current_ptr > self.limit || size > self.limit - self.current_ptr { + return Err(self.invalid_db_error(&format!("{label} of size {size}"))); + } + self.current_ptr += size; + Ok(()) + } + #[inline(always)] fn eat_byte(&mut self) -> DecodeResult { - let b = *self - .buf - .get(self.current_ptr) - .ok_or_else(|| self.invalid_db_error("unexpected end of buffer"))?; + if self.current_ptr >= self.limit { + return Err(self.invalid_db_error("unexpected end of buffer")); + } + debug_assert!(self.limit <= self.buf.len()); + // SAFETY: The check above proves `current_ptr < limit`, and decoder + // construction guarantees `limit <= buf.len()`. + let b = unsafe { *self.buf.get_unchecked(self.current_ptr) }; self.current_ptr += 1; Ok(b) } @@ -314,7 +349,7 @@ impl<'de> Decoder<'de> { fn decode_bytes(&mut self, size: usize) -> DecodeResult<&'de [u8]> { let new_offset = self.checked_offset(size, "bytes")?; - let u8_slice = &self.buf[self.current_ptr..new_offset]; + let u8_slice = self.slice(self.current_ptr, new_offset); self.current_ptr = new_offset; Ok(u8_slice) @@ -322,7 +357,8 @@ impl<'de> Decoder<'de> { fn decode_float(&mut self, size: usize) -> DecodeResult { let new_offset = self.checked_offset(size, "float")?; - let value: [u8; 4] = self.buf[self.current_ptr..new_offset] + let value: [u8; 4] = self + .slice(self.current_ptr, new_offset) .try_into() .map_err(|_| self.invalid_db_error(&format!("float of size {size}")))?; self.current_ptr = new_offset; @@ -332,7 +368,8 @@ impl<'de> Decoder<'de> { fn decode_double(&mut self, size: usize) -> DecodeResult { let new_offset = self.checked_offset(size, "double")?; - let value: [u8; 8] = self.buf[self.current_ptr..new_offset] + let value: [u8; 8] = self + .slice(self.current_ptr, new_offset) .try_into() .map_err(|_| self.invalid_db_error(&format!("double of size {size}")))?; self.current_ptr = new_offset; @@ -351,7 +388,7 @@ impl<'de> Decoder<'de> { let new_offset = self .current_ptr .checked_add(size) - .filter(|&offset| offset <= self.buf.len()) + .filter(|&offset| offset <= self.limit) .ok_or_else(|| self.invalid_db_error(&format!("{label} of size {}", size)))?; let p = self.current_ptr; let value = match size { @@ -387,7 +424,7 @@ impl<'de> Decoder<'de> { let new_offset = self .current_ptr .checked_add(size) - .filter(|&offset| offset <= self.buf.len()) + .filter(|&offset| offset <= self.limit) .ok_or_else(|| self.invalid_db_error(&format!("u16 of size {}", size)))?; let p = self.current_ptr; let value = match size { @@ -415,17 +452,17 @@ impl<'de> Decoder<'de> { let pointer_value_offset = [0, 0, 2048, 526_336, 0]; let pointer_size = ((size >> 3) & 0x3) + 1; let p = self.current_ptr; - let len = self.buf.len(); + let limit = self.limit; let new_offset = match p.checked_add(pointer_size) { - Some(offset) if offset <= len => offset, + Some(offset) if offset <= limit => offset, _ => { // Clamp to the end of the buffer so the next decode step fails // with a normal bounds error instead of panicking here. - self.current_ptr = len; - return len; + self.current_ptr = limit; + return limit; } }; - let pointer_bytes = &self.buf[p..new_offset]; + let pointer_bytes = self.slice(p, new_offset); self.current_ptr = new_offset; let base = if pointer_size == 4 { @@ -443,7 +480,7 @@ impl<'de> Decoder<'de> { use std::str::from_utf8_unchecked; let new_offset = self.checked_offset(size, "string")?; - let bytes = &self.buf[self.current_ptr..new_offset]; + let bytes = self.slice(self.current_ptr, new_offset); self.current_ptr = new_offset; // SAFETY: // A corrupt maxminddb will cause undefined behaviour. @@ -464,7 +501,7 @@ impl<'de> Decoder<'de> { use std::str::from_utf8_unchecked; let new_offset = self.checked_offset(size, "string")?; - let bytes = &self.buf[self.current_ptr..new_offset]; + let bytes = self.slice(self.current_ptr, new_offset); self.current_ptr = new_offset; if bytes.is_ascii() { // ASCII is valid UTF-8, so this avoids the full validator fast path. @@ -579,10 +616,10 @@ impl<'de> Decoder<'de> { .current_ptr .checked_add(size) .ok_or_else(|| self.invalid_db_error("string length exceeds buffer"))?; - if new_offset > self.buf.len() { + if new_offset > self.limit { return Err(self.invalid_db_error("string length exceeds buffer")); } - let bytes = &self.buf[self.current_ptr..new_offset]; + let bytes = self.slice(self.current_ptr, new_offset); self.current_ptr = new_offset; Ok(bytes) } @@ -655,45 +692,81 @@ impl<'de> Decoder<'de> { /// Skips the current value, following pointers. pub(crate) fn skip_value(&mut self) -> DecodeResult<()> { let (size, type_num) = self.size_and_type()?; - self.skip_value_inner(size, type_num, true) + self.skip_value_inner(size, type_num, true, 0) } /// Skips the current value without following pointers (for verification). pub(crate) fn skip_value_for_verification(&mut self) -> DecodeResult<()> { let (size, type_num) = self.size_and_type()?; - self.skip_value_inner(size, type_num, false) + self.skip_value_inner(size, type_num, false, 0)?; + self.validate_skip_end() } + #[inline(always)] + pub(crate) fn validate_skip_end(&mut self) -> DecodeResult<()> { + if self.current_ptr > self.limit { + return Err(self.invalid_db_error("skipped value extends beyond buffer")); + } + Ok(()) + } + + #[inline(always)] + fn check_skip_depth(&self, skip_depth: u16) -> DecodeResult { + if skip_depth == MAXIMUM_SKIPPED_DATA_STRUCTURE_DEPTH { + return self.skip_depth_error(); + } + Ok(skip_depth + 1) + } + + #[cold] + fn skip_depth_error(&self) -> DecodeResult { + Err(self + .invalid_db_error("exceeded maximum data structure depth; database is likely corrupt")) + } + + #[inline(always)] fn skip_value_inner( &mut self, size: usize, type_num: u8, follow_pointers: bool, + skip_depth: u16, ) -> DecodeResult<()> { + // When following pointers during normal serde skips, scalar payload + // bounds are validated once the whole skipped value is consumed. The + // no-follow verification path validates each raw payload immediately. match type_num { TYPE_POINTER => { - let new_ptr = self.decode_pointer(size); if follow_pointers { + let new_ptr = self.decode_pointer(size); let saved_ptr = self.current_ptr; self.current_ptr = new_ptr; - self.skip_value()?; + let result = match self.check_skip_depth(skip_depth) { + Ok(child_depth) => match self.skip_value_with_depth(child_depth) { + Ok(()) => self.validate_skip_end(), + Err(err) => Err(err), + }, + Err(err) => Err(err), + }; self.current_ptr = saved_ptr; + return result; } - Ok(()) + let pointer_size = ((size >> 3) & 0x3) + 1; + self.skip_bytes(pointer_size, "pointer") } TYPE_STRING | TYPE_BYTES => { // String or Bytes - skip size bytes + let label = if type_num == TYPE_STRING { + "string" + } else { + "bytes" + }; if follow_pointers { self.current_ptr += size; + Ok(()) } else { - let label = if type_num == TYPE_STRING { - "string" - } else { - "bytes" - }; - self.current_ptr = self.checked_offset(size, label)?; + self.skip_bytes(size, label) } - Ok(()) } TYPE_DOUBLE => { // Double - must be exactly 8 bytes @@ -702,10 +775,10 @@ impl<'de> Decoder<'de> { } if follow_pointers { self.current_ptr += size; + Ok(()) } else { - self.current_ptr = self.checked_offset(size, "double")?; + self.skip_bytes(size, "double") } - Ok(()) } TYPE_FLOAT => { // Float - must be exactly 4 bytes @@ -714,27 +787,27 @@ impl<'de> Decoder<'de> { } if follow_pointers { self.current_ptr += size; + Ok(()) } else { - self.current_ptr = self.checked_offset(size, "float")?; + self.skip_bytes(size, "float") } - Ok(()) } TYPE_UINT16 | TYPE_UINT32 | TYPE_INT32 | TYPE_UINT64 | TYPE_UINT128 => { // Numeric types - skip size bytes + let label = match type_num { + TYPE_UINT16 => "u16", + TYPE_UINT32 => "u32", + TYPE_INT32 => "i32", + TYPE_UINT64 => "u64", + TYPE_UINT128 => "u128", + _ => unreachable!(), + }; if follow_pointers { self.current_ptr += size; + Ok(()) } else { - let label = match type_num { - TYPE_UINT16 => "u16", - TYPE_UINT32 => "u32", - TYPE_INT32 => "i32", - TYPE_UINT64 => "u64", - TYPE_UINT128 => "u128", - _ => unreachable!(), - }; - self.current_ptr = self.checked_offset(size, label)?; + self.skip_bytes(size, label) } - Ok(()) } TYPE_BOOL => { // Boolean - size field IS the value, no data bytes to skip @@ -742,16 +815,20 @@ impl<'de> Decoder<'de> { } TYPE_MAP => { // Map - skip size key-value pairs + let child_depth = self.check_skip_depth(skip_depth)?; for _ in 0..size { - self.skip_value_inner_with_follow(follow_pointers)?; // key - self.skip_value_inner_with_follow(follow_pointers)?; // value + // key + self.skip_value_inner_with_follow(follow_pointers, child_depth)?; + // value + self.skip_value_inner_with_follow(follow_pointers, child_depth)?; } Ok(()) } TYPE_ARRAY => { // Array - skip size elements + let child_depth = self.check_skip_depth(skip_depth)?; for _ in 0..size { - self.skip_value_inner_with_follow(follow_pointers)?; + self.skip_value_inner_with_follow(follow_pointers, child_depth)?; } Ok(()) } @@ -759,9 +836,20 @@ impl<'de> Decoder<'de> { } } - fn skip_value_inner_with_follow(&mut self, follow_pointers: bool) -> DecodeResult<()> { + #[inline(always)] + fn skip_value_with_depth(&mut self, skip_depth: u16) -> DecodeResult<()> { + let (size, type_num) = self.size_and_type()?; + self.skip_value_inner(size, type_num, true, skip_depth) + } + + #[inline(always)] + fn skip_value_inner_with_follow( + &mut self, + follow_pointers: bool, + skip_depth: u16, + ) -> DecodeResult<()> { let (size, type_num) = self.size_and_type()?; - self.skip_value_inner(size, type_num, follow_pointers) + self.skip_value_inner(size, type_num, follow_pointers, skip_depth) } } @@ -939,6 +1027,9 @@ impl<'de: 'a, 'a> de::Deserializer<'de> for &'a mut Decoder<'de> { V: Visitor<'de>, { self.skip_value()?; + if self.depth == 0 { + self.validate_skip_end()?; + } visitor.visit_unit() } @@ -989,6 +1080,13 @@ impl<'de> SeqAccess<'de> for ArrayAccess<'_, 'de> { { // Check if there are no more elements. if self.count == 0 { + // Only top-level containers need an explicit final overrun check. + // Nested containers are bounded by the value that contains them. + if self.de.depth <= 1 && self.de.current_ptr > self.de.limit { + return Err(self + .de + .invalid_db_error("skipped value extends beyond buffer")); + } return Ok(None); } self.count -= 1; @@ -1018,6 +1116,13 @@ impl<'de> MapAccess<'de> for MapAccessor<'_, 'de> { { // Check if there are no more entries. if self.count == 0 { + // Only top-level containers need an explicit final overrun check. + // Nested containers are bounded by the value that contains them. + if self.de.depth <= 1 && self.de.current_ptr > self.de.limit { + return Err(self + .de + .invalid_db_error("skipped value extends beyond buffer")); + } return Ok(None); } self.count -= 1; @@ -1094,7 +1199,9 @@ impl<'de> de::VariantAccess<'de> for EnumAccessor<'_, 'de> { #[cfg(test)] mod tests { - use crate::Reader; + use crate::{MaxMindDbError, Reader}; + + use super::Decoder; #[test] fn test_decoder_accepts_tuple_with_matching_length() { @@ -1159,4 +1266,12 @@ mod tests { .to_string() .contains("expected tuple of length 2, got array of length 3")); } + + #[test] + fn test_skip_value_for_verification_rejects_truncated_pointer_payload() { + let mut decoder = Decoder::new(&[0x28], 0); + let err = decoder.skip_value_for_verification().unwrap_err(); + + assert!(matches!(err, MaxMindDbError::InvalidDatabase { .. })); + } } diff --git a/src/geoip2.rs b/src/geoip2.rs index a8813421..668925fb 100644 --- a/src/geoip2.rs +++ b/src/geoip2.rs @@ -351,7 +351,8 @@ pub struct Asn<'a> { /// Country/City database model structs. /// -/// These structs are used by both [`super::Country`] and [`super::City`] records. +/// These structs are used by both [`crate::geoip2::Country`] and +/// [`crate::geoip2::City`] records. pub mod country { use super::Names; use serde::{Deserialize, Serialize}; @@ -432,7 +433,8 @@ pub mod country { /// City database model structs. /// -/// City-specific structs. Country-level structs are re-exported from [`super::country`]. +/// City-specific structs. Country-level structs are re-exported from +/// [`crate::geoip2::country`]. pub mod city { use super::Names; use serde::{Deserialize, Serialize}; @@ -513,7 +515,7 @@ pub mod city { /// Enterprise database model structs. /// /// Enterprise-specific structs with confidence scores. Some structs are -/// re-exported from [`super::country`]. +/// re-exported from [`crate::geoip2::country`]. pub mod enterprise { use super::Names; use serde::{Deserialize, Serialize}; diff --git a/src/lib.rs b/src/lib.rs index b19c723d..36f39b4f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -263,7 +263,7 @@ mod tests { fn test_lookup_network_uses_measured_ipv4_subtree_depth() { let mut reader = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-ipv6-32.mmdb").unwrap(); - assert_eq!(reader.metadata.ip_version, 6); + assert_eq!(reader.metadata().ip_version, 6); // Simulate a valid IPv6 database whose IPv4 subtree starts somewhere // other than bit 96. Using a shallow subtree depth keeps the combined diff --git a/src/metadata.rs b/src/metadata.rs index 2cb4bc29..74f2c298 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -1,9 +1,12 @@ //! Database metadata types. use std::collections::BTreeMap; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; use serde::{Deserialize, Serialize}; +use crate::error::MaxMindDbError; + /// Metadata about the MaxMind DB file. #[derive(Deserialize, Serialize, Clone, Debug, PartialEq, Eq)] pub struct Metadata { @@ -31,6 +34,8 @@ impl Metadata { /// Returns the database build time as a `SystemTime`. /// /// This converts the `build_epoch` Unix timestamp to a `SystemTime`. + /// If `build_epoch` is too large to represent on this platform, this + /// returns an [`InvalidDatabase`](MaxMindDbError::InvalidDatabase) error. /// /// # Example /// @@ -38,11 +43,18 @@ impl Metadata { /// use maxminddb::Reader; /// /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// let build_time = reader.metadata.build_time(); + /// let build_time = reader.metadata().build_time().unwrap(); /// println!("Database built: {:?}", build_time); /// ``` - #[must_use] - pub fn build_time(&self) -> std::time::SystemTime { - std::time::UNIX_EPOCH + std::time::Duration::from_secs(self.build_epoch) + #[inline] + pub fn build_time(&self) -> Result { + UNIX_EPOCH + .checked_add(Duration::from_secs(self.build_epoch)) + .ok_or_else(|| { + MaxMindDbError::invalid_database(format!( + "build_epoch - Unix timestamp is too large to represent as SystemTime: {}", + self.build_epoch + )) + }) } } diff --git a/src/reader.rs b/src/reader.rs index 08eb3ec2..c2a34fb6 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -23,6 +23,7 @@ use crate::within::{IpInt, Within, WithinNode, WithinOptions}; /// Size of the data section separator (16 zero bytes). const DATA_SECTION_SEPARATOR_SIZE: usize = 16; +const METADATA_START_MARKER: &[u8] = b"\xab\xcd\xefMaxMind.com"; /// A reader for the MaxMind DB format. The lifetime `'data` is tied to the /// lifetime of the underlying buffer holding the contents of the database file. @@ -40,10 +41,10 @@ const DATA_SECTION_SEPARATOR_SIZE: usize = 16; pub struct Reader> { pub(crate) buf: S, /// Database metadata. - pub metadata: Metadata, + metadata: Metadata, record_size: u16, /// Cached `Metadata::node_count` for `Reader` search-tree traversal. - /// Use this instead of `metadata.node_count`, which is publicly mutable. + /// Use this instead of `metadata.node_count` for traversal invariants. node_count: usize, /// Cached bytes per node derived from `Metadata::record_size` for `Reader`. /// Use this instead of `metadata.record_size` in lookup hot paths. @@ -53,6 +54,8 @@ pub struct Reader> { /// correct prefix lengths for IPv4 lookups in IPv6 databases. pub(crate) ipv4_start_bit_depth: usize, pub(crate) pointer_base: usize, + pub(crate) data_section_len: usize, + pub(crate) metadata_start: usize, } impl> std::fmt::Debug for Reader { @@ -63,6 +66,8 @@ impl> std::fmt::Debug for Reader { .field("ipv4_start", &self.ipv4_start) .field("ipv4_start_bit_depth", &self.ipv4_start_bit_depth) .field("pointer_base", &self.pointer_base) + .field("data_section_len", &self.data_section_len) + .field("metadata_start", &self.metadata_start) .finish_non_exhaustive() } } @@ -124,9 +129,12 @@ impl<'de, S: AsRef<[u8]>> Reader { /// ``` pub fn from_source(buf: S) -> Result, MaxMindDbError> { let metadata_start = find_metadata_start(buf.as_ref())?; + // find_metadata_start returns the offset after the marker; the marker + // bytes are not part of the data section and must stay out of limits. + let data_section_end = metadata_marker_start(metadata_start)?; let mut type_decoder = decoder::Decoder::new(&buf.as_ref()[metadata_start..], 0); let metadata = Metadata::deserialize(&mut type_decoder)?; - validate_record_size(metadata.record_size)?; + validate_metadata_for_reader(&metadata)?; let search_tree_size = search_tree_size_bytes(metadata.node_count as usize, metadata.record_size as usize)?; @@ -140,7 +148,8 @@ impl<'de, S: AsRef<[u8]>> Reader { "the MaxMind DB file's search tree extends beyond the file", ) })?; - validate_search_tree_layout(pointer_base, metadata_start)?; + validate_search_tree_layout(pointer_base, data_section_end)?; + let data_section_len = data_section_end - pointer_base; let mut reader = Reader { buf, @@ -148,6 +157,8 @@ impl<'de, S: AsRef<[u8]>> Reader { node_count, node_byte_size, pointer_base, + data_section_len, + metadata_start, metadata, ipv4_start: 0, ipv4_start_bit_depth: 0, @@ -159,6 +170,15 @@ impl<'de, S: AsRef<[u8]>> Reader { Ok(reader) } + /// Returns database metadata. + /// + /// Metadata is validated when the reader is created and exposed by + /// reference so it cannot be mutated independently of cached reader state. + #[inline] + pub fn metadata(&self) -> &Metadata { + &self.metadata + } + /// Lookup an IP address in the database. /// /// Returns a [`LookupResult`] that can be used to: @@ -166,7 +186,6 @@ impl<'de, S: AsRef<[u8]>> Reader { /// - Get the network containing the IP with [`network()`](LookupResult::network) /// - Decode the full record with [`decode()`](LookupResult::decode) /// - Decode a specific path with [`decode_path()`](LookupResult::decode_path) - /// - Get a low-level decoder with [`decoder()`](LookupResult::decoder) /// /// # Examples /// @@ -361,12 +380,45 @@ impl<'de, S: AsRef<[u8]>> Reader { let mut node = self.start_node(bit_count); let node_count = self.node_count; + let has_ipv4_subtree = self.has_ipv4_subtree(); let mut stack: Vec = Vec::with_capacity(bit_count - prefix_len); + // `bit_count == 32` means the caller requested an IPv4 CIDR. In an + // IPv6 database with no IPv4 subtree, `start_node(32)` can already be a + // terminal IPv6 record reached by walking the all-zero prefix. Do not + // read that terminal value as a tree node; yield the containing IPv6 + // network instead, matching lookup behavior. + if bit_count == 32 + && self.metadata.ip_version == 6 + && !has_ipv4_subtree + && node >= node_count + { + stack.push(WithinNode { + node, + ip_int: IpInt::V6(0), + prefix_len: self.ipv4_start_bit_depth, + }); + + return Ok(Within { + reader: self, + node_count, + has_ipv4_subtree, + stack, + options, + }); + } + // Traverse down the tree to the level that matches the cidr mark let mut depth = 0_usize; for i in 0..prefix_len { + // `read_node` is only valid for internal search-tree nodes. + if node >= node_count { + // We've hit a data node or dead end before we exhausted our prefix. + // This means the requested CIDR is contained in a single record. + break; + } + let bit = ip_int.get_bit(i); node = self.read_node(node, bit as usize); depth = i + 1; // We've now traversed i+1 bits (bits 0 through i) @@ -391,6 +443,7 @@ impl<'de, S: AsRef<[u8]>> Reader { let within = Within { reader: self, node_count, + has_ipv4_subtree, stack, options, }; @@ -547,19 +600,8 @@ impl<'de, S: AsRef<[u8]>> Reader { "the MaxMind DB file's data pointer resolves to an invalid location", ) })?; - let data_section_len = self - .buf - .as_ref() - .len() - .checked_sub(self.pointer_base) - .ok_or_else(|| { - MaxMindDbError::invalid_database( - "the MaxMind DB file's data pointer resolves to an invalid location", - ) - })?; - - // Check bounds using pointer_base which marks the start of the data section - if resolved >= data_section_len { + // Reject offsets at or beyond the marker-excluding data section length. + if resolved >= self.data_section_len { return Err(MaxMindDbError::invalid_database( "the MaxMind DB file's data pointer resolves to an invalid location", )); @@ -596,25 +638,15 @@ impl<'de, S: AsRef<[u8]>> Reader { /// ``` pub fn verify(&self) -> Result<(), MaxMindDbError> { let metadata_start = find_metadata_start(self.buf.as_ref())?; - self.verify_metadata(metadata_start)?; - self.verify_database(metadata_start) + let data_section_end = metadata_marker_start(metadata_start)?; + self.verify_metadata(data_section_end)?; + self.verify_database(data_section_end) } - fn verify_metadata(&self, metadata_start: usize) -> Result<(), MaxMindDbError> { + fn verify_metadata(&self, data_section_end: usize) -> Result<(), MaxMindDbError> { let m = &self.metadata; - if m.binary_format_major_version != 2 { - return Err(MaxMindDbError::invalid_database(format!( - "binary_format_major_version - Expected: 2 Actual: {}", - m.binary_format_major_version - ))); - } - if m.binary_format_minor_version != 0 { - return Err(MaxMindDbError::invalid_database(format!( - "binary_format_minor_version - Expected: 0 Actual: {}", - m.binary_format_minor_version - ))); - } + validate_metadata_for_reader(m)?; if m.database_type.is_empty() { return Err(MaxMindDbError::invalid_database( "database_type - Expected: non-empty string Actual: \"\"", @@ -625,26 +657,14 @@ impl<'de, S: AsRef<[u8]>> Reader { "description - Expected: non-empty map Actual: {}", )); } - if m.ip_version != 4 && m.ip_version != 6 { - return Err(MaxMindDbError::invalid_database(format!( - "ip_version - Expected: 4 or 6 Actual: {}", - m.ip_version - ))); - } - validate_record_size(m.record_size)?; - if m.node_count == 0 { - return Err(MaxMindDbError::invalid_database( - "node_count - Expected: positive integer Actual: 0", - )); - } - validate_search_tree_layout(self.pointer_base, metadata_start)?; + validate_search_tree_layout(self.pointer_base, data_section_end)?; Ok(()) } - fn verify_database(&self, metadata_start: usize) -> Result<(), MaxMindDbError> { + fn verify_database(&self, data_section_end: usize) -> Result<(), MaxMindDbError> { let offsets = self.verify_search_tree()?; self.verify_data_section_separator()?; - self.verify_data_section(offsets, metadata_start) + self.verify_data_section(offsets, data_section_end) } fn verify_search_tree(&self) -> Result, MaxMindDbError> { @@ -700,9 +720,9 @@ impl<'de, S: AsRef<[u8]>> Reader { fn verify_data_section( &self, offsets: HashSet, - metadata_start: usize, + data_section_end: usize, ) -> Result<(), MaxMindDbError> { - let data_section = &self.buf.as_ref()[self.pointer_base..metadata_start]; + let data_section = &self.buf.as_ref()[self.pointer_base..data_section_end]; // Verify each offset from the search tree points to valid, decodable data for &offset in &offsets { @@ -742,6 +762,29 @@ fn validate_record_size(record_size: u16) -> Result<(), MaxMindDbError> { } } +pub(crate) fn validate_metadata_for_reader(metadata: &Metadata) -> Result<(), MaxMindDbError> { + if metadata.binary_format_major_version != 2 { + return Err(MaxMindDbError::invalid_database(format!( + "binary_format_major_version - Expected: 2 Actual: {}", + metadata.binary_format_major_version + ))); + } + // Minor format versions are intended to be forward-compatible. + if metadata.ip_version != 4 && metadata.ip_version != 6 { + return Err(MaxMindDbError::invalid_database(format!( + "ip_version - Expected: 4 or 6 Actual: {}", + metadata.ip_version + ))); + } + if metadata.node_count == 0 { + return Err(MaxMindDbError::invalid_database( + "node_count - Expected: positive integer Actual: 0", + )); + } + metadata.build_time()?; + validate_record_size(metadata.record_size) +} + fn search_tree_size_bytes(node_count: usize, record_size: usize) -> Result { node_count .checked_mul(record_size) @@ -755,9 +798,9 @@ fn search_tree_size_bytes(node_count: usize, record_size: usize) -> Result Result<(), MaxMindDbError> { - if pointer_base > metadata_start { + if pointer_base > data_section_end { return Err(MaxMindDbError::invalid_database( "the MaxMind DB file's search tree extends beyond the metadata section", )); @@ -870,11 +913,15 @@ fn normalize_lookup_result(node: usize, node_count: usize, prefix_len: usize) -> } fn find_metadata_start(buf: &[u8]) -> Result { - const METADATA_START_MARKER: &[u8] = b"\xab\xcd\xefMaxMind.com"; - memchr::memmem::rfind(buf, METADATA_START_MARKER) .map(|x| x + METADATA_START_MARKER.len()) .ok_or_else(|| { MaxMindDbError::invalid_database("could not find MaxMind DB metadata in file") }) } + +fn metadata_marker_start(metadata_start: usize) -> Result { + metadata_start + .checked_sub(METADATA_START_MARKER.len()) + .ok_or_else(|| MaxMindDbError::invalid_database("invalid metadata marker location")) +} diff --git a/src/reader_test.rs b/src/reader_test.rs index 329e7cd8..dc9db792 100644 --- a/src/reader_test.rs +++ b/src/reader_test.rs @@ -1,10 +1,12 @@ use std::net::IpAddr; +use std::time::{Duration, UNIX_EPOCH}; use ipnetwork::IpNetwork; use serde::Deserialize; use serde_json::json; use crate::geoip2; +use crate::reader::validate_metadata_for_reader; use crate::{MaxMindDbError, Reader, Within, WithinOptions}; const TEST_DATABASE_CONFIGS: &[(usize, usize)] = @@ -108,19 +110,19 @@ fn test_pointers_in_metadata() { let reader = open_test_data_reader("MaxMind-DB-test-metadata-pointers.mmdb"); assert_eq!( - reader.metadata.database_type, + reader.metadata().database_type, "Lots of pointers in metadata" ); assert_eq!( - reader.metadata.description["en"], + reader.metadata().description["en"], "Lots of pointers in metadata" ); assert_eq!( - reader.metadata.description["es"], + reader.metadata().description["es"], "Lots of pointers in metadata" ); assert_eq!( - reader.metadata.description["zh"], + reader.metadata().description["zh"], "Lots of pointers in metadata" ); @@ -465,7 +467,7 @@ fn test_within_city() { } fn check_metadata>(reader: &Reader, ip_version: usize, record_size: usize) { - let metadata = &reader.metadata; + let metadata = reader.metadata(); assert_eq!(metadata.binary_format_major_version, 2_u16); assert_eq!(metadata.binary_format_minor_version, 0_u16); @@ -494,15 +496,78 @@ fn check_metadata>(reader: &Reader, ip_version: usize, record_ } #[test] -fn test_lookup_uses_cached_record_size_after_metadata_mutation() { +fn test_metadata_build_time_conversion() { init_logger(); - let mut reader = open_test_data_reader("MaxMind-DB-test-ipv4-24.mmdb"); - reader.metadata.record_size = 0; + let reader = open_test_data_reader("GeoIP2-City-Test.mmdb"); - let lookup = reader.lookup("1.1.1.1".parse().unwrap()).unwrap(); - assert!(lookup.has_data()); - assert_eq!(lookup.network().unwrap().to_string(), "1.1.1.1/32"); + assert_eq!( + reader.metadata().build_time().unwrap(), + UNIX_EPOCH + Duration::from_secs(reader.metadata().build_epoch) + ); +} + +#[test] +fn test_metadata_build_time_rejects_uint64_max_epoch() { + init_logger(); + + let err = + Reader::open_readfile("test-data/bad-data/libmaxminddb/libmaxminddb-uint64-max-epoch.mmdb") + .unwrap_err(); + + assert!( + matches!( + err, + MaxMindDbError::InvalidDatabase { ref message, .. } + if message + == "build_epoch - Unix timestamp is too large to represent as SystemTime: 18446744073709551615" + ), + "Expected InvalidDatabase error for unrepresentable build_epoch, got {err:?}" + ); +} + +#[test] +fn test_reader_metadata_accessor_returns_validated_metadata() { + init_logger(); + + let reader = open_test_data_reader("MaxMind-DB-test-ipv4-24.mmdb"); + + assert_eq!(reader.metadata().record_size, 24); + assert_eq!(reader.metadata().ip_version, 4); + assert_eq!(reader.metadata().database_type, "Test"); +} + +#[test] +fn test_metadata_validation_rejects_hard_invariants() { + init_logger(); + + let reader = open_test_data_reader("MaxMind-DB-test-ipv4-24.mmdb"); + let metadata = reader.metadata(); + + let mut invalid = metadata.clone(); + invalid.binary_format_major_version = 3; + assert!(matches!( + validate_metadata_for_reader(&invalid), + Err(MaxMindDbError::InvalidDatabase { .. }) + )); + + let mut future_minor = metadata.clone(); + future_minor.binary_format_minor_version = u16::MAX; + validate_metadata_for_reader(&future_minor).unwrap(); + + let mut invalid = metadata.clone(); + invalid.ip_version = 5; + assert!(matches!( + validate_metadata_for_reader(&invalid), + Err(MaxMindDbError::InvalidDatabase { .. }) + )); + + let mut invalid = metadata.clone(); + invalid.node_count = 0; + assert!(matches!( + validate_metadata_for_reader(&invalid), + Err(MaxMindDbError::InvalidDatabase { .. }) + )); } #[test] @@ -511,7 +576,7 @@ fn test_resolve_data_pointer_rejects_small_pointer() { let reader = open_test_data_reader("MaxMind-DB-test-ipv4-24.mmdb"); let err = reader - .resolve_data_pointer(reader.metadata.node_count as usize) + .resolve_data_pointer(reader.metadata().node_count as usize) .unwrap_err(); assert!(matches!(err, MaxMindDbError::InvalidDatabase { .. })); @@ -1054,6 +1119,20 @@ fn test_within_rejects_ipv6_cidr_for_ipv4_database() { } } +#[test] +fn test_within_no_ipv4_search_tree() { + init_logger(); + + let reader = open_test_data_reader("MaxMind-DB-no-ipv4-search-tree.mmdb"); + + for cidr in ["::/0", "::/64", "0.0.0.0/0", "200.0.2.1/32"] { + let cidr: IpNetwork = cidr.parse().unwrap(); + let networks = collect_networks(reader.within(cidr, Default::default()).unwrap()); + + assert_eq!(networks, vec!["::/64"], "unexpected networks for {cidr}"); + } +} + /// Test that verify() succeeds on valid databases (matching Go's TestVerifyOnGoodDatabases) #[test] fn test_verify_good_databases() { @@ -1124,6 +1203,38 @@ fn test_verify_broken_pointers() { ); } +#[test] +fn test_rejects_data_pointer_to_metadata_marker() { + init_logger(); + + let source_path = "test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb"; + let reader = Reader::open_readfile(source_path).unwrap(); + assert_eq!(reader.metadata().record_size, 24); + + let pointer = reader.metadata().node_count as usize + 16 + reader.data_section_len; + assert!(pointer <= 0x00ff_ffff); + + let mut bytes = std::fs::read(source_path).unwrap(); + for record in bytes[..6].chunks_exact_mut(3) { + record[0] = ((pointer >> 16) & 0xff) as u8; + record[1] = ((pointer >> 8) & 0xff) as u8; + record[2] = (pointer & 0xff) as u8; + } + + let reader = Reader::from_source(bytes).unwrap(); + let err = reader.lookup("1.1.1.1".parse().unwrap()).unwrap_err(); + assert!( + matches!(err, MaxMindDbError::InvalidDatabase { .. }), + "Expected InvalidDatabase error for marker pointer, got {err:?}" + ); + + let result = reader.verify(); + assert!( + matches!(result, Err(MaxMindDbError::InvalidDatabase { .. })), + "Expected InvalidDatabase error for marker pointer during verify, got {result:?}" + ); +} + #[test] fn test_verify_broken_search_tree() { init_logger(); @@ -1175,6 +1286,60 @@ fn test_verify_rejects_truncated_scalar_value() { result ); } + +#[test] +fn test_decode_rejects_truncated_ignored_scalar_value() { + init_logger(); + + let source_path = "test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb"; + let reader = open_test_data_reader("MaxMind-DB-test-ipv4-24.mmdb"); + let lookup = reader.lookup("1.1.1.32".parse().unwrap()).unwrap(); + let data_offset = lookup.offset().expect("expected data offset"); + let mut bytes = std::fs::read(source_path).unwrap(); + let record_start = reader.pointer_base + data_offset; + let string_value = b"1.1.1.32"; + let relative_value_offset = bytes[record_start..] + .windows(string_value.len()) + .position(|window| window == string_value) + .expect("expected terminal string payload in fixture record"); + let string_ctrl_offset = record_start + relative_value_offset - 1; + assert_eq!( + bytes[string_ctrl_offset], 0x48, + "unexpected string control byte in source fixture" + ); + + // Inflate the terminal string from length 8 to length 28 without adding + // bytes. Decoding into a struct with no fields forces serde to skip the + // corrupt value as unknown data. + bytes[string_ctrl_offset] = 0x5c; + + #[derive(Deserialize, Debug)] + struct Empty {} + + let reader = Reader::from_source(bytes).unwrap(); + let lookup = reader.lookup("1.1.1.32".parse().unwrap()).unwrap(); + let err = lookup.decode::().unwrap_err(); + + assert!(matches!(err, MaxMindDbError::InvalidDatabase { .. })); +} + +#[test] +fn test_decode_rejects_deep_nesting_in_ignored_values() { + init_logger(); + + let reader = + Reader::open_readfile("test-data/bad-data/libmaxminddb/libmaxminddb-deep-nesting.mmdb") + .unwrap(); + let lookup = reader.lookup("1.1.1.1".parse().unwrap()).unwrap(); + let err = lookup.decode::().unwrap_err(); + + assert!( + err.to_string() + .contains("exceeded maximum data structure depth"), + "unexpected error: {err}" + ); +} + /// Test that size hints are properly returned for sequences and maps #[test] fn test_size_hints() { diff --git a/src/result.rs b/src/result.rs index 8b3253bb..e30ca044 100644 --- a/src/result.rs +++ b/src/result.rs @@ -74,7 +74,7 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { #[inline] fn decoder(&self, offset: usize) -> super::decoder::Decoder<'a> { let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; - super::decoder::Decoder::new(buf, offset) + super::decoder::Decoder::new_with_limit(buf, offset, self.reader.data_section_len) } /// Creates a new LookupResult for a found IP. @@ -281,6 +281,7 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { } if !found { + decoder.validate_skip_end().map_err(with_path)?; return Ok(None); } } diff --git a/src/within.rs b/src/within.rs index 4aaf75e4..9719ba0c 100644 --- a/src/within.rs +++ b/src/within.rs @@ -1,7 +1,7 @@ //! Network iteration types. use std::cmp::Ordering; -use std::net::IpAddr; +use std::net::{IpAddr, Ipv6Addr}; use crate::decoder; use crate::error::MaxMindDbError; @@ -111,6 +111,7 @@ pub(crate) struct WithinNode { pub struct Within<'de, S: AsRef<[u8]>> { pub(crate) reader: &'de Reader, pub(crate) node_count: usize, + pub(crate) has_ipv4_subtree: bool, pub(crate) stack: Vec, pub(crate) options: WithinOptions, } @@ -180,8 +181,6 @@ impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> { match current.node.cmp(&self.node_count) { Ordering::Greater => { // This is a data node, emit it and we're done (until the following next call) - let ip_addr = ip_int_to_addr(¤t.ip_int); - // Resolve the pointer to a data offset let data_offset = match self.reader.resolve_data_pointer(current.node) { Ok(offset) => offset, @@ -197,17 +196,8 @@ impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> { } } - let network_kind = match current.ip_int { - IpInt::V4(_) => NetworkKind::V4, - IpInt::V6(_) - if current.ip_int.is_ipv4_in_ipv6() - && self.reader.has_ipv4_subtree() - && current.prefix_len >= self.reader.ipv4_start_bit_depth => - { - NetworkKind::V4InV6Subtree - } - IpInt::V6(_) => NetworkKind::V6, - }; + let network_kind = self.network_kind(¤t); + let ip_addr = network_ip_addr(network_kind, current.ip_int); return Some(Ok(LookupResult::new_found( self.reader, @@ -221,18 +211,8 @@ impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> { Ordering::Equal => { // Dead end (no data) - include if option is set if self.options.include_networks_without_data { - let ip_addr = ip_int_to_addr(¤t.ip_int); - let network_kind = match current.ip_int { - IpInt::V4(_) => NetworkKind::V4, - IpInt::V6(_) - if current.ip_int.is_ipv4_in_ipv6() - && self.reader.has_ipv4_subtree() - && current.prefix_len >= self.reader.ipv4_start_bit_depth => - { - NetworkKind::V4InV6Subtree - } - IpInt::V6(_) => NetworkKind::V6, - }; + let network_kind = self.network_kind(¤t); + let ip_addr = network_ip_addr(network_kind, current.ip_int); return Some(Ok(LookupResult::new_not_found( self.reader, current.prefix_len as u8, @@ -263,6 +243,24 @@ impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> { } impl<'de, S: AsRef<[u8]>> Within<'de, S> { + #[inline(always)] + fn network_kind(&self, node: &WithinNode) -> NetworkKind { + match node.ip_int { + IpInt::V4(_) if self.reader.metadata().ip_version == 6 && !self.has_ipv4_subtree => { + NetworkKind::V6 + } + IpInt::V4(_) => NetworkKind::V4, + IpInt::V6(_) + if node.ip_int.is_ipv4_in_ipv6() + && self.has_ipv4_subtree + && node.prefix_len >= self.reader.ipv4_start_bit_depth => + { + NetworkKind::V4InV6Subtree + } + IpInt::V6(_) => NetworkKind::V6, + } + } + fn push_child( &mut self, parent_node: usize, @@ -281,7 +279,8 @@ impl<'de, S: AsRef<[u8]>> Within<'de, S> { /// Check if the value at the given data offset is an empty map or array. fn is_empty_value_at(&self, data_offset: usize) -> Result { let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; - let mut dec = decoder::Decoder::new(buf, data_offset); + let mut dec = + decoder::Decoder::new_with_limit(buf, data_offset, self.reader.data_section_len); let (size, type_num) = dec.peek_type()?; match type_num { decoder::TYPE_MAP | decoder::TYPE_ARRAY => Ok(size == 0), @@ -290,12 +289,28 @@ impl<'de, S: AsRef<[u8]>> Within<'de, S> { } } +#[inline(always)] +fn network_ip_addr(network_kind: NetworkKind, ip_int: IpInt) -> IpAddr { + let ip_addr = ip_int_to_addr(&ip_int); + match (network_kind, ip_int, ip_addr) { + // Low IPv6 tree keys are normally formatted as IPv4 addresses for IPv4 + // subtree records. When the record is really IPv6, preserve the low + // IPv6 bits instead of collapsing the network to ::. + (NetworkKind::V6, IpInt::V6(ip), IpAddr::V4(_)) => IpAddr::V6(ip.into()), + // Defensive fallback for IPv4 CIDR iteration in IPv6 databases without + // an IPv4 subtree. Reader::within() should seed those as IpInt::V6(0). + (NetworkKind::V6, IpInt::V4(_), IpAddr::V4(_)) => IpAddr::V6(Ipv6Addr::UNSPECIFIED), + (_, _, ip_addr) => ip_addr, + } +} + /// Convert IpInt to IpAddr +#[inline(always)] pub(crate) fn ip_int_to_addr(ip_int: &IpInt) -> IpAddr { match ip_int { IpInt::V4(ip) => IpAddr::V4((*ip).into()), IpInt::V6(ip) => { - // Check if this is an IPv4-mapped IPv6 address + // Check if this is an IPv4-mapped IPv6 address. if *ip <= 0xFFFFFFFF { IpAddr::V4((*ip as u32).into()) } else { @@ -304,3 +319,28 @@ pub(crate) fn ip_int_to_addr(ip_int: &IpInt) -> IpAddr { } } } + +#[cfg(test)] +mod tests { + use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; + + use crate::result::NetworkKind; + + use super::{network_ip_addr, IpInt}; + + #[test] + fn network_ip_addr_preserves_low_ipv6_bits() { + assert_eq!( + network_ip_addr(NetworkKind::V6, IpInt::V6(1)), + IpAddr::V6(Ipv6Addr::from(1_u128)) + ); + } + + #[test] + fn network_ip_addr_formats_ipv4_subtree_records_as_ipv4() { + assert_eq!( + network_ip_addr(NetworkKind::V4InV6Subtree, IpInt::V6(1)), + IpAddr::V4(Ipv4Addr::from(1_u32)) + ); + } +}