diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
index b221aed4..e6dff4c1 100644
--- a/.github/workflows/doc.yml
+++ b/.github/workflows/doc.yml
@@ -1,14 +1,33 @@
-name: Deploy Docs to GitHub Pages
+name: Documentation
on:
push:
branches:
- main
+ pull_request:
jobs:
- release:
+ doc-check:
+ name: Rustdoc
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout Repository
+ uses: actions/checkout@v6
+
+ - name: Install Rust toolchain
+ uses: dtolnay/rust-toolchain@stable
+
+ - name: Build documentation with warnings denied
+ run: cargo doc --all --no-deps
+ env:
+ RUSTDOCFLAGS: -D warnings
+
+ deploy:
name: GitHub Pages
runs-on: ubuntu-latest
+ needs: doc-check
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
steps:
- name: Checkout Repository
@@ -19,6 +38,8 @@ jobs:
- name: Build Documentation
run: cargo doc --all --no-deps
+ env:
+ RUSTDOCFLAGS: -D warnings
- name: Create index
run: echo '' > target/doc/index.html
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 57ba8677..69d6489f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,28 @@
# Change Log
+## 0.29.0 - TBD
+
+- Breaking: `Metadata::build_time()` now returns
+ `Result` instead of panicking on
+ unrepresentable timestamps, and databases whose `build_epoch` cannot be
+ represented as `SystemTime` are rejected as invalid during open and verify.
+- Breaking: `Reader::metadata` is now private. Use `Reader::metadata()` to
+ access validated metadata by shared reference.
+- Breaking: Opening a database now rejects unsupported major format versions,
+ unsupported IP versions, and zero-node search trees. Minor format versions
+ are still accepted for forward compatibility.
+- Fixed: `within()` and `networks()` now handle IPv6 databases without an
+ IPv4 subtree without reading terminal data nodes as tree nodes or
+ panicking while formatting low IPv6 networks.
+- Fixed: Skipping ignored or unknown fields now enforces data-section
+ bounds and the maximum nesting depth instead of accepting truncated
+ values or overflowing the stack on deeply nested corrupt data.
+- Internal: Added focused benchmarks for network iteration and serde
+ unknown-field skipping, plus metadata build-time conversion, to guard
+ performance-sensitive fixes.
+- Documentation: Fixed intra-doc links so docs build with warnings denied,
+ and added a CI check to keep them passing.
+
## 0.28.1 - 2026-04-26
- Fixed: Databases with an impossible declared search tree size are now
diff --git a/Cargo.toml b/Cargo.toml
index 337e5ce5..3eedd639 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "maxminddb"
-version = "0.28.1"
+version = "0.29.0"
authors = [ "Gregory J. Oschwald " ]
description = "Library for reading MaxMind DB format used by GeoIP2 and GeoLite2"
readme = "README.md"
@@ -47,3 +47,11 @@ harness = false
[[bench]]
name = "serde_usage"
harness = false
+
+[[bench]]
+name = "within"
+harness = false
+
+[[bench]]
+name = "metadata"
+harness = false
diff --git a/README.md b/README.md
index 507c17fe..0356c72b 100644
--- a/README.md
+++ b/README.md
@@ -28,14 +28,14 @@ Add this to your `Cargo.toml`:
```toml
[dependencies]
-maxminddb = "0.28"
+maxminddb = "0.29"
```
Enable optional features as needed:
```toml
[dependencies]
-maxminddb = { version = "0.28", features = ["mmap"] }
+maxminddb = { version = "0.29", features = ["mmap"] }
```
## Example
@@ -113,7 +113,7 @@ Enable in `Cargo.toml`:
```toml
[dependencies]
-maxminddb = { version = "0.28", features = ["mmap"] }
+maxminddb = { version = "0.29", features = ["mmap"] }
```
Note: `simdutf8` and `unsafe-str-decode` are mutually exclusive.
diff --git a/UPGRADING.md b/UPGRADING.md
index 4043d847..94b9de9b 100644
--- a/UPGRADING.md
+++ b/UPGRADING.md
@@ -1,5 +1,49 @@
# Upgrading Guide
+## 0.28 to 0.29
+
+### Reader Metadata
+
+`Reader::metadata` is now private. Use `Reader::metadata()` to access
+validated metadata by shared reference.
+
+**Before (0.28):**
+
+```rust
+let database_type = &reader.metadata.database_type;
+```
+
+**After (0.29):**
+
+```rust
+let database_type = &reader.metadata().database_type;
+```
+
+### Metadata Build Time
+
+`Metadata::build_time()` now returns `Result`.
+Databases whose `build_epoch` cannot be represented as `SystemTime` are
+rejected as invalid when opened or verified.
+
+**Before (0.28):**
+
+```rust
+let build_time = reader.metadata.build_time();
+```
+
+**After (0.29):**
+
+```rust
+let build_time = reader.metadata().build_time()?;
+```
+
+### Metadata Validation
+
+Opening a database now rejects unsupported major format versions, unsupported
+IP versions, zero-node search trees, and unrepresentable build epochs as
+invalid databases. Minor format versions are still accepted for forward
+compatibility.
+
## 0.26 to 0.27
This release includes significant API changes to improve ergonomics and enable
diff --git a/benches/metadata.rs b/benches/metadata.rs
new file mode 100644
index 00000000..ecff14a0
--- /dev/null
+++ b/benches/metadata.rs
@@ -0,0 +1,20 @@
+use std::hint::black_box;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+
+const DB_FILE: &str = "GeoLite2-City.mmdb";
+
+pub fn metadata_benchmark(c: &mut Criterion) {
+ let reader = maxminddb::Reader::open_readfile(DB_FILE).unwrap();
+
+ c.bench_function("metadata/build_time", |b| {
+ b.iter(|| black_box(reader.metadata().build_time().unwrap()))
+ });
+}
+
+criterion_group! {
+ name = benches;
+ config = Criterion::default().sample_size(20);
+ targets = metadata_benchmark
+}
+criterion_main!(benches);
diff --git a/benches/serde_usage.rs b/benches/serde_usage.rs
index 7d6be999..5a269fb7 100644
--- a/benches/serde_usage.rs
+++ b/benches/serde_usage.rs
@@ -10,6 +10,9 @@ use common::{generate_ipv4, open_reader};
const DB_FILE: &str = "GeoLite2-City.mmdb";
+#[derive(serde::Deserialize)]
+struct EmptyRecord {}
+
fn cache_lookups<'a, T>(ips: &[IpAddr], reader: &'a Reader) -> Vec>
where
T: AsRef<[u8]>,
@@ -50,6 +53,16 @@ where
}
}
+fn bench_decode_empty_record(results: &[LookupResult<'_, T>])
+where
+ T: AsRef<[u8]>,
+{
+ for result in results {
+ let empty: Option = result.decode().unwrap();
+ black_box(empty);
+ }
+}
+
fn bench_decode_path_country_iso(results: &[LookupResult<'_, T>])
where
T: AsRef<[u8]>,
@@ -91,6 +104,9 @@ pub fn serde_usage_benchmark(c: &mut Criterion) {
c.bench_function("serde_usage/decode_country_only", |b| {
b.iter(|| bench_decode_country_only(&cached_results))
});
+ c.bench_function("serde_usage/decode_empty_record", |b| {
+ b.iter(|| bench_decode_empty_record(&cached_results))
+ });
c.bench_function("serde_usage/decode_path_country_iso", |b| {
b.iter(|| bench_decode_path_country_iso(&cached_results))
});
diff --git a/benches/within.rs b/benches/within.rs
new file mode 100644
index 00000000..ef3207be
--- /dev/null
+++ b/benches/within.rs
@@ -0,0 +1,61 @@
+use criterion::{criterion_group, criterion_main, Criterion};
+use maxminddb::{geoip2, Reader};
+use std::hint::black_box;
+
+fn bench_networks_city_test(c: &mut Criterion) {
+ let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
+
+ c.bench_function("within/networks_city_test", |b| {
+ b.iter(|| {
+ let mut count = 0usize;
+ for result in reader.networks(Default::default()).unwrap() {
+ let lookup = result.unwrap();
+ black_box(lookup.network().unwrap());
+ count += 1;
+ }
+ black_box(count);
+ });
+ });
+}
+
+fn bench_within_city_subnet(c: &mut Criterion) {
+ let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
+ let cidr = "81.2.69.0/24".parse().unwrap();
+
+ c.bench_function("within/city_subnet", |b| {
+ b.iter(|| {
+ let mut count = 0usize;
+ for result in reader.within(cidr, Default::default()).unwrap() {
+ let lookup = result.unwrap();
+ let city: Option> = lookup.decode().unwrap();
+ black_box(city);
+ count += 1;
+ }
+ black_box(count);
+ });
+ });
+}
+
+fn bench_networks_mixed_test(c: &mut Criterion) {
+ let reader =
+ Reader::open_readfile("test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap();
+
+ c.bench_function("within/networks_mixed_test", |b| {
+ b.iter(|| {
+ let mut count = 0usize;
+ for result in reader.networks(Default::default()).unwrap() {
+ let lookup = result.unwrap();
+ black_box(lookup.network().unwrap());
+ count += 1;
+ }
+ black_box(count);
+ });
+ });
+}
+
+criterion_group! {
+ name = benches;
+ config = Criterion::default().sample_size(20);
+ targets = bench_networks_city_test, bench_within_city_subnet, bench_networks_mixed_test
+}
+criterion_main!(benches);
diff --git a/src/decoder.rs b/src/decoder.rs
index 351457b0..81bb2c11 100644
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -34,6 +34,11 @@ const TYPE_FLOAT: u8 = 15;
/// This matches the value used in libmaxminddb and the Go reader.
const MAXIMUM_DATA_STRUCTURE_DEPTH: u16 = 512;
+/// Lower limit for values skipped through unknown fields or IgnoredAny.
+/// Skipping is recursive and can be reached by corrupt data that callers did
+/// not explicitly request, so keep the limit below small default thread stacks.
+const MAXIMUM_SKIPPED_DATA_STRUCTURE_DEPTH: u16 = 128;
+
#[inline(always)]
fn to_usize(base: u8, bytes: &[u8]) -> usize {
bytes
@@ -49,11 +54,12 @@ macro_rules! decode_int_like {
let new_offset = self
.current_ptr
.checked_add(size)
- .filter(|&offset| offset <= self.buf.len())
+ .filter(|&offset| offset <= self.limit)
.ok_or_else(|| {
self.invalid_db_error(&format!("{} of size {}", $label, size))
})?;
- let value = self.buf[self.current_ptr..new_offset]
+ let value = self
+ .slice(self.current_ptr, new_offset)
.iter()
.fold($zero, |acc, &b| (acc << 8) | <$ty>::from(b));
self.current_ptr = new_offset;
@@ -102,14 +108,21 @@ enum Value<'a, 'de> {
#[derive(Debug)]
pub(crate) struct Decoder<'de> {
buf: &'de [u8],
+ limit: usize,
current_ptr: usize,
depth: u16,
}
impl<'de> Decoder<'de> {
pub(crate) fn new(buf: &'de [u8], start_ptr: usize) -> Decoder<'de> {
+ Decoder::new_with_limit(buf, start_ptr, buf.len())
+ }
+
+ pub(crate) fn new_with_limit(buf: &'de [u8], start_ptr: usize, limit: usize) -> Decoder<'de> {
+ debug_assert!(limit <= buf.len());
Decoder {
buf,
+ limit,
current_ptr: start_ptr,
depth: 0,
}
@@ -158,18 +171,40 @@ impl<'de> Decoder<'de> {
#[inline(always)]
fn checked_offset(&self, size: usize, label: &str) -> DecodeResult {
let new_offset = self.current_ptr.wrapping_add(size);
- if new_offset < self.current_ptr || new_offset > self.buf.len() {
+ if new_offset < self.current_ptr || new_offset > self.limit {
return Err(self.invalid_db_error(&format!("{label} of size {size}")));
}
Ok(new_offset)
}
+ #[inline(always)]
+ fn slice(&self, start: usize, end: usize) -> &'de [u8] {
+ debug_assert!(start <= end);
+ debug_assert!(end <= self.limit);
+ debug_assert!(self.limit <= self.buf.len());
+ // SAFETY: Decoder constructors ensure `limit <= buf.len()`, and all
+ // callers reach this helper only after checking `end <= limit`.
+ unsafe { self.buf.get_unchecked(start..end) }
+ }
+
+ #[inline(always)]
+ fn skip_bytes(&mut self, size: usize, label: &str) -> DecodeResult<()> {
+ if self.current_ptr > self.limit || size > self.limit - self.current_ptr {
+ return Err(self.invalid_db_error(&format!("{label} of size {size}")));
+ }
+ self.current_ptr += size;
+ Ok(())
+ }
+
#[inline(always)]
fn eat_byte(&mut self) -> DecodeResult {
- let b = *self
- .buf
- .get(self.current_ptr)
- .ok_or_else(|| self.invalid_db_error("unexpected end of buffer"))?;
+ if self.current_ptr >= self.limit {
+ return Err(self.invalid_db_error("unexpected end of buffer"));
+ }
+ debug_assert!(self.limit <= self.buf.len());
+ // SAFETY: The check above proves `current_ptr < limit`, and decoder
+ // construction guarantees `limit <= buf.len()`.
+ let b = unsafe { *self.buf.get_unchecked(self.current_ptr) };
self.current_ptr += 1;
Ok(b)
}
@@ -314,7 +349,7 @@ impl<'de> Decoder<'de> {
fn decode_bytes(&mut self, size: usize) -> DecodeResult<&'de [u8]> {
let new_offset = self.checked_offset(size, "bytes")?;
- let u8_slice = &self.buf[self.current_ptr..new_offset];
+ let u8_slice = self.slice(self.current_ptr, new_offset);
self.current_ptr = new_offset;
Ok(u8_slice)
@@ -322,7 +357,8 @@ impl<'de> Decoder<'de> {
fn decode_float(&mut self, size: usize) -> DecodeResult {
let new_offset = self.checked_offset(size, "float")?;
- let value: [u8; 4] = self.buf[self.current_ptr..new_offset]
+ let value: [u8; 4] = self
+ .slice(self.current_ptr, new_offset)
.try_into()
.map_err(|_| self.invalid_db_error(&format!("float of size {size}")))?;
self.current_ptr = new_offset;
@@ -332,7 +368,8 @@ impl<'de> Decoder<'de> {
fn decode_double(&mut self, size: usize) -> DecodeResult {
let new_offset = self.checked_offset(size, "double")?;
- let value: [u8; 8] = self.buf[self.current_ptr..new_offset]
+ let value: [u8; 8] = self
+ .slice(self.current_ptr, new_offset)
.try_into()
.map_err(|_| self.invalid_db_error(&format!("double of size {size}")))?;
self.current_ptr = new_offset;
@@ -351,7 +388,7 @@ impl<'de> Decoder<'de> {
let new_offset = self
.current_ptr
.checked_add(size)
- .filter(|&offset| offset <= self.buf.len())
+ .filter(|&offset| offset <= self.limit)
.ok_or_else(|| self.invalid_db_error(&format!("{label} of size {}", size)))?;
let p = self.current_ptr;
let value = match size {
@@ -387,7 +424,7 @@ impl<'de> Decoder<'de> {
let new_offset = self
.current_ptr
.checked_add(size)
- .filter(|&offset| offset <= self.buf.len())
+ .filter(|&offset| offset <= self.limit)
.ok_or_else(|| self.invalid_db_error(&format!("u16 of size {}", size)))?;
let p = self.current_ptr;
let value = match size {
@@ -415,17 +452,17 @@ impl<'de> Decoder<'de> {
let pointer_value_offset = [0, 0, 2048, 526_336, 0];
let pointer_size = ((size >> 3) & 0x3) + 1;
let p = self.current_ptr;
- let len = self.buf.len();
+ let limit = self.limit;
let new_offset = match p.checked_add(pointer_size) {
- Some(offset) if offset <= len => offset,
+ Some(offset) if offset <= limit => offset,
_ => {
// Clamp to the end of the buffer so the next decode step fails
// with a normal bounds error instead of panicking here.
- self.current_ptr = len;
- return len;
+ self.current_ptr = limit;
+ return limit;
}
};
- let pointer_bytes = &self.buf[p..new_offset];
+ let pointer_bytes = self.slice(p, new_offset);
self.current_ptr = new_offset;
let base = if pointer_size == 4 {
@@ -443,7 +480,7 @@ impl<'de> Decoder<'de> {
use std::str::from_utf8_unchecked;
let new_offset = self.checked_offset(size, "string")?;
- let bytes = &self.buf[self.current_ptr..new_offset];
+ let bytes = self.slice(self.current_ptr, new_offset);
self.current_ptr = new_offset;
// SAFETY:
// A corrupt maxminddb will cause undefined behaviour.
@@ -464,7 +501,7 @@ impl<'de> Decoder<'de> {
use std::str::from_utf8_unchecked;
let new_offset = self.checked_offset(size, "string")?;
- let bytes = &self.buf[self.current_ptr..new_offset];
+ let bytes = self.slice(self.current_ptr, new_offset);
self.current_ptr = new_offset;
if bytes.is_ascii() {
// ASCII is valid UTF-8, so this avoids the full validator fast path.
@@ -579,10 +616,10 @@ impl<'de> Decoder<'de> {
.current_ptr
.checked_add(size)
.ok_or_else(|| self.invalid_db_error("string length exceeds buffer"))?;
- if new_offset > self.buf.len() {
+ if new_offset > self.limit {
return Err(self.invalid_db_error("string length exceeds buffer"));
}
- let bytes = &self.buf[self.current_ptr..new_offset];
+ let bytes = self.slice(self.current_ptr, new_offset);
self.current_ptr = new_offset;
Ok(bytes)
}
@@ -655,45 +692,81 @@ impl<'de> Decoder<'de> {
/// Skips the current value, following pointers.
pub(crate) fn skip_value(&mut self) -> DecodeResult<()> {
let (size, type_num) = self.size_and_type()?;
- self.skip_value_inner(size, type_num, true)
+ self.skip_value_inner(size, type_num, true, 0)
}
/// Skips the current value without following pointers (for verification).
pub(crate) fn skip_value_for_verification(&mut self) -> DecodeResult<()> {
let (size, type_num) = self.size_and_type()?;
- self.skip_value_inner(size, type_num, false)
+ self.skip_value_inner(size, type_num, false, 0)?;
+ self.validate_skip_end()
}
+ #[inline(always)]
+ pub(crate) fn validate_skip_end(&mut self) -> DecodeResult<()> {
+ if self.current_ptr > self.limit {
+ return Err(self.invalid_db_error("skipped value extends beyond buffer"));
+ }
+ Ok(())
+ }
+
+ #[inline(always)]
+ fn check_skip_depth(&self, skip_depth: u16) -> DecodeResult {
+ if skip_depth == MAXIMUM_SKIPPED_DATA_STRUCTURE_DEPTH {
+ return self.skip_depth_error();
+ }
+ Ok(skip_depth + 1)
+ }
+
+ #[cold]
+ fn skip_depth_error(&self) -> DecodeResult {
+ Err(self
+ .invalid_db_error("exceeded maximum data structure depth; database is likely corrupt"))
+ }
+
+ #[inline(always)]
fn skip_value_inner(
&mut self,
size: usize,
type_num: u8,
follow_pointers: bool,
+ skip_depth: u16,
) -> DecodeResult<()> {
+ // When following pointers during normal serde skips, scalar payload
+ // bounds are validated once the whole skipped value is consumed. The
+ // no-follow verification path validates each raw payload immediately.
match type_num {
TYPE_POINTER => {
- let new_ptr = self.decode_pointer(size);
if follow_pointers {
+ let new_ptr = self.decode_pointer(size);
let saved_ptr = self.current_ptr;
self.current_ptr = new_ptr;
- self.skip_value()?;
+ let result = match self.check_skip_depth(skip_depth) {
+ Ok(child_depth) => match self.skip_value_with_depth(child_depth) {
+ Ok(()) => self.validate_skip_end(),
+ Err(err) => Err(err),
+ },
+ Err(err) => Err(err),
+ };
self.current_ptr = saved_ptr;
+ return result;
}
- Ok(())
+ let pointer_size = ((size >> 3) & 0x3) + 1;
+ self.skip_bytes(pointer_size, "pointer")
}
TYPE_STRING | TYPE_BYTES => {
// String or Bytes - skip size bytes
+ let label = if type_num == TYPE_STRING {
+ "string"
+ } else {
+ "bytes"
+ };
if follow_pointers {
self.current_ptr += size;
+ Ok(())
} else {
- let label = if type_num == TYPE_STRING {
- "string"
- } else {
- "bytes"
- };
- self.current_ptr = self.checked_offset(size, label)?;
+ self.skip_bytes(size, label)
}
- Ok(())
}
TYPE_DOUBLE => {
// Double - must be exactly 8 bytes
@@ -702,10 +775,10 @@ impl<'de> Decoder<'de> {
}
if follow_pointers {
self.current_ptr += size;
+ Ok(())
} else {
- self.current_ptr = self.checked_offset(size, "double")?;
+ self.skip_bytes(size, "double")
}
- Ok(())
}
TYPE_FLOAT => {
// Float - must be exactly 4 bytes
@@ -714,27 +787,27 @@ impl<'de> Decoder<'de> {
}
if follow_pointers {
self.current_ptr += size;
+ Ok(())
} else {
- self.current_ptr = self.checked_offset(size, "float")?;
+ self.skip_bytes(size, "float")
}
- Ok(())
}
TYPE_UINT16 | TYPE_UINT32 | TYPE_INT32 | TYPE_UINT64 | TYPE_UINT128 => {
// Numeric types - skip size bytes
+ let label = match type_num {
+ TYPE_UINT16 => "u16",
+ TYPE_UINT32 => "u32",
+ TYPE_INT32 => "i32",
+ TYPE_UINT64 => "u64",
+ TYPE_UINT128 => "u128",
+ _ => unreachable!(),
+ };
if follow_pointers {
self.current_ptr += size;
+ Ok(())
} else {
- let label = match type_num {
- TYPE_UINT16 => "u16",
- TYPE_UINT32 => "u32",
- TYPE_INT32 => "i32",
- TYPE_UINT64 => "u64",
- TYPE_UINT128 => "u128",
- _ => unreachable!(),
- };
- self.current_ptr = self.checked_offset(size, label)?;
+ self.skip_bytes(size, label)
}
- Ok(())
}
TYPE_BOOL => {
// Boolean - size field IS the value, no data bytes to skip
@@ -742,16 +815,20 @@ impl<'de> Decoder<'de> {
}
TYPE_MAP => {
// Map - skip size key-value pairs
+ let child_depth = self.check_skip_depth(skip_depth)?;
for _ in 0..size {
- self.skip_value_inner_with_follow(follow_pointers)?; // key
- self.skip_value_inner_with_follow(follow_pointers)?; // value
+ // key
+ self.skip_value_inner_with_follow(follow_pointers, child_depth)?;
+ // value
+ self.skip_value_inner_with_follow(follow_pointers, child_depth)?;
}
Ok(())
}
TYPE_ARRAY => {
// Array - skip size elements
+ let child_depth = self.check_skip_depth(skip_depth)?;
for _ in 0..size {
- self.skip_value_inner_with_follow(follow_pointers)?;
+ self.skip_value_inner_with_follow(follow_pointers, child_depth)?;
}
Ok(())
}
@@ -759,9 +836,20 @@ impl<'de> Decoder<'de> {
}
}
- fn skip_value_inner_with_follow(&mut self, follow_pointers: bool) -> DecodeResult<()> {
+ #[inline(always)]
+ fn skip_value_with_depth(&mut self, skip_depth: u16) -> DecodeResult<()> {
+ let (size, type_num) = self.size_and_type()?;
+ self.skip_value_inner(size, type_num, true, skip_depth)
+ }
+
+ #[inline(always)]
+ fn skip_value_inner_with_follow(
+ &mut self,
+ follow_pointers: bool,
+ skip_depth: u16,
+ ) -> DecodeResult<()> {
let (size, type_num) = self.size_and_type()?;
- self.skip_value_inner(size, type_num, follow_pointers)
+ self.skip_value_inner(size, type_num, follow_pointers, skip_depth)
}
}
@@ -939,6 +1027,9 @@ impl<'de: 'a, 'a> de::Deserializer<'de> for &'a mut Decoder<'de> {
V: Visitor<'de>,
{
self.skip_value()?;
+ if self.depth == 0 {
+ self.validate_skip_end()?;
+ }
visitor.visit_unit()
}
@@ -989,6 +1080,13 @@ impl<'de> SeqAccess<'de> for ArrayAccess<'_, 'de> {
{
// Check if there are no more elements.
if self.count == 0 {
+ // Only top-level containers need an explicit final overrun check.
+ // Nested containers are bounded by the value that contains them.
+ if self.de.depth <= 1 && self.de.current_ptr > self.de.limit {
+ return Err(self
+ .de
+ .invalid_db_error("skipped value extends beyond buffer"));
+ }
return Ok(None);
}
self.count -= 1;
@@ -1018,6 +1116,13 @@ impl<'de> MapAccess<'de> for MapAccessor<'_, 'de> {
{
// Check if there are no more entries.
if self.count == 0 {
+ // Only top-level containers need an explicit final overrun check.
+ // Nested containers are bounded by the value that contains them.
+ if self.de.depth <= 1 && self.de.current_ptr > self.de.limit {
+ return Err(self
+ .de
+ .invalid_db_error("skipped value extends beyond buffer"));
+ }
return Ok(None);
}
self.count -= 1;
@@ -1094,7 +1199,9 @@ impl<'de> de::VariantAccess<'de> for EnumAccessor<'_, 'de> {
#[cfg(test)]
mod tests {
- use crate::Reader;
+ use crate::{MaxMindDbError, Reader};
+
+ use super::Decoder;
#[test]
fn test_decoder_accepts_tuple_with_matching_length() {
@@ -1159,4 +1266,12 @@ mod tests {
.to_string()
.contains("expected tuple of length 2, got array of length 3"));
}
+
+ #[test]
+ fn test_skip_value_for_verification_rejects_truncated_pointer_payload() {
+ let mut decoder = Decoder::new(&[0x28], 0);
+ let err = decoder.skip_value_for_verification().unwrap_err();
+
+ assert!(matches!(err, MaxMindDbError::InvalidDatabase { .. }));
+ }
}
diff --git a/src/geoip2.rs b/src/geoip2.rs
index a8813421..668925fb 100644
--- a/src/geoip2.rs
+++ b/src/geoip2.rs
@@ -351,7 +351,8 @@ pub struct Asn<'a> {
/// Country/City database model structs.
///
-/// These structs are used by both [`super::Country`] and [`super::City`] records.
+/// These structs are used by both [`crate::geoip2::Country`] and
+/// [`crate::geoip2::City`] records.
pub mod country {
use super::Names;
use serde::{Deserialize, Serialize};
@@ -432,7 +433,8 @@ pub mod country {
/// City database model structs.
///
-/// City-specific structs. Country-level structs are re-exported from [`super::country`].
+/// City-specific structs. Country-level structs are re-exported from
+/// [`crate::geoip2::country`].
pub mod city {
use super::Names;
use serde::{Deserialize, Serialize};
@@ -513,7 +515,7 @@ pub mod city {
/// Enterprise database model structs.
///
/// Enterprise-specific structs with confidence scores. Some structs are
-/// re-exported from [`super::country`].
+/// re-exported from [`crate::geoip2::country`].
pub mod enterprise {
use super::Names;
use serde::{Deserialize, Serialize};
diff --git a/src/lib.rs b/src/lib.rs
index b19c723d..36f39b4f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -263,7 +263,7 @@ mod tests {
fn test_lookup_network_uses_measured_ipv4_subtree_depth() {
let mut reader =
Reader::open_readfile("test-data/test-data/MaxMind-DB-test-ipv6-32.mmdb").unwrap();
- assert_eq!(reader.metadata.ip_version, 6);
+ assert_eq!(reader.metadata().ip_version, 6);
// Simulate a valid IPv6 database whose IPv4 subtree starts somewhere
// other than bit 96. Using a shallow subtree depth keeps the combined
diff --git a/src/metadata.rs b/src/metadata.rs
index 2cb4bc29..74f2c298 100644
--- a/src/metadata.rs
+++ b/src/metadata.rs
@@ -1,9 +1,12 @@
//! Database metadata types.
use std::collections::BTreeMap;
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
use serde::{Deserialize, Serialize};
+use crate::error::MaxMindDbError;
+
/// Metadata about the MaxMind DB file.
#[derive(Deserialize, Serialize, Clone, Debug, PartialEq, Eq)]
pub struct Metadata {
@@ -31,6 +34,8 @@ impl Metadata {
/// Returns the database build time as a `SystemTime`.
///
/// This converts the `build_epoch` Unix timestamp to a `SystemTime`.
+ /// If `build_epoch` is too large to represent on this platform, this
+ /// returns an [`InvalidDatabase`](MaxMindDbError::InvalidDatabase) error.
///
/// # Example
///
@@ -38,11 +43,18 @@ impl Metadata {
/// use maxminddb::Reader;
///
/// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
- /// let build_time = reader.metadata.build_time();
+ /// let build_time = reader.metadata().build_time().unwrap();
/// println!("Database built: {:?}", build_time);
/// ```
- #[must_use]
- pub fn build_time(&self) -> std::time::SystemTime {
- std::time::UNIX_EPOCH + std::time::Duration::from_secs(self.build_epoch)
+ #[inline]
+ pub fn build_time(&self) -> Result {
+ UNIX_EPOCH
+ .checked_add(Duration::from_secs(self.build_epoch))
+ .ok_or_else(|| {
+ MaxMindDbError::invalid_database(format!(
+ "build_epoch - Unix timestamp is too large to represent as SystemTime: {}",
+ self.build_epoch
+ ))
+ })
}
}
diff --git a/src/reader.rs b/src/reader.rs
index 08eb3ec2..c2a34fb6 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -23,6 +23,7 @@ use crate::within::{IpInt, Within, WithinNode, WithinOptions};
/// Size of the data section separator (16 zero bytes).
const DATA_SECTION_SEPARATOR_SIZE: usize = 16;
+const METADATA_START_MARKER: &[u8] = b"\xab\xcd\xefMaxMind.com";
/// A reader for the MaxMind DB format. The lifetime `'data` is tied to the
/// lifetime of the underlying buffer holding the contents of the database file.
@@ -40,10 +41,10 @@ const DATA_SECTION_SEPARATOR_SIZE: usize = 16;
pub struct Reader> {
pub(crate) buf: S,
/// Database metadata.
- pub metadata: Metadata,
+ metadata: Metadata,
record_size: u16,
/// Cached `Metadata::node_count` for `Reader` search-tree traversal.
- /// Use this instead of `metadata.node_count`, which is publicly mutable.
+ /// Use this instead of `metadata.node_count` for traversal invariants.
node_count: usize,
/// Cached bytes per node derived from `Metadata::record_size` for `Reader`.
/// Use this instead of `metadata.record_size` in lookup hot paths.
@@ -53,6 +54,8 @@ pub struct Reader> {
/// correct prefix lengths for IPv4 lookups in IPv6 databases.
pub(crate) ipv4_start_bit_depth: usize,
pub(crate) pointer_base: usize,
+ pub(crate) data_section_len: usize,
+ pub(crate) metadata_start: usize,
}
impl> std::fmt::Debug for Reader {
@@ -63,6 +66,8 @@ impl> std::fmt::Debug for Reader {
.field("ipv4_start", &self.ipv4_start)
.field("ipv4_start_bit_depth", &self.ipv4_start_bit_depth)
.field("pointer_base", &self.pointer_base)
+ .field("data_section_len", &self.data_section_len)
+ .field("metadata_start", &self.metadata_start)
.finish_non_exhaustive()
}
}
@@ -124,9 +129,12 @@ impl<'de, S: AsRef<[u8]>> Reader {
/// ```
pub fn from_source(buf: S) -> Result, MaxMindDbError> {
let metadata_start = find_metadata_start(buf.as_ref())?;
+ // find_metadata_start returns the offset after the marker; the marker
+ // bytes are not part of the data section and must stay out of limits.
+ let data_section_end = metadata_marker_start(metadata_start)?;
let mut type_decoder = decoder::Decoder::new(&buf.as_ref()[metadata_start..], 0);
let metadata = Metadata::deserialize(&mut type_decoder)?;
- validate_record_size(metadata.record_size)?;
+ validate_metadata_for_reader(&metadata)?;
let search_tree_size =
search_tree_size_bytes(metadata.node_count as usize, metadata.record_size as usize)?;
@@ -140,7 +148,8 @@ impl<'de, S: AsRef<[u8]>> Reader {
"the MaxMind DB file's search tree extends beyond the file",
)
})?;
- validate_search_tree_layout(pointer_base, metadata_start)?;
+ validate_search_tree_layout(pointer_base, data_section_end)?;
+ let data_section_len = data_section_end - pointer_base;
let mut reader = Reader {
buf,
@@ -148,6 +157,8 @@ impl<'de, S: AsRef<[u8]>> Reader {
node_count,
node_byte_size,
pointer_base,
+ data_section_len,
+ metadata_start,
metadata,
ipv4_start: 0,
ipv4_start_bit_depth: 0,
@@ -159,6 +170,15 @@ impl<'de, S: AsRef<[u8]>> Reader {
Ok(reader)
}
+ /// Returns database metadata.
+ ///
+ /// Metadata is validated when the reader is created and exposed by
+ /// reference so it cannot be mutated independently of cached reader state.
+ #[inline]
+ pub fn metadata(&self) -> &Metadata {
+ &self.metadata
+ }
+
/// Lookup an IP address in the database.
///
/// Returns a [`LookupResult`] that can be used to:
@@ -166,7 +186,6 @@ impl<'de, S: AsRef<[u8]>> Reader {
/// - Get the network containing the IP with [`network()`](LookupResult::network)
/// - Decode the full record with [`decode()`](LookupResult::decode)
/// - Decode a specific path with [`decode_path()`](LookupResult::decode_path)
- /// - Get a low-level decoder with [`decoder()`](LookupResult::decoder)
///
/// # Examples
///
@@ -361,12 +380,45 @@ impl<'de, S: AsRef<[u8]>> Reader {
let mut node = self.start_node(bit_count);
let node_count = self.node_count;
+ let has_ipv4_subtree = self.has_ipv4_subtree();
let mut stack: Vec = Vec::with_capacity(bit_count - prefix_len);
+ // `bit_count == 32` means the caller requested an IPv4 CIDR. In an
+ // IPv6 database with no IPv4 subtree, `start_node(32)` can already be a
+ // terminal IPv6 record reached by walking the all-zero prefix. Do not
+ // read that terminal value as a tree node; yield the containing IPv6
+ // network instead, matching lookup behavior.
+ if bit_count == 32
+ && self.metadata.ip_version == 6
+ && !has_ipv4_subtree
+ && node >= node_count
+ {
+ stack.push(WithinNode {
+ node,
+ ip_int: IpInt::V6(0),
+ prefix_len: self.ipv4_start_bit_depth,
+ });
+
+ return Ok(Within {
+ reader: self,
+ node_count,
+ has_ipv4_subtree,
+ stack,
+ options,
+ });
+ }
+
// Traverse down the tree to the level that matches the cidr mark
let mut depth = 0_usize;
for i in 0..prefix_len {
+ // `read_node` is only valid for internal search-tree nodes.
+ if node >= node_count {
+ // We've hit a data node or dead end before we exhausted our prefix.
+ // This means the requested CIDR is contained in a single record.
+ break;
+ }
+
let bit = ip_int.get_bit(i);
node = self.read_node(node, bit as usize);
depth = i + 1; // We've now traversed i+1 bits (bits 0 through i)
@@ -391,6 +443,7 @@ impl<'de, S: AsRef<[u8]>> Reader {
let within = Within {
reader: self,
node_count,
+ has_ipv4_subtree,
stack,
options,
};
@@ -547,19 +600,8 @@ impl<'de, S: AsRef<[u8]>> Reader {
"the MaxMind DB file's data pointer resolves to an invalid location",
)
})?;
- let data_section_len = self
- .buf
- .as_ref()
- .len()
- .checked_sub(self.pointer_base)
- .ok_or_else(|| {
- MaxMindDbError::invalid_database(
- "the MaxMind DB file's data pointer resolves to an invalid location",
- )
- })?;
-
- // Check bounds using pointer_base which marks the start of the data section
- if resolved >= data_section_len {
+ // Reject offsets at or beyond the marker-excluding data section length.
+ if resolved >= self.data_section_len {
return Err(MaxMindDbError::invalid_database(
"the MaxMind DB file's data pointer resolves to an invalid location",
));
@@ -596,25 +638,15 @@ impl<'de, S: AsRef<[u8]>> Reader {
/// ```
pub fn verify(&self) -> Result<(), MaxMindDbError> {
let metadata_start = find_metadata_start(self.buf.as_ref())?;
- self.verify_metadata(metadata_start)?;
- self.verify_database(metadata_start)
+ let data_section_end = metadata_marker_start(metadata_start)?;
+ self.verify_metadata(data_section_end)?;
+ self.verify_database(data_section_end)
}
- fn verify_metadata(&self, metadata_start: usize) -> Result<(), MaxMindDbError> {
+ fn verify_metadata(&self, data_section_end: usize) -> Result<(), MaxMindDbError> {
let m = &self.metadata;
- if m.binary_format_major_version != 2 {
- return Err(MaxMindDbError::invalid_database(format!(
- "binary_format_major_version - Expected: 2 Actual: {}",
- m.binary_format_major_version
- )));
- }
- if m.binary_format_minor_version != 0 {
- return Err(MaxMindDbError::invalid_database(format!(
- "binary_format_minor_version - Expected: 0 Actual: {}",
- m.binary_format_minor_version
- )));
- }
+ validate_metadata_for_reader(m)?;
if m.database_type.is_empty() {
return Err(MaxMindDbError::invalid_database(
"database_type - Expected: non-empty string Actual: \"\"",
@@ -625,26 +657,14 @@ impl<'de, S: AsRef<[u8]>> Reader {
"description - Expected: non-empty map Actual: {}",
));
}
- if m.ip_version != 4 && m.ip_version != 6 {
- return Err(MaxMindDbError::invalid_database(format!(
- "ip_version - Expected: 4 or 6 Actual: {}",
- m.ip_version
- )));
- }
- validate_record_size(m.record_size)?;
- if m.node_count == 0 {
- return Err(MaxMindDbError::invalid_database(
- "node_count - Expected: positive integer Actual: 0",
- ));
- }
- validate_search_tree_layout(self.pointer_base, metadata_start)?;
+ validate_search_tree_layout(self.pointer_base, data_section_end)?;
Ok(())
}
- fn verify_database(&self, metadata_start: usize) -> Result<(), MaxMindDbError> {
+ fn verify_database(&self, data_section_end: usize) -> Result<(), MaxMindDbError> {
let offsets = self.verify_search_tree()?;
self.verify_data_section_separator()?;
- self.verify_data_section(offsets, metadata_start)
+ self.verify_data_section(offsets, data_section_end)
}
fn verify_search_tree(&self) -> Result, MaxMindDbError> {
@@ -700,9 +720,9 @@ impl<'de, S: AsRef<[u8]>> Reader {
fn verify_data_section(
&self,
offsets: HashSet,
- metadata_start: usize,
+ data_section_end: usize,
) -> Result<(), MaxMindDbError> {
- let data_section = &self.buf.as_ref()[self.pointer_base..metadata_start];
+ let data_section = &self.buf.as_ref()[self.pointer_base..data_section_end];
// Verify each offset from the search tree points to valid, decodable data
for &offset in &offsets {
@@ -742,6 +762,29 @@ fn validate_record_size(record_size: u16) -> Result<(), MaxMindDbError> {
}
}
+pub(crate) fn validate_metadata_for_reader(metadata: &Metadata) -> Result<(), MaxMindDbError> {
+ if metadata.binary_format_major_version != 2 {
+ return Err(MaxMindDbError::invalid_database(format!(
+ "binary_format_major_version - Expected: 2 Actual: {}",
+ metadata.binary_format_major_version
+ )));
+ }
+ // Minor format versions are intended to be forward-compatible.
+ if metadata.ip_version != 4 && metadata.ip_version != 6 {
+ return Err(MaxMindDbError::invalid_database(format!(
+ "ip_version - Expected: 4 or 6 Actual: {}",
+ metadata.ip_version
+ )));
+ }
+ if metadata.node_count == 0 {
+ return Err(MaxMindDbError::invalid_database(
+ "node_count - Expected: positive integer Actual: 0",
+ ));
+ }
+ metadata.build_time()?;
+ validate_record_size(metadata.record_size)
+}
+
fn search_tree_size_bytes(node_count: usize, record_size: usize) -> Result {
node_count
.checked_mul(record_size)
@@ -755,9 +798,9 @@ fn search_tree_size_bytes(node_count: usize, record_size: usize) -> Result Result<(), MaxMindDbError> {
- if pointer_base > metadata_start {
+ if pointer_base > data_section_end {
return Err(MaxMindDbError::invalid_database(
"the MaxMind DB file's search tree extends beyond the metadata section",
));
@@ -870,11 +913,15 @@ fn normalize_lookup_result(node: usize, node_count: usize, prefix_len: usize) ->
}
fn find_metadata_start(buf: &[u8]) -> Result {
- const METADATA_START_MARKER: &[u8] = b"\xab\xcd\xefMaxMind.com";
-
memchr::memmem::rfind(buf, METADATA_START_MARKER)
.map(|x| x + METADATA_START_MARKER.len())
.ok_or_else(|| {
MaxMindDbError::invalid_database("could not find MaxMind DB metadata in file")
})
}
+
+fn metadata_marker_start(metadata_start: usize) -> Result {
+ metadata_start
+ .checked_sub(METADATA_START_MARKER.len())
+ .ok_or_else(|| MaxMindDbError::invalid_database("invalid metadata marker location"))
+}
diff --git a/src/reader_test.rs b/src/reader_test.rs
index 329e7cd8..dc9db792 100644
--- a/src/reader_test.rs
+++ b/src/reader_test.rs
@@ -1,10 +1,12 @@
use std::net::IpAddr;
+use std::time::{Duration, UNIX_EPOCH};
use ipnetwork::IpNetwork;
use serde::Deserialize;
use serde_json::json;
use crate::geoip2;
+use crate::reader::validate_metadata_for_reader;
use crate::{MaxMindDbError, Reader, Within, WithinOptions};
const TEST_DATABASE_CONFIGS: &[(usize, usize)] =
@@ -108,19 +110,19 @@ fn test_pointers_in_metadata() {
let reader = open_test_data_reader("MaxMind-DB-test-metadata-pointers.mmdb");
assert_eq!(
- reader.metadata.database_type,
+ reader.metadata().database_type,
"Lots of pointers in metadata"
);
assert_eq!(
- reader.metadata.description["en"],
+ reader.metadata().description["en"],
"Lots of pointers in metadata"
);
assert_eq!(
- reader.metadata.description["es"],
+ reader.metadata().description["es"],
"Lots of pointers in metadata"
);
assert_eq!(
- reader.metadata.description["zh"],
+ reader.metadata().description["zh"],
"Lots of pointers in metadata"
);
@@ -465,7 +467,7 @@ fn test_within_city() {
}
fn check_metadata>(reader: &Reader, ip_version: usize, record_size: usize) {
- let metadata = &reader.metadata;
+ let metadata = reader.metadata();
assert_eq!(metadata.binary_format_major_version, 2_u16);
assert_eq!(metadata.binary_format_minor_version, 0_u16);
@@ -494,15 +496,78 @@ fn check_metadata>(reader: &Reader, ip_version: usize, record_
}
#[test]
-fn test_lookup_uses_cached_record_size_after_metadata_mutation() {
+fn test_metadata_build_time_conversion() {
init_logger();
- let mut reader = open_test_data_reader("MaxMind-DB-test-ipv4-24.mmdb");
- reader.metadata.record_size = 0;
+ let reader = open_test_data_reader("GeoIP2-City-Test.mmdb");
- let lookup = reader.lookup("1.1.1.1".parse().unwrap()).unwrap();
- assert!(lookup.has_data());
- assert_eq!(lookup.network().unwrap().to_string(), "1.1.1.1/32");
+ assert_eq!(
+ reader.metadata().build_time().unwrap(),
+ UNIX_EPOCH + Duration::from_secs(reader.metadata().build_epoch)
+ );
+}
+
+#[test]
+fn test_metadata_build_time_rejects_uint64_max_epoch() {
+ init_logger();
+
+ let err =
+ Reader::open_readfile("test-data/bad-data/libmaxminddb/libmaxminddb-uint64-max-epoch.mmdb")
+ .unwrap_err();
+
+ assert!(
+ matches!(
+ err,
+ MaxMindDbError::InvalidDatabase { ref message, .. }
+ if message
+ == "build_epoch - Unix timestamp is too large to represent as SystemTime: 18446744073709551615"
+ ),
+ "Expected InvalidDatabase error for unrepresentable build_epoch, got {err:?}"
+ );
+}
+
+#[test]
+fn test_reader_metadata_accessor_returns_validated_metadata() {
+ init_logger();
+
+ let reader = open_test_data_reader("MaxMind-DB-test-ipv4-24.mmdb");
+
+ assert_eq!(reader.metadata().record_size, 24);
+ assert_eq!(reader.metadata().ip_version, 4);
+ assert_eq!(reader.metadata().database_type, "Test");
+}
+
+#[test]
+fn test_metadata_validation_rejects_hard_invariants() {
+ init_logger();
+
+ let reader = open_test_data_reader("MaxMind-DB-test-ipv4-24.mmdb");
+ let metadata = reader.metadata();
+
+ let mut invalid = metadata.clone();
+ invalid.binary_format_major_version = 3;
+ assert!(matches!(
+ validate_metadata_for_reader(&invalid),
+ Err(MaxMindDbError::InvalidDatabase { .. })
+ ));
+
+ let mut future_minor = metadata.clone();
+ future_minor.binary_format_minor_version = u16::MAX;
+ validate_metadata_for_reader(&future_minor).unwrap();
+
+ let mut invalid = metadata.clone();
+ invalid.ip_version = 5;
+ assert!(matches!(
+ validate_metadata_for_reader(&invalid),
+ Err(MaxMindDbError::InvalidDatabase { .. })
+ ));
+
+ let mut invalid = metadata.clone();
+ invalid.node_count = 0;
+ assert!(matches!(
+ validate_metadata_for_reader(&invalid),
+ Err(MaxMindDbError::InvalidDatabase { .. })
+ ));
}
#[test]
@@ -511,7 +576,7 @@ fn test_resolve_data_pointer_rejects_small_pointer() {
let reader = open_test_data_reader("MaxMind-DB-test-ipv4-24.mmdb");
let err = reader
- .resolve_data_pointer(reader.metadata.node_count as usize)
+ .resolve_data_pointer(reader.metadata().node_count as usize)
.unwrap_err();
assert!(matches!(err, MaxMindDbError::InvalidDatabase { .. }));
@@ -1054,6 +1119,20 @@ fn test_within_rejects_ipv6_cidr_for_ipv4_database() {
}
}
+#[test]
+fn test_within_no_ipv4_search_tree() {
+ init_logger();
+
+ let reader = open_test_data_reader("MaxMind-DB-no-ipv4-search-tree.mmdb");
+
+ for cidr in ["::/0", "::/64", "0.0.0.0/0", "200.0.2.1/32"] {
+ let cidr: IpNetwork = cidr.parse().unwrap();
+ let networks = collect_networks(reader.within(cidr, Default::default()).unwrap());
+
+ assert_eq!(networks, vec!["::/64"], "unexpected networks for {cidr}");
+ }
+}
+
/// Test that verify() succeeds on valid databases (matching Go's TestVerifyOnGoodDatabases)
#[test]
fn test_verify_good_databases() {
@@ -1124,6 +1203,38 @@ fn test_verify_broken_pointers() {
);
}
+#[test]
+fn test_rejects_data_pointer_to_metadata_marker() {
+ init_logger();
+
+ let source_path = "test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb";
+ let reader = Reader::open_readfile(source_path).unwrap();
+ assert_eq!(reader.metadata().record_size, 24);
+
+ let pointer = reader.metadata().node_count as usize + 16 + reader.data_section_len;
+ assert!(pointer <= 0x00ff_ffff);
+
+ let mut bytes = std::fs::read(source_path).unwrap();
+ for record in bytes[..6].chunks_exact_mut(3) {
+ record[0] = ((pointer >> 16) & 0xff) as u8;
+ record[1] = ((pointer >> 8) & 0xff) as u8;
+ record[2] = (pointer & 0xff) as u8;
+ }
+
+ let reader = Reader::from_source(bytes).unwrap();
+ let err = reader.lookup("1.1.1.1".parse().unwrap()).unwrap_err();
+ assert!(
+ matches!(err, MaxMindDbError::InvalidDatabase { .. }),
+ "Expected InvalidDatabase error for marker pointer, got {err:?}"
+ );
+
+ let result = reader.verify();
+ assert!(
+ matches!(result, Err(MaxMindDbError::InvalidDatabase { .. })),
+ "Expected InvalidDatabase error for marker pointer during verify, got {result:?}"
+ );
+}
+
#[test]
fn test_verify_broken_search_tree() {
init_logger();
@@ -1175,6 +1286,60 @@ fn test_verify_rejects_truncated_scalar_value() {
result
);
}
+
+#[test]
+fn test_decode_rejects_truncated_ignored_scalar_value() {
+ init_logger();
+
+ let source_path = "test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb";
+ let reader = open_test_data_reader("MaxMind-DB-test-ipv4-24.mmdb");
+ let lookup = reader.lookup("1.1.1.32".parse().unwrap()).unwrap();
+ let data_offset = lookup.offset().expect("expected data offset");
+ let mut bytes = std::fs::read(source_path).unwrap();
+ let record_start = reader.pointer_base + data_offset;
+ let string_value = b"1.1.1.32";
+ let relative_value_offset = bytes[record_start..]
+ .windows(string_value.len())
+ .position(|window| window == string_value)
+ .expect("expected terminal string payload in fixture record");
+ let string_ctrl_offset = record_start + relative_value_offset - 1;
+ assert_eq!(
+ bytes[string_ctrl_offset], 0x48,
+ "unexpected string control byte in source fixture"
+ );
+
+ // Inflate the terminal string from length 8 to length 28 without adding
+ // bytes. Decoding into a struct with no fields forces serde to skip the
+ // corrupt value as unknown data.
+ bytes[string_ctrl_offset] = 0x5c;
+
+ #[derive(Deserialize, Debug)]
+ struct Empty {}
+
+ let reader = Reader::from_source(bytes).unwrap();
+ let lookup = reader.lookup("1.1.1.32".parse().unwrap()).unwrap();
+ let err = lookup.decode::().unwrap_err();
+
+ assert!(matches!(err, MaxMindDbError::InvalidDatabase { .. }));
+}
+
+#[test]
+fn test_decode_rejects_deep_nesting_in_ignored_values() {
+ init_logger();
+
+ let reader =
+ Reader::open_readfile("test-data/bad-data/libmaxminddb/libmaxminddb-deep-nesting.mmdb")
+ .unwrap();
+ let lookup = reader.lookup("1.1.1.1".parse().unwrap()).unwrap();
+ let err = lookup.decode::().unwrap_err();
+
+ assert!(
+ err.to_string()
+ .contains("exceeded maximum data structure depth"),
+ "unexpected error: {err}"
+ );
+}
+
/// Test that size hints are properly returned for sequences and maps
#[test]
fn test_size_hints() {
diff --git a/src/result.rs b/src/result.rs
index 8b3253bb..e30ca044 100644
--- a/src/result.rs
+++ b/src/result.rs
@@ -74,7 +74,7 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> {
#[inline]
fn decoder(&self, offset: usize) -> super::decoder::Decoder<'a> {
let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..];
- super::decoder::Decoder::new(buf, offset)
+ super::decoder::Decoder::new_with_limit(buf, offset, self.reader.data_section_len)
}
/// Creates a new LookupResult for a found IP.
@@ -281,6 +281,7 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> {
}
if !found {
+ decoder.validate_skip_end().map_err(with_path)?;
return Ok(None);
}
}
diff --git a/src/within.rs b/src/within.rs
index 4aaf75e4..9719ba0c 100644
--- a/src/within.rs
+++ b/src/within.rs
@@ -1,7 +1,7 @@
//! Network iteration types.
use std::cmp::Ordering;
-use std::net::IpAddr;
+use std::net::{IpAddr, Ipv6Addr};
use crate::decoder;
use crate::error::MaxMindDbError;
@@ -111,6 +111,7 @@ pub(crate) struct WithinNode {
pub struct Within<'de, S: AsRef<[u8]>> {
pub(crate) reader: &'de Reader,
pub(crate) node_count: usize,
+ pub(crate) has_ipv4_subtree: bool,
pub(crate) stack: Vec,
pub(crate) options: WithinOptions,
}
@@ -180,8 +181,6 @@ impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> {
match current.node.cmp(&self.node_count) {
Ordering::Greater => {
// This is a data node, emit it and we're done (until the following next call)
- let ip_addr = ip_int_to_addr(¤t.ip_int);
-
// Resolve the pointer to a data offset
let data_offset = match self.reader.resolve_data_pointer(current.node) {
Ok(offset) => offset,
@@ -197,17 +196,8 @@ impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> {
}
}
- let network_kind = match current.ip_int {
- IpInt::V4(_) => NetworkKind::V4,
- IpInt::V6(_)
- if current.ip_int.is_ipv4_in_ipv6()
- && self.reader.has_ipv4_subtree()
- && current.prefix_len >= self.reader.ipv4_start_bit_depth =>
- {
- NetworkKind::V4InV6Subtree
- }
- IpInt::V6(_) => NetworkKind::V6,
- };
+ let network_kind = self.network_kind(¤t);
+ let ip_addr = network_ip_addr(network_kind, current.ip_int);
return Some(Ok(LookupResult::new_found(
self.reader,
@@ -221,18 +211,8 @@ impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> {
Ordering::Equal => {
// Dead end (no data) - include if option is set
if self.options.include_networks_without_data {
- let ip_addr = ip_int_to_addr(¤t.ip_int);
- let network_kind = match current.ip_int {
- IpInt::V4(_) => NetworkKind::V4,
- IpInt::V6(_)
- if current.ip_int.is_ipv4_in_ipv6()
- && self.reader.has_ipv4_subtree()
- && current.prefix_len >= self.reader.ipv4_start_bit_depth =>
- {
- NetworkKind::V4InV6Subtree
- }
- IpInt::V6(_) => NetworkKind::V6,
- };
+ let network_kind = self.network_kind(¤t);
+ let ip_addr = network_ip_addr(network_kind, current.ip_int);
return Some(Ok(LookupResult::new_not_found(
self.reader,
current.prefix_len as u8,
@@ -263,6 +243,24 @@ impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> {
}
impl<'de, S: AsRef<[u8]>> Within<'de, S> {
+ #[inline(always)]
+ fn network_kind(&self, node: &WithinNode) -> NetworkKind {
+ match node.ip_int {
+ IpInt::V4(_) if self.reader.metadata().ip_version == 6 && !self.has_ipv4_subtree => {
+ NetworkKind::V6
+ }
+ IpInt::V4(_) => NetworkKind::V4,
+ IpInt::V6(_)
+ if node.ip_int.is_ipv4_in_ipv6()
+ && self.has_ipv4_subtree
+ && node.prefix_len >= self.reader.ipv4_start_bit_depth =>
+ {
+ NetworkKind::V4InV6Subtree
+ }
+ IpInt::V6(_) => NetworkKind::V6,
+ }
+ }
+
fn push_child(
&mut self,
parent_node: usize,
@@ -281,7 +279,8 @@ impl<'de, S: AsRef<[u8]>> Within<'de, S> {
/// Check if the value at the given data offset is an empty map or array.
fn is_empty_value_at(&self, data_offset: usize) -> Result {
let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..];
- let mut dec = decoder::Decoder::new(buf, data_offset);
+ let mut dec =
+ decoder::Decoder::new_with_limit(buf, data_offset, self.reader.data_section_len);
let (size, type_num) = dec.peek_type()?;
match type_num {
decoder::TYPE_MAP | decoder::TYPE_ARRAY => Ok(size == 0),
@@ -290,12 +289,28 @@ impl<'de, S: AsRef<[u8]>> Within<'de, S> {
}
}
+#[inline(always)]
+fn network_ip_addr(network_kind: NetworkKind, ip_int: IpInt) -> IpAddr {
+ let ip_addr = ip_int_to_addr(&ip_int);
+ match (network_kind, ip_int, ip_addr) {
+ // Low IPv6 tree keys are normally formatted as IPv4 addresses for IPv4
+ // subtree records. When the record is really IPv6, preserve the low
+ // IPv6 bits instead of collapsing the network to ::.
+ (NetworkKind::V6, IpInt::V6(ip), IpAddr::V4(_)) => IpAddr::V6(ip.into()),
+ // Defensive fallback for IPv4 CIDR iteration in IPv6 databases without
+ // an IPv4 subtree. Reader::within() should seed those as IpInt::V6(0).
+ (NetworkKind::V6, IpInt::V4(_), IpAddr::V4(_)) => IpAddr::V6(Ipv6Addr::UNSPECIFIED),
+ (_, _, ip_addr) => ip_addr,
+ }
+}
+
/// Convert IpInt to IpAddr
+#[inline(always)]
pub(crate) fn ip_int_to_addr(ip_int: &IpInt) -> IpAddr {
match ip_int {
IpInt::V4(ip) => IpAddr::V4((*ip).into()),
IpInt::V6(ip) => {
- // Check if this is an IPv4-mapped IPv6 address
+ // Check if this is an IPv4-mapped IPv6 address.
if *ip <= 0xFFFFFFFF {
IpAddr::V4((*ip as u32).into())
} else {
@@ -304,3 +319,28 @@ pub(crate) fn ip_int_to_addr(ip_int: &IpInt) -> IpAddr {
}
}
}
+
+#[cfg(test)]
+mod tests {
+ use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
+
+ use crate::result::NetworkKind;
+
+ use super::{network_ip_addr, IpInt};
+
+ #[test]
+ fn network_ip_addr_preserves_low_ipv6_bits() {
+ assert_eq!(
+ network_ip_addr(NetworkKind::V6, IpInt::V6(1)),
+ IpAddr::V6(Ipv6Addr::from(1_u128))
+ );
+ }
+
+ #[test]
+ fn network_ip_addr_formats_ipv4_subtree_records_as_ipv4() {
+ assert_eq!(
+ network_ip_addr(NetworkKind::V4InV6Subtree, IpInt::V6(1)),
+ IpAddr::V4(Ipv4Addr::from(1_u32))
+ );
+ }
+}