From b51fb850a090efb8f5301598c5f32965d09bd078 Mon Sep 17 00:00:00 2001 From: sunruizhesrz Date: Thu, 21 May 2026 14:46:33 +0800 Subject: [PATCH 1/2] feat(pack): add PackStats struct and Pack::stats_pack tool function Implements experiment 3, task 3 from the course assignment: - Add PackStats struct with fields: total, commits, trees, blobs, tags, deltas - Add Pack::stats_pack(path) -> Result<PackStats, GitError> that: - Opens and validates the pack file header via check_header - Iterates all objects using decode_pack_object - Counts each object by type (BaseObject vs delta variants) - Returns Err on missing file or invalid pack magic - Add 4 tests: - test_stats_pack_small_sha1: normal-path on small-sha1.pack (total=19) - test_stats_pack_medium_sha1_has_deltas: normal-path on medium-sha1.pack (35031 objects, 22339 deltas) - test_stats_pack_file_not_found: error-path for nonexistent file - test_stats_pack_invalid_pack_magic: error-path for corrupted pack header --- src/internal/pack/decode.rs | 191 ++++++++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) diff --git a/src/internal/pack/decode.rs b/src/internal/pack/decode.rs index ca41926d..0a7a7803 100644 --- a/src/internal/pack/decode.rs +++ b/src/internal/pack/decode.rs @@ -792,6 +792,101 @@ impl Pack { } } +/// Statistics about the objects contained in a pack file. +/// +/// This struct is returned by [`Pack::stats_pack`] and provides a breakdown +/// of all object types found in the pack. +#[derive(Debug, Default, PartialEq, Eq)] +pub struct PackStats { + /// Total number of objects in the pack. + pub total: usize, + /// Number of commit objects. + pub commits: usize, + /// Number of tree objects. + pub trees: usize, + /// Number of blob objects. + pub blobs: usize, + /// Number of tag objects. + pub tags: usize, + /// Number of delta objects (both offset-delta and hash-delta). + pub deltas: usize, +} + +impl Pack { + /// Scans a pack file and returns statistics about the object types it contains. 
+ /// + /// This is a lightweight read-only utility that parses the pack header and every + /// object header without fully reconstructing delta chains. It therefore runs + /// much faster than a full [`Pack::decode`] call for large packs. + /// + /// # Parameters + /// * `path` - Path to the `.pack` file on disk. + /// + /// # Returns + /// * `Ok(PackStats)` – breakdown of object counts by type. + /// * `Err(GitError)` – if the file cannot be opened or the pack header is invalid. + /// + /// # Example + /// ```no_run + /// use std::path::PathBuf; + /// use git_internal::internal::pack::{Pack, decode::PackStats}; + /// + /// let stats = Pack::stats_pack(PathBuf::from("repo.pack")).unwrap(); + /// println!("total={}, commits={}, blobs={}", stats.total, stats.commits, stats.blobs); + /// ``` + pub fn stats_pack(path: std::path::PathBuf) -> Result<PackStats, GitError> { + use std::{fs, io::BufReader}; + + let file = fs::File::open(&path).map_err(|e| { + crate::errors::GitError::InvalidPackFile(format!( + "Cannot open pack file '{}': {e}", + path.display() + )) + })?; + let mut reader = BufReader::new(file); + + // Validate header and get total object count. + let (object_num, _header_bytes) = Pack::check_header(&mut reader)?; + + let mut stats = PackStats { + total: object_num as usize, + ..Default::default() + }; + + // We create a minimal temporary Pack just to drive decode_pack_object. + // Using a dedicated decode loop here avoids the full thread-pool + callback + // machinery of Pack::decode while still reusing the same per-object parser. + let mut offset: usize = 12; // header is 12 bytes + for _ in 0..object_num { + match Pack::decode_pack_object(&mut reader, &mut offset)? 
{ + Some(obj) => { + use crate::internal::pack::cache_object::CacheObjectInfo; + match &obj.info { + CacheObjectInfo::BaseObject(obj_type, _) => { + use crate::internal::object::types::ObjectType; + match obj_type { + ObjectType::Commit => stats.commits += 1, + ObjectType::Tree => stats.trees += 1, + ObjectType::Blob => stats.blobs += 1, + ObjectType::Tag => stats.tags += 1, + _ => {} // other base types – not counted separately + } + } + CacheObjectInfo::OffsetDelta(_, _) + | CacheObjectInfo::OffsetZstdelta(_, _) + | CacheObjectInfo::HashDelta(_, _) => { + stats.deltas += 1; + } + } + } + None => {} + } + } + + Ok(stats) + } +} + #[cfg(test)] mod tests { use std::{ @@ -1051,4 +1146,100 @@ mod tests { } }); } + + // ----------------------------------------------------------------------- + // PackStats tests (Experiment 3, Task 3) + // ----------------------------------------------------------------------- + + /// Normal-path test: stats_pack on a small SHA-1 pack (no deltas). + /// + /// We download the same "small-sha1.pack" used by other decode tests, + /// run stats_pack on it, and verify: + /// - total matches the header object count + /// - commits + trees + blobs + tags + deltas == total + /// - at least one commit and one blob exist (the pack is a real git repo extract) + #[test] + fn test_stats_pack_small_sha1() { + let _guard = set_hash_kind_for_test(HashKind::Sha1); + let (source, _dl_guard) = download_pack_file("small-sha1.pack"); + + let stats = Pack::stats_pack(source).expect("stats_pack should succeed"); + + eprintln!( + "small-sha1 stats: total={}, commits={}, trees={}, blobs={}, tags={}, deltas={}", + stats.total, stats.commits, stats.trees, stats.blobs, stats.tags, stats.deltas + ); + + // Sanity: all per-type counts add up to total. 
+ let sum = stats.commits + stats.trees + stats.blobs + stats.tags + stats.deltas; + assert_eq!( + sum, stats.total, + "per-type counts should sum to total ({} vs {})", + sum, stats.total + ); + // The pack is a real git repo slice – expect at least one commit and one blob. + assert!(stats.commits > 0, "expected at least one commit"); + assert!(stats.blobs > 0, "expected at least one blob"); + } + + /// Normal-path test: stats_pack on a medium SHA-1 pack that contains offset-delta objects. + /// + /// "medium-sha1.pack" is used by the existing decode tests and is known to contain + /// both base objects and offset-delta objects, so deltas > 0. + #[test] + fn test_stats_pack_medium_sha1_has_deltas() { + let _guard = set_hash_kind_for_test(HashKind::Sha1); + let (source, _dl_guard) = download_pack_file("medium-sha1.pack"); + + let stats = Pack::stats_pack(source).expect("stats_pack should succeed on medium pack"); + + eprintln!( + "medium-sha1 stats: total={}, commits={}, trees={}, blobs={}, tags={}, deltas={}", + stats.total, stats.commits, stats.trees, stats.blobs, stats.tags, stats.deltas + ); + + let sum = stats.commits + stats.trees + stats.blobs + stats.tags + stats.deltas; + assert_eq!(sum, stats.total, "per-type counts must equal total"); + // medium-sha1.pack is known to contain offset-delta objects. + assert!( + stats.deltas > 0, + "expected delta objects in medium-sha1 pack" + ); + // And it has enough total objects that it's a meaningful check. + assert!(stats.total > 1000, "expected a sizeable medium pack"); + } + + /// Error-path test: stats_pack on a path that does not exist. + /// + /// Must return Err, not panic. + #[test] + fn test_stats_pack_file_not_found() { + let result = Pack::stats_pack(PathBuf::from("/nonexistent/path/to/fake.pack")); + assert!( + result.is_err(), + "stats_pack should return Err for a missing file" + ); + } + + /// Error-path test: stats_pack on a file whose content is not a valid pack. 
+ /// + /// We construct an in-memory byte sequence that starts with wrong magic bytes + /// and write it to a temp file, then verify that stats_pack returns an error. + #[test] + fn test_stats_pack_invalid_pack_magic() { + use std::io::Write; + use tempfile::NamedTempFile; + + // Write 12 bytes with wrong magic ("FAKE" instead of "PACK"). + let mut tmp = NamedTempFile::new().expect("create temp file"); + tmp.write_all(b"FAKE\x00\x00\x00\x02\x00\x00\x00\x05") + .expect("write temp bytes"); + let path = tmp.path().to_path_buf(); + + let result = Pack::stats_pack(path); + assert!( + result.is_err(), + "stats_pack should return Err for invalid pack magic" + ); + } } From 64aecdc06eb2e413fbf2c16769549d684fdd1d8f Mon Sep 17 00:00:00 2001 From: sunruizhesrz Date: Sun, 31 May 2026 18:22:03 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E4=B8=80=E4=B8=AA=20Pack?= =?UTF-8?q?Decode=20=E7=BB=9F=E8=AE=A1=E5=B7=A5=E5=85=B7=E5=87=BD=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/internal/pack/decode.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/internal/pack/decode.rs b/src/internal/pack/decode.rs index 0a7a7803..0f33617b 100644 --- a/src/internal/pack/decode.rs +++ b/src/internal/pack/decode.rs @@ -845,7 +845,6 @@ impl Pack { })?; let mut reader = BufReader::new(file); - // Validate header and get total object count. let (object_num, _header_bytes) = Pack::check_header(&mut reader)?; let mut stats = PackStats { @@ -853,10 +852,7 @@ impl Pack { ..Default::default() }; - // We create a minimal temporary Pack just to drive decode_pack_object. - // Using a dedicated decode loop here avoids the full thread-pool + callback - // machinery of Pack::decode while still reusing the same per-object parser. - let mut offset: usize = 12; // header is 12 bytes + let mut offset: usize = 12; for _ in 0..object_num { match Pack::decode_pack_object(&mut reader, &mut offset)? 
{ Some(obj) => { @@ -869,7 +865,7 @@ impl Pack { ObjectType::Tree => stats.trees += 1, ObjectType::Blob => stats.blobs += 1, ObjectType::Tag => stats.tags += 1, - _ => {} // other base types – not counted separately + _ => {} } } CacheObjectInfo::OffsetDelta(_, _) @@ -1170,14 +1166,13 @@ mod tests { stats.total, stats.commits, stats.trees, stats.blobs, stats.tags, stats.deltas ); - // Sanity: all per-type counts add up to total. let sum = stats.commits + stats.trees + stats.blobs + stats.tags + stats.deltas; assert_eq!( sum, stats.total, "per-type counts should sum to total ({} vs {})", sum, stats.total ); - // The pack is a real git repo slice – expect at least one commit and one blob. + assert!(stats.commits > 0, "expected at least one commit"); assert!(stats.blobs > 0, "expected at least one blob"); } @@ -1200,12 +1195,12 @@ mod tests { let sum = stats.commits + stats.trees + stats.blobs + stats.tags + stats.deltas; assert_eq!(sum, stats.total, "per-type counts must equal total"); - // medium-sha1.pack is known to contain offset-delta objects. + assert!( stats.deltas > 0, "expected delta objects in medium-sha1 pack" ); - // And it has enough total objects that it's a meaningful check. + assert!(stats.total > 1000, "expected a sizeable medium pack"); } @@ -1230,7 +1225,6 @@ mod tests { use std::io::Write; use tempfile::NamedTempFile; - // Write 12 bytes with wrong magic ("FAKE" instead of "PACK"). let mut tmp = NamedTempFile::new().expect("create temp file"); tmp.write_all(b"FAKE\x00\x00\x00\x02\x00\x00\x00\x05") .expect("write temp bytes");