diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..2b476ae9 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +tests/data/packs/* filter=lfs diff=lfs merge=lfs -text +*.pack filter=lfs diff=lfs merge=lfs -text +20de184187340e8b98da0fa14685b6a3e5c638ba2cf6b7b193b17c0ee1da38e8 filter=lfs diff=lfs merge=lfs -text diff --git a/Cargo.lock b/Cargo.lock index 68dfc285..d59811a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -993,7 +993,6 @@ dependencies = [ "tokio-util", "tracing", "tracing-subscriber", - "ureq", "uuid", "zstd-sys", ] @@ -2245,41 +2244,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "rustls" -version = "0.23.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" -dependencies = [ - "log", - "once_cell", - "ring", - "rustls-pki-types", - "rustls-webpki", - "subtle", - "zeroize", -] - -[[package]] -name = "rustls-pki-types" -version = "1.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" -dependencies = [ - "zeroize", -] - -[[package]] -name = "rustls-webpki" -version = "0.103.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" -dependencies = [ - "ring", - "rustls-pki-types", - "untrusted", -] - [[package]] name = "rustversion" version = "1.0.22" @@ -3187,35 +3151,6 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" -[[package]] -name = "ureq" -version = "3.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea7109cdcd5864d4eeb1b58a1648dc9bf520360d7af16ec26d0a9354bafcfc0" -dependencies = [ - "base64", - "flate2", - "log", - "percent-encoding", - "rustls", - "rustls-pki-types", - "ureq-proto", - "utf8-zero", - "webpki-roots", -] - -[[package]] -name = "ureq-proto" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e994ba84b0bd1b1b0cf92878b7ef898a5c1760108fe7b6010327e274917a808c" -dependencies = [ - "base64", - "http", - "httparse", - "log", -] - [[package]] name = "url" version = "2.5.7" @@ -3228,12 +3163,6 @@ dependencies = [ "serde", ] -[[package]] -name = "utf8-zero" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8c0a043c9540bae7c578c88f91dda8bd82e59ae27c21baca69c8b191aaf5a6e" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -3379,15 +3308,6 @@ dependencies = [ "semver", ] -[[package]] -name = "webpki-roots" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" -dependencies = [ - "rustls-pki-types", -] - [[package]] name = "whoami" version = "1.6.1" diff --git a/Cargo.toml b/Cargo.toml index 1907fc8c..965966a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,7 +64,6 @@ tokio-util = { version = "0.7.18", features = ["io"] } quickcheck = "1.1.0" rand = "0.10.0" chacha20 = "0.10.0" -ureq = "3" [features] default = ["diff_mydrs"] diff --git a/src/hash.rs b/src/hash.rs index 0d762f5f..90b543a1 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -288,14 +288,13 @@ pub fn set_hash_kind_for_test(kind: HashKind) -> HashKindGuard { mod tests { use std::{ + env, io::{BufReader, Read, Seek, SeekFrom}, + path::PathBuf, str::FromStr, }; - use crate::{ - hash::{HashKind, ObjectHash, set_hash_kind_for_test}, - internal::pack::test_pack_download::download_pack_file, - }; + use crate::hash::{HashKind, ObjectHash, set_hash_kind_for_test}; /// Hashing "Hello, world!" with SHA1 should match known value. #[test] @@ -329,7 +328,8 @@ mod tests { #[test] fn test_signature_without_delta() { let _guard = set_hash_kind_for_test(HashKind::Sha1); - let (source, _dl_guard) = download_pack_file("small-sha1.pack"); + let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + source.push("tests/data/packs/small-sha1.pack"); let f = std::fs::File::open(source).unwrap(); let mut buffered = BufReader::new(f); @@ -345,7 +345,8 @@ mod tests { #[test] fn test_signature_without_delta_sha256() { let _guard = set_hash_kind_for_test(HashKind::Sha256); - let (source, _dl_guard) = download_pack_file("small-sha256.pack"); + let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + source.push("tests/data/packs/small-sha256.pack"); let f = std::fs::File::open(source).unwrap(); let mut buffered = BufReader::new(f); diff --git a/src/internal/pack/decode.rs b/src/internal/pack/decode.rs index ca41926d..44efaf28 100644 --- a/src/internal/pack/decode.rs +++ b/src/internal/pack/decode.rs @@ -795,7 +795,7 @@ impl Pack { #[cfg(test)] mod tests { use std::{ - fs, + env, fs, io::{BufReader, Cursor, prelude::*}, path::PathBuf, sync::{ @@ -810,12 +810,13 @@ mod tests { use crate::{ hash::{HashKind, ObjectHash, set_hash_kind_for_test}, - internal::pack::{Pack, test_pack_download::download_pack_file, tests::init_logger}, + internal::pack::{Pack, tests::init_logger}, }; #[tokio::test] async fn test_pack_check_header() { - let (source, _guard) = download_pack_file("medium-sha1.pack"); + let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + source.push("tests/data/packs/medium-sha1.pack"); let f = fs::File::open(source).unwrap(); let mut buf_reader = BufReader::new(f); @@ -862,9 +863,10 @@ mod tests { } /// Helper function to run decode tests without delta objects - fn run_decode_no_delta(filename: &str, kind: HashKind) { + fn run_decode_no_delta(rel_path: &str, kind: HashKind) { let _guard = set_hash_kind_for_test(kind); - let (source, _dl_guard) = download_pack_file(filename); + let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + source.push(rel_path); let tmp = PathBuf::from("/tmp/.cache_temp"); @@ -876,16 +878,17 @@ mod tests { } #[test] fn test_pack_decode_without_delta() { - run_decode_no_delta("small-sha1.pack", HashKind::Sha1); - run_decode_no_delta("small-sha256.pack", HashKind::Sha256); + run_decode_no_delta("tests/data/packs/small-sha1.pack", HashKind::Sha1); + run_decode_no_delta("tests/data/packs/small-sha256.pack", HashKind::Sha256); } /// Helper function to run decode tests with delta objects - fn run_decode_with_ref_delta(filename: &str, kind: HashKind) { + fn run_decode_with_ref_delta(rel_path: &str, kind: HashKind) { let _guard = set_hash_kind_for_test(kind); init_logger(); - let (source, _dl_guard) = download_pack_file(filename); + let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + source.push(rel_path); let tmp = PathBuf::from("/tmp/.cache_temp"); @@ -897,14 +900,15 @@ mod tests { } #[test] fn test_pack_decode_with_ref_delta() { - run_decode_with_ref_delta("ref-delta-sha1.pack", HashKind::Sha1); - run_decode_with_ref_delta("ref-delta-sha256.pack", HashKind::Sha256); + run_decode_with_ref_delta("tests/data/packs/ref-delta-sha1.pack", HashKind::Sha1); + run_decode_with_ref_delta("tests/data/packs/ref-delta-sha256.pack", HashKind::Sha256); } /// Helper function to run decode tests without memory limit - fn run_decode_no_mem_limit(filename: &str, kind: HashKind) { + fn run_decode_no_mem_limit(rel_path: &str, kind: HashKind) { let _guard = set_hash_kind_for_test(kind); - let (source, _dl_guard) = download_pack_file(filename); + let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + source.push(rel_path); let tmp = PathBuf::from("/tmp/.cache_temp"); @@ -916,15 +920,16 @@ mod tests { } #[test] fn test_pack_decode_no_mem_limit() { - run_decode_no_mem_limit("small-sha1.pack", HashKind::Sha1); - run_decode_no_mem_limit("small-sha256.pack", HashKind::Sha256); + run_decode_no_mem_limit("tests/data/packs/small-sha1.pack", HashKind::Sha1); + run_decode_no_mem_limit("tests/data/packs/small-sha256.pack", HashKind::Sha256); } /// Helper function to run decode tests with delta objects - async fn run_decode_large_with_delta(filename: &str, kind: HashKind) { + async fn run_decode_large_with_delta(rel_path: &str, kind: HashKind) { let _guard = set_hash_kind_for_test(kind); init_logger(); - let (source, _dl_guard) = download_pack_file(filename); + let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + source.push(rel_path); let tmp = PathBuf::from("/tmp/.cache_temp"); @@ -950,15 +955,16 @@ mod tests { } #[tokio::test] async fn test_pack_decode_with_large_file_with_delta_without_ref() { - run_decode_large_with_delta("medium-sha1.pack", HashKind::Sha1).await; - run_decode_large_with_delta("medium-sha256.pack", HashKind::Sha256).await; + run_decode_large_with_delta("tests/data/packs/medium-sha1.pack", HashKind::Sha1).await; + run_decode_large_with_delta("tests/data/packs/medium-sha256.pack", HashKind::Sha256).await; } // it will be stuck on dropping `Pack` on Windows if `mem_size` is None, so we need `mimalloc` /// Helper function to run decode tests with large file stream - async fn run_decode_large_stream(filename: &str, kind: HashKind) { + async fn run_decode_large_stream(rel_path: &str, kind: HashKind) { let _guard = set_hash_kind_for_test(kind); init_logger(); - let (source, _dl_guard) = download_pack_file(filename); + let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + source.push(rel_path); let tmp = PathBuf::from("/tmp/.cache_temp"); let f = tokio::fs::File::open(source).await.unwrap(); @@ -985,14 +991,15 @@ mod tests { } #[tokio::test] async fn test_decode_large_file_stream() { - run_decode_large_stream("medium-sha1.pack", HashKind::Sha1).await; - run_decode_large_stream("medium-sha256.pack", HashKind::Sha256).await; + run_decode_large_stream("tests/data/packs/medium-sha1.pack", HashKind::Sha1).await; + run_decode_large_stream("tests/data/packs/medium-sha256.pack", HashKind::Sha256).await; } /// Helper function to run decode tests with large file async - async fn run_decode_large_file_async(filename: &str, kind: HashKind) { + async fn run_decode_large_file_async(rel_path: &str, kind: HashKind) { let _guard = set_hash_kind_for_test(kind); - let (source, _dl_guard) = download_pack_file(filename); + let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + source.push(rel_path); let tmp = PathBuf::from("/tmp/.cache_temp"); let f = fs::File::open(source).unwrap(); @@ -1010,14 +1017,15 @@ mod tests { } #[tokio::test] async fn test_decode_large_file_async() { - run_decode_large_file_async("medium-sha1.pack", HashKind::Sha1).await; - run_decode_large_file_async("medium-sha256.pack", HashKind::Sha256).await; + run_decode_large_file_async("tests/data/packs/medium-sha1.pack", HashKind::Sha1).await; + run_decode_large_file_async("tests/data/packs/medium-sha256.pack", HashKind::Sha256).await; } /// Helper function to run decode tests with delta objects without reference - fn run_decode_with_delta_no_ref(filename: &str, kind: HashKind) { + fn run_decode_with_delta_no_ref(rel_path: &str, kind: HashKind) { let _guard = set_hash_kind_for_test(kind); - let (source, _dl_guard) = download_pack_file(filename); + let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + source.push(rel_path); let tmp = PathBuf::from("/tmp/.cache_temp"); @@ -1029,8 +1037,8 @@ mod tests { } #[test] fn test_pack_decode_with_delta_without_ref() { - run_decode_with_delta_no_ref("medium-sha1.pack", HashKind::Sha1); - run_decode_with_delta_no_ref("medium-sha256.pack", HashKind::Sha256); + run_decode_with_delta_no_ref("tests/data/packs/medium-sha1.pack", HashKind::Sha1); + run_decode_with_delta_no_ref("tests/data/packs/medium-sha256.pack", HashKind::Sha256); } #[test] // Take too long time @@ -1041,12 +1049,12 @@ mod tests { .unwrap(); rt.block_on(async move { // For each hash kind, run two decode tasks concurrently to simulate multi-task pressure. - for (kind, filename) in [ - (HashKind::Sha1, "medium-sha1.pack"), - (HashKind::Sha256, "medium-sha256.pack"), + for (kind, path) in [ + (HashKind::Sha1, "tests/data/packs/medium-sha1.pack"), + (HashKind::Sha256, "tests/data/packs/medium-sha256.pack"), ] { - let f1 = run_decode_large_with_delta(filename, kind); - let f2 = run_decode_large_with_delta(filename, kind); + let f1 = run_decode_large_with_delta(path, kind); + let f2 = run_decode_large_with_delta(path, kind); let _ = futures::future::join(f1, f2).await; } }); diff --git a/src/internal/pack/encode.rs b/src/internal/pack/encode.rs index 78b5f66a..0f8e02ec 100644 --- a/src/internal/pack/encode.rs +++ b/src/internal/pack/encode.rs @@ -259,6 +259,76 @@ fn cheap_similar(a: &[u8], b: &[u8]) -> bool { calc_hash(&a[..k]) == calc_hash(&b[..k]) } +#[derive(Debug, Default)] +struct TypedEntryBuckets { + commits: Vec>, + trees: Vec>, + blobs: Vec>, + tags: Vec>, +} + +async fn classify_entries_by_type( + entry_rx: &mut mpsc::Receiver>, +) -> Result { + let mut buckets = TypedEntryBuckets::default(); + + while let Some(entry) = entry_rx.recv().await { + match entry.inner.obj_type { + ObjectType::Commit => buckets.commits.push(entry), + ObjectType::Tree => buckets.trees.push(entry), + ObjectType::Blob => buckets.blobs.push(entry), + ObjectType::Tag => buckets.tags.push(entry), + _ => { + return Err(GitError::PackEncodeError(format!( + "object type `{}` is not supported by delta-window pack encoding", + entry.inner.obj_type + ))); + } + } + } + + Ok(buckets) +} + +fn sort_entry_buckets(buckets: &mut TypedEntryBuckets) { + buckets.commits.sort_by(magic_sort); + buckets.trees.sort_by(magic_sort); + buckets.blobs.sort_by(magic_sort); + buckets.tags.sort_by(magic_sort); +} + +fn score_delta_candidate(base: &Entry, entry: &Entry) -> Option { + if base.obj_type != entry.obj_type { + return None; + } + + if base.chain_len >= MAX_CHAIN_LEN { + return None; + } + + if base.hash == entry.hash { + return None; + } + + let min_len = base.data.len().min(entry.data.len()); + let max_len = base.data.len().max(entry.data.len()); + if max_len == 0 || (min_len as f64 / max_len as f64) < 0.5 { + return None; + } + + if !cheap_similar(&base.data, &entry.data) { + return None; + } + + let rate = if (base.data.len() + entry.data.len()) / 2 > 64 { + delta::heuristic_encode_rate_parallel(&base.data, &entry.data) + } else { + delta::encode_rate(&base.data, &entry.data) + }; + + (rate > MIN_DELTA_RATE).then_some(rate) +} + impl PackEncoder { pub fn new(object_number: usize, window_size: usize, sender: mpsc::Sender>) -> Self { PackEncoder { @@ -358,45 +428,23 @@ impl PackEncoder { )); } - let mut commits: Vec> = Vec::new(); - let mut trees: Vec> = Vec::new(); - let mut blobs: Vec> = Vec::new(); - let mut tags: Vec> = Vec::new(); - while let Some(entry) = entry_rx.recv().await { - match entry.inner.obj_type { - ObjectType::Commit => { - commits.push(entry); - } - ObjectType::Tree => { - trees.push(entry); - } - ObjectType::Blob => { - blobs.push(entry); - } - ObjectType::Tag => { - tags.push(entry); - } - _ => { - return Err(GitError::PackEncodeError(format!( - "object type `{}` is not supported by delta-window pack encoding", - entry.inner.obj_type - ))); - } - } - } - - commits.sort_by(magic_sort); - trees.sort_by(magic_sort); - blobs.sort_by(magic_sort); - tags.sort_by(magic_sort); + let mut buckets = classify_entries_by_type(&mut entry_rx).await?; + sort_entry_buckets(&mut buckets); tracing::info!( "numbers : commits: {:?} trees: {:?} blobs:{:?} tag :{:?}", - commits.len(), - trees.len(), - blobs.len(), - tags.len() + buckets.commits.len(), + buckets.trees.len(), + buckets.blobs.len(), + buckets.tags.len() ); + let TypedEntryBuckets { + commits, + trees, + blobs, + tags, + } = buckets; + // parallel encoding vec with different object_type let (commit_results, tree_results, blob_results, tag_results) = tokio::try_join!( tokio::task::spawn_blocking(move || { @@ -496,41 +544,7 @@ impl PackEncoder { .par_iter() .with_min_len(3) .filter_map(|try_base| { - if try_base.0.obj_type != entry.obj_type { - return None; - } - - if try_base.0.chain_len >= MAX_CHAIN_LEN { - return None; - } - - if try_base.0.hash == entry.hash { - return None; - } - - let sym_ratio = (try_base.0.data.len().min(entry.data.len()) as f64) - / (try_base.0.data.len().max(entry.data.len()) as f64); - if sym_ratio < 0.5 { - return None; - } - - if !cheap_similar(&try_base.0.data, &entry.data) { - return None; - } - - let rate = if (try_base.0.data.len() + entry.data.len()) / 2 > 64 { - delta::heuristic_encode_rate_parallel(&try_base.0.data, &entry.data) - } else { - delta::encode_rate(&try_base.0.data, &entry.data) - // let try_delta_obj = zstdelta::diff(&try_base.0.data, &entry.data).unwrap(); - // 1.0 - try_delta_obj.len() as f64 / entry.data.len() as f64 - }; - - if rate > MIN_DELTA_RATE { - Some((rate, try_base)) - } else { - None - } + score_delta_candidate(&try_base.0, entry).map(|rate| (rate, try_base)) }) .collect(); @@ -746,7 +760,7 @@ impl PackEncoder { #[cfg(test)] mod tests { - use std::{io::Cursor, path::PathBuf, sync::Arc, time::Instant}; + use std::{env, io::Cursor, path::PathBuf, sync::Arc, time::Instant}; use tempfile::tempdir; use tokio::sync::Mutex; @@ -756,12 +770,7 @@ mod tests { hash::{HashKind, ObjectHash, set_hash_kind_for_test}, internal::{ object::{blob::Blob, types::ObjectType}, - pack::{ - Pack, - test_pack_download::{PackFileGuard, download_pack_file}, - tests::init_logger, - utils::read_offset_encoding, - }, + pack::{Pack, tests::init_logger, utils::read_offset_encoding}, }, time_it, }; @@ -926,8 +935,128 @@ mod tests { assert!(matches!(err, GitError::PackEncodeError(_))); } - async fn get_entries_for_test() -> (Arc>>, PackFileGuard) { - let (source, dl_guard) = download_pack_file("encode-test-sha1.pack"); + fn entry_with_type( + content: &str, + obj_type: ObjectType, + file_path: Option<&str>, + ) -> MetaAttached { + let mut entry: Entry = Blob::from_content(content).into(); + entry.obj_type = obj_type; + let mut meta = EntryMeta::new(); + meta.file_path = file_path.map(str::to_string); + MetaAttached { inner: entry, meta } + } + + fn blob_entry_from_bytes(data: Vec) -> Entry { + Blob::from_content_bytes(data).into() + } + + #[tokio::test] + async fn test_classify_entries_by_type_groups_supported_objects() { + let _guard = set_hash_kind_for_test(HashKind::Sha1); + let (entry_tx, mut entry_rx) = mpsc::channel::>(4); + + for obj_type in [ + ObjectType::Commit, + ObjectType::Tree, + ObjectType::Blob, + ObjectType::Tag, + ] { + entry_tx + .send(entry_with_type("content", obj_type, None)) + .await + .expect("send entry"); + } + drop(entry_tx); + + let buckets = classify_entries_by_type(&mut entry_rx) + .await + .expect("classify supported objects"); + + assert_eq!(buckets.commits.len(), 1); + assert_eq!(buckets.trees.len(), 1); + assert_eq!(buckets.blobs.len(), 1); + assert_eq!(buckets.tags.len(), 1); + } + + #[tokio::test] + async fn test_classify_entries_by_type_rejects_delta_objects() { + let _guard = set_hash_kind_for_test(HashKind::Sha1); + let (entry_tx, mut entry_rx) = mpsc::channel::>(1); + + entry_tx + .send(entry_with_type("delta", ObjectType::OffsetDelta, None)) + .await + .expect("send entry"); + drop(entry_tx); + + let err = classify_entries_by_type(&mut entry_rx) + .await + .expect_err("delta objects should be rejected before delta-window encoding"); + assert!(matches!(err, GitError::PackEncodeError(_))); + } + + #[test] + fn test_sort_entry_buckets_uses_magic_sort_for_each_type() { + let _guard = set_hash_kind_for_test(HashKind::Sha1); + let mut buckets = TypedEntryBuckets { + blobs: vec![ + entry_with_type("no path", ObjectType::Blob, None), + entry_with_type("file 10", ObjectType::Blob, Some("src/file10.txt")), + entry_with_type("file 2", ObjectType::Blob, Some("src/file2.txt")), + ], + ..TypedEntryBuckets::default() + }; + + sort_entry_buckets(&mut buckets); + + let sorted_paths: Vec<_> = buckets + .blobs + .iter() + .map(|entry| entry.meta.file_path.as_deref()) + .collect(); + assert_eq!( + sorted_paths, + vec![Some("src/file2.txt"), Some("src/file10.txt"), None] + ); + } + + #[test] + fn test_score_delta_candidate_rejects_unsuitable_bases() { + let _guard = set_hash_kind_for_test(HashKind::Sha1); + let entry = blob_entry_from_bytes(vec![b'a'; 128]); + + assert!(score_delta_candidate(&entry, &entry).is_none()); + + let mut saturated_chain_base = blob_entry_from_bytes(vec![b'a'; 128]); + saturated_chain_base.chain_len = MAX_CHAIN_LEN; + assert!(score_delta_candidate(&saturated_chain_base, &entry).is_none()); + + let mut tree_base = blob_entry_from_bytes(vec![b'a'; 128]); + tree_base.obj_type = ObjectType::Tree; + assert!(score_delta_candidate(&tree_base, &entry).is_none()); + + let small_base = blob_entry_from_bytes(vec![b'a'; 32]); + let large_entry = blob_entry_from_bytes(vec![b'a'; 128]); + assert!(score_delta_candidate(&small_base, &large_entry).is_none()); + } + + #[test] + fn test_score_delta_candidate_accepts_similar_content() { + let _guard = set_hash_kind_for_test(HashKind::Sha1); + let base = blob_entry_from_bytes(vec![b'a'; 128]); + let mut data = vec![b'a'; 128]; + data.extend_from_slice(b" with a small tail"); + let entry = blob_entry_from_bytes(data); + + let score = score_delta_candidate(&base, &entry); + + assert!(score.is_some_and(|rate| rate > MIN_DELTA_RATE)); + } + + async fn get_entries_for_test() -> Arc>> { + let source = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests/data/packs/encode-test-sha1.pack"); let mut p = Pack::new(None, None, Some(PathBuf::from("/tmp/.cache_temp")), true); @@ -949,10 +1078,11 @@ mod tests { tracing::info!("total entries: {}", p.number); drop(p); - (entries, dl_guard) + entries } - async fn get_entries_for_test_sha256() -> (Arc>>, PackFileGuard) { - let (source, dl_guard) = download_pack_file("encode-test-sha256.pack"); + async fn get_entries_for_test_sha256() -> Arc>> { + let source = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests/data/packs/encode-test-sha256.pack"); let mut p = Pack::new(None, None, Some(PathBuf::from("/tmp/.cache_temp")), true); @@ -974,7 +1104,7 @@ mod tests { tracing::info!("total entries: {}", p.number); drop(p); - (entries, dl_guard) + entries } #[tokio::test] @@ -983,7 +1113,7 @@ mod tests { init_logger(); let start = Instant::now(); - let (entries, _dl_guard) = get_entries_for_test().await; + let entries = get_entries_for_test().await; let entries_number = entries.lock().await.len(); let total_original_size: usize = entries @@ -1046,7 +1176,7 @@ mod tests { let start = Instant::now(); // use sha256 pack file for testing - let (entries, _dl_guard) = get_entries_for_test_sha256().await; + let entries = get_entries_for_test_sha256().await; let entries_number = entries.lock().await.len(); let total_original_size: usize = entries @@ -1104,7 +1234,7 @@ mod tests { async fn test_pack_encoder_large_file() { let _guard = set_hash_kind_for_test(HashKind::Sha1); init_logger(); - let (entries, _dl_guard) = get_entries_for_test().await; + let entries = get_entries_for_test().await; let entries_number = entries.lock().await.len(); let total_original_size: usize = entries @@ -1173,7 +1303,7 @@ mod tests { async fn test_pack_encoder_large_file_sha256() { let _guard = set_hash_kind_for_test(HashKind::Sha256); init_logger(); - let (entries, _dl_guard) = get_entries_for_test_sha256().await; + let entries = get_entries_for_test_sha256().await; let entries_number = entries.lock().await.len(); let total_original_size: usize = entries @@ -1243,7 +1373,7 @@ mod tests { async fn test_pack_encoder_with_zstdelta() { let _guard = set_hash_kind_for_test(HashKind::Sha1); init_logger(); - let (entries, _dl_guard) = get_entries_for_test().await; + let entries = get_entries_for_test().await; let entries_number = entries.lock().await.len(); let total_original_size: usize = entries @@ -1305,7 +1435,7 @@ mod tests { async fn test_pack_encoder_with_zstdelta_sha256() { let _guard = set_hash_kind_for_test(HashKind::Sha256); init_logger(); - let (entries, _dl_guard) = get_entries_for_test_sha256().await; + let entries = get_entries_for_test_sha256().await; let entries_number = entries.lock().await.len(); let total_original_size: usize = entries @@ -1381,7 +1511,7 @@ mod tests { async fn test_pack_encoder_large_file_with_delta() { let _guard = set_hash_kind_for_test(HashKind::Sha1); init_logger(); - let (entries, _dl_guard) = get_entries_for_test().await; + let entries = get_entries_for_test().await; let entries_number = entries.lock().await.len(); let total_original_size: usize = entries @@ -1444,7 +1574,7 @@ mod tests { async fn test_pack_encoder_large_file_with_delta_sha256() { let _guard = set_hash_kind_for_test(HashKind::Sha256); init_logger(); - let (entries, _dl_guard) = get_entries_for_test_sha256().await; + let entries = get_entries_for_test_sha256().await; let entries_number = entries.lock().await.len(); let total_original_size: usize = entries @@ -1508,7 +1638,7 @@ mod tests { async fn test_pack_encoder_output_to_files() { let _guard = set_hash_kind_for_test(HashKind::Sha1); init_logger(); - let (entries, _dl_guard) = get_entries_for_test().await; + let entries = get_entries_for_test().await; let entries_number = entries.lock().await.len(); let total_original_size: usize = entries @@ -1576,7 +1706,7 @@ mod tests { async fn test_pack_encoder_output_to_files_with_delta() { let _guard = set_hash_kind_for_test(HashKind::Sha1); init_logger(); - let (entries, _dl_guard) = get_entries_for_test().await; + let entries = get_entries_for_test().await; let entries_number = entries.lock().await.len(); let total_original_size: usize = entries diff --git a/src/internal/pack/mod.rs b/src/internal/pack/mod.rs index 11c72693..dedbb2c7 100644 --- a/src/internal/pack/mod.rs +++ b/src/internal/pack/mod.rs @@ -39,9 +39,6 @@ pub struct Pack { pub clean_tmp: bool, } -#[cfg(test)] -pub(crate) mod test_pack_download; - #[cfg(test)] mod tests { use tracing_subscriber::util::SubscriberInitExt; diff --git a/src/internal/pack/pack_index.rs b/src/internal/pack/pack_index.rs index 5fe2694a..5f21b686 100644 --- a/src/internal/pack/pack_index.rs +++ b/src/internal/pack/pack_index.rs @@ -83,7 +83,8 @@ impl IdxBuilder { /// Write the fanout table for the index. async fn write_fanout(&mut self, entries: &mut [IndexEntry]) -> Result<(), GitError> { - entries.sort_by(|a, b| a.hash.cmp(&b.hash)); + // entries.sort_by(|a, b| a.hash.cmp(&b.hash)); + entries.sort_by_key(|a| a.hash); let mut fanout = [0u32; 256]; for entry in entries.iter() { fanout[entry.hash.to_data()[0] as usize] += 1; diff --git a/src/protocol/pack.rs b/src/protocol/pack.rs index d75ff838..686e4595 100644 --- a/src/protocol/pack.rs +++ b/src/protocol/pack.rs @@ -235,16 +235,16 @@ where .await?; } crate::internal::object::tree::TreeItemMode::Blob - | crate::internal::object::tree::TreeItemMode::BlobExecutable => { - if !visited_blobs.contains(&entry_hash) { - visited_blobs.insert(entry_hash.clone()); - let blob = self.repo_access.get_blob(&entry_hash).await.map_err(|e| { - ProtocolError::repository_error(format!( - "Failed to get blob {entry_hash}: {e}" - )) - })?; - blobs.push(blob); - } + | crate::internal::object::tree::TreeItemMode::BlobExecutable + if !visited_blobs.contains(&entry_hash) => + { + visited_blobs.insert(entry_hash.clone()); + let blob = self.repo_access.get_blob(&entry_hash).await.map_err(|e| { + ProtocolError::repository_error(format!( + "Failed to get blob {entry_hash}: {e}" + )) + })?; + blobs.push(blob); } _ => {} } diff --git a/tests/data/packs/.gitignore b/tests/data/packs/.gitignore index 901ed74c..bd745683 100644 --- a/tests/data/packs/.gitignore +++ b/tests/data/packs/.gitignore @@ -1,2 +1,3 @@ -*.pack -*.idx +version https://git-lfs.github.com/spec/v1 +oid sha256:858aaed41268637f7d03620aa7e32e1149f7b182d6afe501b2ef2b937785ff74 +size 13 diff --git a/tests/data/packs/encode-test-sha1.idx b/tests/data/packs/encode-test-sha1.idx new file mode 100644 index 00000000..dbb8e5a9 --- /dev/null +++ b/tests/data/packs/encode-test-sha1.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf950fb445825e7f5774bb21fd741b5a3e7cd0fa8d5df0193a070835532f0cbf +size 141912 diff --git a/tests/data/packs/encode-test-sha1.pack b/tests/data/packs/encode-test-sha1.pack new file mode 100644 index 00000000..fcfa782b --- /dev/null +++ b/tests/data/packs/encode-test-sha1.pack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1b136a987b979578172e7d37f3036bfb981454215df75ddba22df692c122e2 +size 5481392 diff --git a/tests/data/packs/encode-test-sha256.idx b/tests/data/packs/encode-test-sha256.idx new file mode 100644 index 00000000..bbfca82a --- /dev/null +++ b/tests/data/packs/encode-test-sha256.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b043edc35ba9174a641b3adab4928f42dced59b2dab2e6b7b0ce8e148cbaf33 +size 202296 diff --git a/tests/data/packs/encode-test-sha256.pack b/tests/data/packs/encode-test-sha256.pack new file mode 100644 index 00000000..3f3e2fd1 --- /dev/null +++ b/tests/data/packs/encode-test-sha256.pack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:674aa8e4e84965d474af70eb65e5c0ac0ee8a3f99c74ba97d39ef5e6870939f1 +size 5522940 diff --git a/tests/data/packs/medium-sha1.idx b/tests/data/packs/medium-sha1.idx new file mode 100644 index 00000000..85dcb563 --- /dev/null +++ b/tests/data/packs/medium-sha1.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf2b42fa3e6af402364381c410a8dc062fca9ace87c4b41e8d826986160e77c +size 981940 diff --git a/tests/data/packs/medium-sha1.pack b/tests/data/packs/medium-sha1.pack new file mode 100644 index 00000000..9f620d84 --- /dev/null +++ b/tests/data/packs/medium-sha1.pack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc7d0c2ca200496926ef31f3bb10dbb46660b72bcf0cd4a2095e91ea100e084 +size 119619498 diff --git a/tests/data/packs/medium-sha256.idx b/tests/data/packs/medium-sha256.idx new file mode 100644 index 00000000..ab17a442 --- /dev/null +++ b/tests/data/packs/medium-sha256.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29c7a170883b31a8c5c426fe4a9a7576e4c56e8bd01df4c9edb63378d30890fb +size 1402336 diff --git a/tests/data/packs/medium-sha256.pack b/tests/data/packs/medium-sha256.pack new file mode 100644 index 00000000..413625a8 --- /dev/null +++ b/tests/data/packs/medium-sha256.pack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5041b1b340fb3e8da23f88d945984504be69268202ae9803e440d3eb215c0c8 +size 120043690 diff --git a/tests/data/packs/ref-delta-sha1.idx b/tests/data/packs/ref-delta-sha1.idx new file mode 100644 index 00000000..c97d939a --- /dev/null +++ b/tests/data/packs/ref-delta-sha1.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35ff0dc6ddd39bdab52641b42f35d510aca07b8f3d978fc0e5cb2ffec12d2fb6 +size 2304 diff --git a/tests/data/packs/ref-delta-sha1.pack b/tests/data/packs/ref-delta-sha1.pack new file mode 100644 index 00000000..2e5ab5ae --- /dev/null +++ b/tests/data/packs/ref-delta-sha1.pack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23f071c5882cf66a378686d32a7245e80c440940b0d0a7fc755137b119974422 +size 3872 diff --git a/tests/data/packs/ref-delta-sha256.idx b/tests/data/packs/ref-delta-sha256.idx new file mode 100644 index 00000000..0cb8daf9 --- /dev/null +++ b/tests/data/packs/ref-delta-sha256.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01a18333e95716a61624a73712ab8fc3f9291f0931cb259f448f7aa7376a53ab +size 2856 diff --git a/tests/data/packs/ref-delta-sha256.pack b/tests/data/packs/ref-delta-sha256.pack new file mode 100644 index 00000000..a457481b --- /dev/null +++ b/tests/data/packs/ref-delta-sha256.pack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa44b532f6dfbbe24862945bb4fe454ce4b8fea62e4d336ac7c4058efc85b90b +size 4459 diff --git a/tests/data/packs/small-sha1.idx b/tests/data/packs/small-sha1.idx new file mode 100644 index 00000000..072f699e --- /dev/null +++ b/tests/data/packs/small-sha1.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30700a10810c0fb01bb2c89c85d5a57d7ac4a2ec1ab328a89922463223667b71 +size 1604 diff --git a/tests/data/packs/small-sha1.pack b/tests/data/packs/small-sha1.pack new file mode 100644 index 00000000..c39c0329 --- /dev/null +++ b/tests/data/packs/small-sha1.pack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d6d2b3f874500a53c605d84362a735bb98a22fcfa9d99008022507c5d17bf58 +size 2451 diff --git a/tests/data/packs/small-sha256.idx b/tests/data/packs/small-sha256.idx new file mode 100644 index 00000000..eff79a81 --- /dev/null +++ b/tests/data/packs/small-sha256.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68bd672679c9aba124999ff16228f4d37a9d4826773635e2a6cd6acc878afd74 +size 1856 diff --git a/tests/data/packs/small-sha256.pack b/tests/data/packs/small-sha256.pack new file mode 100644 index 00000000..23608d8e --- /dev/null +++ b/tests/data/packs/small-sha256.pack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12fb3b3c868d31ff5c73cfcb73651f661fb08221a332c20f3d19b1c5b9b702f3 +size 2778 diff --git a/tests/decode-index-pack.rs b/tests/decode-index-pack.rs index 0ca444b6..080dafb3 100644 --- a/tests/decode-index-pack.rs +++ b/tests/decode-index-pack.rs @@ -6,12 +6,10 @@ use std::{ convert::TryInto, fs, io::BufReader, + path::PathBuf, sync::{Arc, Mutex}, }; -mod common; - -use common::download_pack_file; use git_internal::{ errors::GitError, hash::{HashKind, ObjectHash, set_hash_kind_for_test}, @@ -26,6 +24,23 @@ use git_internal::{ }; use tokio::sync::mpsc; +fn packs_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/data/packs") +} + +fn find_pack(prefix: &str) -> PathBuf { + let dir = packs_dir(); + for entry in fs::read_dir(&dir).expect("read packs dir failed") { + let entry = entry.expect("dir entry error"); + let name = entry.file_name(); + let name = name.to_string_lossy(); + if name.starts_with(prefix) && name.ends_with(".pack") { + return entry.path(); + } + } + panic!("pack with prefix `{prefix}` not found in {:?}", dir); +} + fn parse_idx_offsets(idx_bytes: &[u8], kind: HashKind) -> HashMap, u64> { assert!(idx_bytes.len() >= 8, "idx too short"); assert_eq!(&idx_bytes[0..4], &[0xFF, 0x74, 0x4F, 0x63], "idx magic"); @@ -89,8 +104,8 @@ fn parse_idx_offsets(idx_bytes: &[u8], kind: HashKind) -> HashMap, u64> type DecodePackResult = Result<(Vec>, ObjectHash, usize), GitError>; -fn decode_pack(filename: &str) -> DecodePackResult { - let (pack_path, _dl_guard) = download_pack_file(filename); +fn decode_pack(prefix: &str) -> DecodePackResult { + let pack_path = find_pack(prefix); let file = fs::File::open(pack_path)?; let mut reader = BufReader::new(file); let mut pack = Pack::new(Some(2), Some(64 * 1024 * 1024), None, true); @@ -112,9 +127,9 @@ fn decode_pack(filename: &str) -> DecodePackResult { Ok((entries, pack_hash, count)) } -async fn roundtrip(filename: &str, kind: HashKind) -> Result<(), GitError> { +async fn roundtrip(prefix: &str, kind: HashKind) -> Result<(), GitError> { let _guard = set_hash_kind_for_test(kind); - let (metas, pack_hash, count) = decode_pack(filename)?; + let (metas, pack_hash, count) = decode_pack(prefix)?; assert_eq!(metas.len(), count, "decoded entries count mismatch"); let mut idx_entries = Vec::with_capacity(metas.len()); @@ -145,20 +160,20 @@ async fn roundtrip(filename: &str, kind: HashKind) -> Result<(), GitError> { #[tokio::test] async fn idx_offsets_match_sha1_small() -> Result<(), GitError> { - roundtrip("small-sha1.pack", HashKind::Sha1).await + roundtrip("small-sha1", HashKind::Sha1).await } #[tokio::test] async fn idx_offsets_match_sha1_delta() -> Result<(), GitError> { - roundtrip("ref-delta-sha1.pack", HashKind::Sha1).await + roundtrip("ref-delta-sha1", HashKind::Sha1).await } #[tokio::test] async fn idx_offsets_match_sha256_small() -> Result<(), GitError> { - roundtrip("small-sha256.pack", HashKind::Sha256).await + roundtrip("small-sha256", HashKind::Sha256).await } #[tokio::test] async fn idx_offsets_match_sha256_delta() -> Result<(), GitError> { - roundtrip("ref-delta-sha256.pack", HashKind::Sha256).await + roundtrip("ref-delta-sha256", HashKind::Sha256).await }