From 02237f2014338e37098d1f7867a1ebf07d694563 Mon Sep 17 00:00:00 2001 From: "Ilya (Marshal)" Date: Fri, 5 Dec 2025 12:33:36 +0100 Subject: [PATCH 1/4] Fix leading zero handling of CIDs in DAG-CBOR --- Cargo.toml | 2 +- pytests/test_dag_cbor.py | 10 ++++++++++ src/lib.rs | 33 +++++++++++++++++++++++++-------- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 01e1dbb..998a1d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libipld" -version = "3.3.1" +version = "3.3.2" edition = "2021" license = "MIT" description = "Python binding to the Rust IPLD library" diff --git a/pytests/test_dag_cbor.py b/pytests/test_dag_cbor.py index d2b5046..5c2bff1 100644 --- a/pytests/test_dag_cbor.py +++ b/pytests/test_dag_cbor.py @@ -267,3 +267,13 @@ def test_encode_tag_negative_bignum() -> None: libipld.encode_dag_cbor(bignum) assert 'number out of range' in str(exc_info.value).lower() + + +def test_roundtrip_valid_cid_with_short_tag() -> None: + encoded_hex = 'd82a582500015512205891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03' + encoded_bytes = bytes.fromhex(encoded_hex) + + decoded = libipld.decode_dag_cbor(encoded_bytes) + encoded = libipld.encode_dag_cbor(decoded) + + assert encoded == encoded_bytes diff --git a/src/lib.rs b/src/lib.rs index 8b19c16..9481c64 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,6 +64,18 @@ impl<'de> dec::Read<'de> for SliceReader<'de> { } } +struct PrefixedCidBytes<'a>(&'a [u8]); + +impl<'a> Encode for PrefixedCidBytes<'a> { + fn encode(&self, w: &mut W) -> Result<(), enc::Error> { + // length prefix for bytes: 1 (leading 0) + payload + types::Bytes::bounded(1 + self.0.len(), w)?; + w.push(&[0x00])?; + w.push(self.0)?; + Ok(()) + } +} + fn cid_hash_to_pydict<'py>(py: Python<'py>, cid: &Cid) -> PyResult> { let hash = cid.hash(); let dict_obj = PyDict::new(py); @@ -251,14 +263,17 @@ where let cid = >::decode(r)?.0; - if cid.len() <= 1 { - return Err(anyhow!("CID is empty or too short")); - } else if Cid::try_from(&cid[1..]).is_err() { - // Parse the CID for validation. They have a zero byte at the front, strip it off. + // we expect CIDs to have a leading zero byte + if cid.len() <= 1 || cid[0] != 0 { + return Err(anyhow!("Invalid CID")); + } + + let cid_without_prefix = &cid[1..]; + if Cid::try_from(cid_without_prefix).is_err() { return Err(anyhow!("Invalid CID")); } - PyBytes::new(py, cid).into_pyobject(py)?.into() + PyBytes::new(py, cid_without_prefix).into_pyobject(py)?.into() } major::SIMPLE => match byte { // FIXME(MarshalX): should be more clear for bool? @@ -384,11 +399,13 @@ where Ok(()) } else if let Ok(b) = obj.cast::() { // FIXME (MarshalX): it's not efficient to try to parse it as CID - let cid = Cid::try_from(b.as_bytes()); + let bytes = b.as_bytes(); + let cid = Cid::try_from(bytes); if cid.is_ok() { - types::Tag(42, b.as_bytes()).encode(w)?; + // by providing custom encoding we avoid extra allocation + types::Tag(42, PrefixedCidBytes(bytes)).encode(w)?; } else { - types::Bytes(b.as_bytes()).encode(w)?; + types::Bytes(bytes).encode(w)?; } Ok(()) From 34bcadc70c2e056c79bd06064536b20d35772bb9 Mon Sep 17 00:00:00 2001 From: "Ilya (Marshal)" Date: Fri, 5 Dec 2025 12:35:23 +0100 Subject: [PATCH 2/4] fix fmt --- src/lib.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9481c64..0dfbff2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -266,14 +266,16 @@ where // we expect CIDs to have a leading zero byte if cid.len() <= 1 || cid[0] != 0 { return Err(anyhow!("Invalid CID")); - } - + } + let cid_without_prefix = &cid[1..]; if Cid::try_from(cid_without_prefix).is_err() { return Err(anyhow!("Invalid CID")); } - PyBytes::new(py, cid_without_prefix).into_pyobject(py)?.into() + PyBytes::new(py, cid_without_prefix) + .into_pyobject(py)? + .into() } major::SIMPLE => match byte { // FIXME(MarshalX): should be more clear for bool? From 490357c7fbb48bb3572f9b80e554e988ca40be68 Mon Sep 17 00:00:00 2001 From: "Ilya (Marshal)" Date: Fri, 5 Dec 2025 12:49:12 +0100 Subject: [PATCH 3/4] fix DASL testing --- .github/workflows/test.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 15ef256..90e25e9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -38,12 +38,21 @@ jobs: path: dasl-testing ref: ${{ env.DASL_TESTING_REF }} + - name: Build libipld wheel. + run: uv build --wheel + - name: Run DASL Python harness. working-directory: dasl-testing/harnesses/python env: UV_PYTHON: "3.13" # DASL testing requires Python 3.13+ run: | - RESULT_JSON=$(uv run --with cbor2 python main.py libipld) + LIBIPLD_WHEEL=$(ls ../../../dist/libipld-*.whl) + RESULT_JSON=$( + uv run \ + --with cbor2 \ + --with "$LIBIPLD_WHEEL" \ + python main.py libipld + ) { echo "## DASL Results" From eefd06f961f717dfad8e13190fd4db4efd4ef51c Mon Sep 17 00:00:00 2001 From: "Ilya (Marshal)" Date: Fri, 5 Dec 2025 12:50:59 +0100 Subject: [PATCH 4/4] fix wheel for DASL --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 90e25e9..ee64d89 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,6 +40,8 @@ jobs: - name: Build libipld wheel. run: uv build --wheel + env: + UV_PYTHON: "3.13" # DASL testing requires Python 3.13+ - name: Run DASL Python harness. working-directory: dasl-testing/harnesses/python