Skip to content
83 changes: 83 additions & 0 deletions avro/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,89 @@ set_schemata_equality_comparator(Box::new(MyCustomSchemataEq));
If the application parses schemas before setting a comparator, the default comparator will be
registered and used!

### Deserializing Avro Byte Arrays
Comment thread
roofdiver marked this conversation as resolved.

When deserializing Avro data with Serde, fields that hold byte arrays sometimes need a special
`#[serde(with = "...")]` field attribute (not a derive), as shown below.

```rust
use serde::{Deserialize, Serialize};

/// Sample struct showing one `#[serde(with = "...")]` helper module per byte-array field shape.
#[derive(Debug, Deserialize, Serialize)]
struct SampleStruct {
// Required `Vec<u8>` field, routed through `serde_avro_bytes`.
#[serde(with = "apache_avro::serde_avro_bytes")]
non_optional_bytes: Vec<u8>,
// Optional `Vec<u8>` field, routed through `serde_avro_bytes_opt`.
#[serde(with = "apache_avro::serde_avro_bytes_opt")]
optional_bytes: Option<Vec<u8>>,
// Required fixed-length array, routed through `serde_avro_fixed`
// (presumably backed by the Avro `fixed` type -- see src/bytes.rs to confirm).
#[serde(with = "apache_avro::serde_avro_fixed")]
non_optional_fixed: [u8; 6],
// Optional fixed-length array, routed through `serde_avro_fixed_opt`.
#[serde(with = "apache_avro::serde_avro_fixed_opt")]
optional_fixed: Option<[u8; 6]>,
}
```

Here is a complete example of a serde round trip of a struct with a nullable byte array:

```rust
use serde::{Deserialize, Serialize};

/// Record type used in the round trip below; its field names match the fields of the
/// `SimpleRecord` schema. `PartialEq` is derived so the result can be checked with `assert_eq!`.
#[derive(Debug, Deserialize, PartialEq, Serialize)]
struct ExampleByteArray {
// Nullable byte payload, (de)serialized through `serde_avro_bytes_opt`.
#[serde(with = "apache_avro::serde_avro_bytes_opt")]
data_bytes: Option<Vec<u8>>,
// Nullable text field; no `with` attribute, so the default Serde handling applies.
description: Option<String>,
}

/// Writes three `ExampleByteArray` records into an in-memory Avro container, reads them
/// back, and asserts that the decoded records equal the originals.
///
/// Every fallible step is unwrapped, so any failure panics.
fn serde_byte_array() {
    // Record schema: both fields are `["null", <type>]` unions defaulting to null.
    let raw_schema = r#"
{
"type": "record",
"name": "SimpleRecord",
"fields": [
{"name": "data_bytes", "type": ["null", "bytes"], "default": null},
{"name": "description", "type": ["null", "string"], "default": null}
]
}"#;

    let schema = apache_avro::Schema::parse_str(raw_schema).unwrap();

    // Sample data covering `Some` and `None` for each of the two fields.
    let records = vec![
        ExampleByteArray {
            data_bytes: Some(vec![1, 2, 3, 4, 5]),
            description: Some("First record".to_string()),
        },
        ExampleByteArray {
            data_bytes: None,
            description: Some("Second record".to_string()),
        },
        ExampleByteArray {
            data_bytes: Some(vec![10, 20, 30]),
            description: None,
        },
    ];

    // Encode each record with the schema into an in-memory buffer.
    let mut writer = apache_avro::Writer::new(&schema, Vec::new());
    records.iter().for_each(|record| {
        writer.append_ser(record).unwrap();
    });
    let encoded = writer.into_inner().unwrap();

    // Decode the buffer back into `ExampleByteArray` values, one at a time.
    let mut deserialized_records: Vec<ExampleByteArray> = Vec::new();
    for item in apache_avro::Reader::new(&encoded[..]).unwrap() {
        let value = item.unwrap();
        deserialized_records.push(apache_avro::from_value::<ExampleByteArray>(&value).unwrap());
    }

    assert_eq!(records, deserialized_records);
}
```

The full implementation, along with other options such as fixed-size byte arrays, can be found in `src/bytes.rs`.

<!-- cargo-rdme end -->

## License
Expand Down
83 changes: 83 additions & 0 deletions avro/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,89 @@
//! If the application parses schemas before setting a comparator, the default comparator will be
//! registered and used!
//!
//! ## Deserializing Avro Byte Arrays
//!
//! When deserializing Avro data with Serde, fields that hold byte arrays sometimes need a special
//! `#[serde(with = "...")]` field attribute (not a derive), as shown below.
//!
//! ```rust
//! use serde::{Deserialize, Serialize};
//!
//! #[derive(Debug, Deserialize, Serialize)]
//! struct SampleStruct {
//! #[serde(with = "apache_avro::serde_avro_bytes")]
//! non_optional_bytes: Vec<u8>,
//! #[serde(with = "apache_avro::serde_avro_bytes_opt")]
//! optional_bytes: Option<Vec<u8>>,
//! #[serde(with = "apache_avro::serde_avro_fixed")]
//! non_optional_fixed: [u8; 6],
//! #[serde(with = "apache_avro::serde_avro_fixed_opt")]
//! optional_fixed: Option<[u8; 6]>,
//! }
//! ```
//!
//! Here is a complete example of a serde round trip of a struct with a nullable byte array:
//!
//! ```rust
//! use serde::{Deserialize, Serialize};
//!
//! #[derive(Debug, Deserialize, PartialEq, Serialize)]
//! struct ExampleByteArray {
//! #[serde(with = "apache_avro::serde_avro_bytes_opt")]
//! data_bytes: Option<Vec<u8>>,
//! description: Option<String>,
//! }
//!
//! fn serde_byte_array() {
//! let raw_schema = r#"
//! {
//! "type": "record",
//! "name": "SimpleRecord",
//! "fields": [
//! {"name": "data_bytes", "type": ["null", "bytes"], "default": null},
//! {"name": "description", "type": ["null", "string"], "default": null}
//! ]
//! }"#;
//!
//! let schema = apache_avro::Schema::parse_str(raw_schema).unwrap();
//!
//! // Create vector of ExampleByteArray
//! let records = vec![
//! ExampleByteArray {
//! data_bytes: Some(vec![1, 2, 3, 4, 5]),
//! description: Some("First record".to_string()),
//! },
//! ExampleByteArray {
//! data_bytes: None,
//! description: Some("Second record".to_string()),
//! },
//! ExampleByteArray {
//! data_bytes: Some(vec![10, 20, 30]),
//! description: None,
//! },
//! ];
//!
//! // Serialize records to Avro binary format with the schema
//! let mut writer = apache_avro::Writer::new(&schema, Vec::new());
//! for record in &records {
//! writer.append_ser(record).unwrap();
//! }
//!
//! let avro_data = writer.into_inner().unwrap();
//!
//!
//! // Deserialize Avro binary data back into ExampleByteArray structs
//! let reader = apache_avro::Reader::new(&avro_data[..]).unwrap();
//! let deserialized_records: Vec<ExampleByteArray> = reader
//! .map(|value| apache_avro::from_value::<ExampleByteArray>(&value.unwrap()).unwrap())
//! .collect();
//!
//! assert_eq!(records, deserialized_records);
//! }
//! ```
//!
//! The full implementation, along with other options such as fixed-size byte arrays, can be found in `src/bytes.rs`.
//!

mod bigdecimal;
mod bytes;
Expand Down
112 changes: 112 additions & 0 deletions avro/tests/avro-rs-285-bytes_deserialization.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
use apache_avro_test_helper::TestResult;
use serde::{Deserialize, Serialize};

/// Record type written by both tests in this file; its field names match the fields of the
/// `SimpleRecord` schema the tests parse. `PartialEq` and `Debug` are needed for `assert_eq!`.
#[derive(Debug, Deserialize, PartialEq, Serialize)]
struct ExampleByteArray {
// Nullable byte payload, (de)serialized through `serde_avro_bytes_opt`.
#[serde(with = "apache_avro::serde_avro_bytes_opt")]
data_bytes: Option<Vec<u8>>,
// Nullable text field; no `with` attribute, so the default Serde handling applies.
description: Option<String>,
}

/// Projection of `ExampleByteArray` that leaves out `data_bytes`.
///
/// The filtered test deserializes records written with the full schema into this type,
/// so the bytes field present in the data has no matching struct field.
#[derive(Deserialize, Serialize)]
struct ExampleByteArrayFiltered {
// Same name and type as `ExampleByteArray::description`.
description: Option<String>,
}

/// Checks that records with a nullable `bytes` field survive a full Avro round trip.
///
/// Three `ExampleByteArray` values (covering `Some`/`None` for each field) are written
/// with `Writer::append_ser`, read back through `Reader`, converted with `from_value`,
/// and compared to the originals.
///
/// # Errors
///
/// Schema, write, read and deserialization errors are all propagated through
/// `TestResult` instead of panicking inside the reader closure.
#[test]
fn avro_rs_285_bytes_deserialization_round_trip() -> TestResult {
    // define schema
    let raw_schema = r#"
{
"type": "record",
"name": "SimpleRecord",
"fields": [
{"name": "data_bytes", "type": ["null", "bytes"], "default": null},
{"name": "description", "type": ["null", "string"], "default": null}
]
}
"#;

    let schema = apache_avro::Schema::parse_str(raw_schema)?;

    let records = vec![
        ExampleByteArray {
            data_bytes: Some(vec![1, 2, 3, 4, 5]),
            description: Some("First record".to_string()),
        },
        ExampleByteArray {
            data_bytes: None,
            description: Some("Second record".to_string()),
        },
        ExampleByteArray {
            data_bytes: Some(vec![10, 20, 30]),
            description: None,
        },
    ];

    // serialize records to Avro binary format with schema
    let mut writer = apache_avro::Writer::new(&schema, Vec::new());
    for record in &records {
        writer.append_ser(record)?;
    }

    let avro_data = writer.into_inner()?;

    // deserialize Avro binary data back into ExampleByteArray structs;
    // collecting into `Result` stops at the first read or conversion error
    let reader = apache_avro::Reader::new(&avro_data[..])?;
    let deserialized_records = reader
        .map(|value| value.and_then(|v| apache_avro::from_value::<ExampleByteArray>(&v)))
        .collect::<Result<Vec<_>, _>>()?;

    assert_eq!(records, deserialized_records);
    Ok(())
}

/// Checks that data written with a nullable `bytes` field can be read into a struct
/// that omits that field.
///
/// Records are written as `ExampleByteArray` and read back as
/// `ExampleByteArrayFiltered`. Besides the record count, the `description` of every
/// record is compared, so a wrong (not just missing) value is detected.
///
/// # Errors
///
/// Schema, write, read and deserialization errors are all propagated through
/// `TestResult` instead of panicking inside the reader closure.
#[test]
fn avro_rs_285_bytes_deserialization_filtered_round_trip() -> TestResult {
    let raw_schema = r#"
{
"type": "record",
"name": "SimpleRecord",
"fields": [
{"name": "data_bytes", "type": ["null", "bytes"], "default": null},
{"name": "description", "type": ["null", "string"], "default": null}
]
}
"#;

    let schema = apache_avro::Schema::parse_str(raw_schema)?;

    let records = vec![
        ExampleByteArray {
            data_bytes: Some(vec![1, 2, 3, 4, 5]),
            description: Some("First record".to_string()),
        },
        ExampleByteArray {
            data_bytes: None,
            description: Some("Second record".to_string()),
        },
        ExampleByteArray {
            data_bytes: Some(vec![10, 20, 30]),
            description: None,
        },
    ];

    // serialize records to Avro binary format with schema
    let mut writer = apache_avro::Writer::new(&schema, Vec::new());
    for record in &records {
        writer.append_ser(record)?;
    }

    let avro_data = writer.into_inner()?;

    // deserialize Avro binary data back into ExampleByteArrayFiltered structs;
    // collecting into `Result` stops at the first read or conversion error
    let reader = apache_avro::Reader::new(&avro_data[..])?;
    let deserialized_records = reader
        .map(|value| value.and_then(|v| apache_avro::from_value::<ExampleByteArrayFiltered>(&v)))
        .collect::<Result<Vec<_>, _>>()?;

    assert_eq!(records.len(), deserialized_records.len());

    // the surviving field must carry the value that was written, record by record
    for (written, read) in records.iter().zip(&deserialized_records) {
        assert_eq!(written.description, read.description);
    }

    Ok(())
}