Skip to content

Commit b628670

Browse files
authored
Add deserializer class (#37)
1 parent f630a81 commit b628670

File tree

14 files changed

+1221
-11
lines changed

14 files changed

+1221
-11
lines changed

.github/workflows/linux.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,14 @@ jobs:
6060
working-directory: build
6161
run: cmake --build . --target run_example
6262

63+
- name: Build deserializer example
64+
working-directory: build
65+
run: cmake --build . --target deserializer_example
66+
67+
- name: Run deserializer example
68+
working-directory: build
69+
run: cmake --build . --target run_deserializer_example
70+
6371
- name: Build test_integration_tools
6472
working-directory: build
6573
run: cmake --build . --target test_integration_tools
@@ -113,6 +121,14 @@ jobs:
113121
working-directory: build
114122
run: cmake --build . --target run_example
115123

124+
- name: Build deserializer example
125+
working-directory: build
126+
run: cmake --build . --target deserializer_example
127+
128+
- name: Run deserializer example
129+
working-directory: build
130+
run: cmake --build . --target run_deserializer_example
131+
116132
- name: Build test_integration_tools
117133
working-directory: build
118134
run: cmake --build . --target test_integration_tools

.github/workflows/osx.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,14 @@ jobs:
6565
working-directory: build
6666
run: cmake --build . --target run_example
6767

68+
- name: Build deserializer example
69+
working-directory: build
70+
run: cmake --build . --target deserializer_example
71+
72+
- name: Run deserializer example
73+
working-directory: build
74+
run: cmake --build . --target run_deserializer_example
75+
6876
- name: Build test_integration_tools
6977
working-directory: build
7078
run: cmake --build . --target test_integration_tools
@@ -123,6 +131,14 @@ jobs:
123131
working-directory: build
124132
run: cmake --build . --target run_example
125133

134+
- name: Build deserializer example
135+
working-directory: build
136+
run: cmake --build . --target deserializer_example
137+
138+
- name: Run deserializer example
139+
working-directory: build
140+
run: cmake --build . --target run_deserializer_example
141+
126142
- name: Build test_integration_tools
127143
working-directory: build
128144
run: cmake --build . --target test_integration_tools

.github/workflows/windows.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,14 @@ jobs:
7373
working-directory: build
7474
run: cmake --build . --config ${{ matrix.build_type }} --target run_test_integration_tools
7575

76+
- name: Build deserializer example
77+
working-directory: build
78+
run: cmake --build . --config ${{ matrix.build_type }} --target deserializer_example
79+
80+
- name: Run deserializer example
81+
working-directory: build
82+
run: cmake --build . --config ${{ matrix.build_type }} --target run_deserializer_example
83+
7684
- name: Install
7785
working-directory: build
7886
run: cmake --install . --config ${{ matrix.build_type }}
@@ -123,6 +131,14 @@ jobs:
123131
working-directory: build
124132
run: cmake --build . --config ${{ matrix.build_type }} --target run_example
125133

134+
- name: Build deserializer example
135+
working-directory: build
136+
run: cmake --build . --config ${{ matrix.build_type }} --target deserializer_example
137+
138+
- name: Run deserializer example
139+
working-directory: build
140+
run: cmake --build . --config ${{ matrix.build_type }} --target run_deserializer_example
141+
126142
- name: Build test_integration_tools
127143
working-directory: build
128144
run: cmake --build . --config ${{ matrix.build_type }} --target test_integration_tools

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ set(SPARROW_IPC_HEADERS
128128
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_utils.hpp
129129
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_variable_size_binary_array.hpp
130130
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize.hpp
131+
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserializer.hpp
131132
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/encapsulated_message.hpp
132133
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/flatbuffer_utils.hpp
133134
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/magic_values.hpp

README.md

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,21 +107,91 @@ void stream_record_batches(std::ostream& os, record_batch_source& source)
107107
108108
### Deserialize a stream into record batches
109109
110+
#### Using the function API
111+
110112
```cpp
111113
#include <vector>
112-
#include <sparrow_ipc/deserializer.hpp>
114+
#include <sparrow_ipc/deserialize.hpp>
113115
#include <sparrow/record_batch.hpp>
114116
115117
namespace sp = sparrow;
116118
namespace sp_ipc = sparrow_ipc;
117119
118-
std::vector<sp::record_batch> deserialize_stream_to_batches(const std::vector<uint8_t>& stream_data)
120+
std::vector<sp::record_batch> deserialize_stream_example(const std::vector<uint8_t>& stream_data)
119121
{
122+
// Deserialize the entire stream at once
120123
auto batches = sp_ipc::deserialize_stream(stream_data);
121124
return batches;
122125
}
123126
```
124127

128+
#### Using the deserializer class
129+
130+
The `deserializer` class lets you accumulate record batches into an existing container as you deserialize data:
131+
132+
```cpp
133+
#include <iostream>
134+
#include <span>
135+
#include <vector>
136+
#include <sparrow_ipc/deserializer.hpp>
137+
#include <sparrow/record_batch.hpp>
138+
139+
namespace sp = sparrow;
140+
namespace sp_ipc = sparrow_ipc;
141+
142+
void deserializer_basic_example(const std::vector<uint8_t>& stream_data)
143+
{
144+
// Create a container to hold the deserialized batches
145+
std::vector<sp::record_batch> batches;
146+
147+
// Create a deserializer that will append to our container
148+
sp_ipc::deserializer deser(batches);
149+
150+
// Deserialize the stream data
151+
deser.deserialize(std::span<const uint8_t>(stream_data));
152+
153+
// Process the accumulated batches
154+
for (const auto& batch : batches)
155+
{
156+
std::cout << "Batch with " << batch.nb_rows() << " rows and " << batch.nb_columns() << " columns\n";
157+
}
158+
}
159+
```
160+
161+
#### Incremental deserialization
162+
163+
The `deserializer` class is particularly useful for streaming scenarios where data arrives in chunks:
164+
165+
```cpp
166+
#include <iostream>
167+
#include <span>
168+
#include <vector>
169+
#include <sparrow_ipc/deserializer.hpp>
170+
#include <sparrow/record_batch.hpp>
171+
172+
namespace sp = sparrow;
173+
namespace sp_ipc = sparrow_ipc;
174+
175+
void deserializer_incremental_example(const std::vector<std::vector<uint8_t>>& stream_chunks)
176+
{
177+
// Container to accumulate all deserialized batches
178+
std::vector<sp::record_batch> batches;
179+
180+
// Create a deserializer
181+
sp_ipc::deserializer deser(batches);
182+
183+
// Deserialize chunks as they arrive using the streaming operator
184+
for (const auto& chunk : stream_chunks)
185+
{
186+
deser << std::span<const uint8_t>(chunk);
187+
std::cout << "After chunk: " << batches.size() << " batches accumulated\n";
188+
}
189+
190+
// All batches are now available in the container
191+
std::cout << "Total batches deserialized: " << batches.size() << "\n";
192+
}
193+
```
194+
125195
## Documentation
126196

127197
The documentation (currently being written) can be found at https://quantstack.github.io/sparrow-ipc/index.html

docs/source/main_page.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,41 @@ Sparrow-IPC requires a modern C++ compiler supporting C++20:
2121
| MSVC | 19.41 or higher |
2222

2323
This software is licensed under the BSD-3-Clause license. See the [LICENSE](https://github.com/QuantStack/sparrow-ipc/blob/main/LICENSE) file for details.
24+
25+
Getting Started
26+
---------------
27+
28+
### Quick Example
29+
30+
```cpp
31+
#include <vector>
32+
#include <sparrow_ipc/deserialize.hpp>
33+
#include <sparrow_ipc/memory_output_stream.hpp>
34+
#include <sparrow_ipc/serializer.hpp>
35+
#include <sparrow/record_batch.hpp>
36+
37+
namespace sp = sparrow;
38+
namespace sp_ipc = sparrow_ipc;
39+
40+
// Serialize record batches
41+
std::vector<uint8_t> serialize(const std::vector<sp::record_batch>& batches)
42+
{
43+
std::vector<uint8_t> stream_data;
44+
sp_ipc::memory_output_stream stream(stream_data);
45+
sp_ipc::serializer serializer(stream);
46+
serializer << batches << sp_ipc::end_stream;
47+
return stream_data;
48+
}
49+
50+
// Deserialize record batches
51+
std::vector<sp::record_batch> deserialize(const std::vector<uint8_t>& stream_data)
52+
{
53+
return sp_ipc::deserialize_stream(stream_data);
54+
}
55+
```
56+
57+
Documentation
58+
-------------
59+
60+
- @ref serialization "Serialization and Deserialization" - How to serialize and deserialize record batches
61+
- @ref dev_build "Development Build" - How to build the project for development

docs/source/serialization.md

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Serialization and Deserialization {#serialization}
2+
3+
This page describes how to serialize and deserialize record batches using `sparrow-ipc`.
4+
5+
## Overview
6+
7+
`sparrow-ipc` provides two main approaches for both serialization and deserialization:
8+
9+
- **Function API**: Simple one-shot operations for serializing/deserializing complete data
10+
- **Class API**: Streaming-oriented classes (`serializer` and `deserializer`) for incremental operations
11+
12+
## Serialization
13+
14+
### Serialize record batches to a memory stream
15+
16+
The simplest way to serialize record batches is to use the `serializer` class with a `memory_output_stream`:
17+
18+
\snippet write_and_read_streams.cpp example_serialize_to_stream
19+
20+
### Serialize individual record batches
21+
22+
You can also serialize record batches one at a time:
23+
24+
\snippet write_and_read_streams.cpp example_serialize_individual
25+
26+
## Deserialization
27+
28+
### Using the function API
29+
30+
The simplest way to deserialize a complete Arrow IPC stream is using `deserialize_stream`:
31+
32+
\snippet deserializer_example.cpp example_deserialize_stream
33+
34+
### Using the deserializer class
35+
36+
The `deserializer` class provides more control over deserialization and is useful when you want to:
37+
- Accumulate batches into an existing container
38+
- Deserialize data incrementally as it arrives
39+
- Process multiple streams into a single container
40+
41+
#### Basic usage
42+
43+
\snippet deserializer_example.cpp example_deserializer_basic
44+
45+
#### Incremental deserialization
46+
47+
The `deserializer` class is particularly useful for streaming scenarios where data arrives in chunks:
48+
49+
\snippet deserializer_example.cpp example_deserializer_incremental
50+
51+
#### Chaining deserializations
52+
53+
The streaming operator (`<<`) can be chained, giving a fluent API:
54+
55+
\snippet deserializer_example.cpp example_deserializer_chaining

examples/CMakeLists.txt

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,18 @@ target_link_libraries(write_and_read_streams
1111
arrow-testing-data
1212
)
1313

14+
# Create executable for the deserializer_example
15+
add_executable(deserializer_example deserializer_example.cpp)
16+
17+
# Link against sparrow-ipc and its dependencies
18+
target_link_libraries(deserializer_example
19+
PRIVATE
20+
sparrow-ipc
21+
sparrow::sparrow
22+
)
23+
1424
# Set C++ standard to match the main project
15-
set_target_properties(write_and_read_streams
25+
set_target_properties(write_and_read_streams deserializer_example
1626
PROPERTIES
1727
CXX_STANDARD 20
1828
CXX_STANDARD_REQUIRED ON
@@ -26,8 +36,15 @@ target_include_directories(write_and_read_streams
2636
${CMAKE_BINARY_DIR}/generated
2737
)
2838

39+
target_include_directories(deserializer_example
40+
PRIVATE
41+
${CMAKE_SOURCE_DIR}/include
42+
${CMAKE_BINARY_DIR}/generated
43+
)
44+
2945
# Ensure generated flatbuffer headers are available
3046
add_dependencies(write_and_read_streams generate_flatbuffers_headers)
47+
add_dependencies(deserializer_example generate_flatbuffers_headers)
3148

3249
# Optional: Copy to build directory for easy execution
3350
if(WIN32)
@@ -38,7 +55,7 @@ if(WIN32)
3855
set(ZSTD_DLL_TARGET libzstd_static)
3956
endif()
4057

41-
# On Windows, copy required DLLs
58+
# On Windows, copy required DLLs for write_and_read_streams
4259
set(DLL_COPY_COMMANDS "") # Initialize a list to hold all copy commands
4360
# Add unconditional copy commands
4461
list(APPEND DLL_COPY_COMMANDS
@@ -66,6 +83,31 @@ if(WIN32)
6683
${DLL_COPY_COMMANDS}
6784
COMMENT "Copying required DLLs to example executable directory"
6885
)
86+
87+
# On Windows, copy required DLLs for deserializer_example
88+
set(DLL_COPY_COMMANDS_DESER "") # Initialize a list to hold all copy commands
89+
list(APPEND DLL_COPY_COMMANDS_DESER
90+
COMMAND ${CMAKE_COMMAND} -E copy_if_different
91+
"$<TARGET_FILE:sparrow::sparrow>"
92+
"$<TARGET_FILE_DIR:deserializer_example>"
93+
COMMAND ${CMAKE_COMMAND} -E copy_if_different
94+
"$<TARGET_FILE:sparrow-ipc>"
95+
"$<TARGET_FILE_DIR:deserializer_example>"
96+
)
97+
98+
if(ZSTD_DLL_TARGET)
99+
list(APPEND DLL_COPY_COMMANDS_DESER
100+
COMMAND ${CMAKE_COMMAND} -E copy_if_different
101+
"$<TARGET_FILE:${ZSTD_DLL_TARGET}>"
102+
"$<TARGET_FILE_DIR:deserializer_example>"
103+
)
104+
endif()
105+
106+
add_custom_command(
107+
TARGET deserializer_example POST_BUILD
108+
${DLL_COPY_COMMANDS_DESER}
109+
COMMENT "Copying required DLLs to deserializer_example executable directory"
110+
)
69111
endif()
70112

71113
# Create a custom target to easily run the example
@@ -77,3 +119,13 @@ add_custom_target(run_example
77119
)
78120

79121
set_target_properties(run_example PROPERTIES FOLDER "Examples")
122+
123+
# Create a custom target to run the deserializer example
124+
add_custom_target(run_deserializer_example
125+
COMMAND deserializer_example
126+
DEPENDS deserializer_example
127+
COMMENT "Running deserializer_example"
128+
USES_TERMINAL
129+
)
130+
131+
set_target_properties(run_deserializer_example PROPERTIES FOLDER "Examples")

0 commit comments

Comments
 (0)