diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/CMakeLists.txt
new file mode 100755
index 0000000000..73adc944d9
--- /dev/null
+++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/CMakeLists.txt
@@ -0,0 +1,20 @@
+if(UNIX)
+    # Direct CMake to use dpcpp rather than the default C++ compiler/linker
+    set(CMAKE_CXX_COMPILER dpcpp)
+else() # Windows
+    # Force CMake to use dpcpp rather than the default C++ compiler/linker 
+    # (needed on Windows only)
+    include (CMakeForceCompiler)
+    CMAKE_FORCE_CXX_COMPILER (dpcpp IntelDPCPP)
+    include (Platform/Windows-Clang)
+endif()
+
+cmake_minimum_required (VERSION 3.4)
+
+project(DataBundle CXX)
+
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+
+add_subdirectory (src)
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/License.txt
new file mode 100755
index 0000000000..7c8b8a36c6
--- /dev/null
+++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/License.txt
@@ -0,0 +1,23 @@
+Copyright Intel Corporation
+
+SPDX-License-Identifier: MIT
+https://opensource.org/licenses/MIT
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/README.md b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/README.md
new file mode 100755
index 0000000000..1027445905
--- /dev/null
+++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/README.md
@@ -0,0 +1,327 @@
+# Data Transfers Using Pipes
+This FPGA tutorial shows how to use pipes to transfer data between kernels.
+
+***Documentation***:  The [DPC++ FPGA Code Samples Guide](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of DPC++ for FPGA. <br>
+The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) is the reference manual for targeting FPGAs through DPC++. <br>
+The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming.
+
+| Optimized for                     | Description
+---                                 |---
+| OS                                | Linux* Ubuntu* 18.04/20.04, RHEL*/CentOS* 8, SUSE* 15; Windows* 10
+| Hardware                          | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA <br> Intel® FPGA Programmable Acceleration Card (PAC) D5005 (with Intel Stratix® 10 SX) <br> Intel® FPGA 3rd party / custom platforms with oneAPI support <br> *__Note__: Intel® FPGA PAC hardware is only compatible with Ubuntu 18.04*
+| Software                          | Intel® oneAPI DPC++ Compiler <br> Intel® FPGA Add-On for oneAPI Base Toolkit
+| What you will learn               | The basics of the DPC++ pipes extension for FPGA<br> How to declare and use pipes in a DPC++ program
+| Time to complete                  | 15 minutes
+
+
+
+## Purpose
+This tutorial demonstrates how a kernel in a DPC++ FPGA program transfers
+data to or from another kernel using the pipe abstraction.
+
+### Definition of a Pipe
+The primary goal of pipes is to allow concurrent execution of kernels that need
+to exchange data.
+
+A pipe is a FIFO data structure connecting two endpoints that communicate
+using the pipe's `read` and `write` operations. An endpoint can be either a kernel
+or an external I/O on the FPGA. Therefore, there are three types of pipes:
+* kernel-kernel
+* kernel-I/O
+* I/O-kernel
+
+This tutorial focuses on kernel-kernel pipes, but
+the concepts discussed here apply to other kinds of pipes as well.
+
+The `read` and `write` operations have two variants:
+* Blocking variant: Blocking operations may not return immediately but are always successful.
+* Non-blocking variant: Non-blocking operations take an extra boolean parameter
+that is set to `true` if the operation happened successfully.
+
+Data flows in a single direction inside pipes. In other words, for a pipe `P`
+and two kernels using `P`, one of the kernels is exclusively going to perform
+`write` to `P` while the other kernel is exclusively going to perform `read` from
+`P`. Bidirectional communication can be achieved using two pipes.
+
+Each pipe has a configurable `capacity` parameter describing the number of `write`
+operations that may be performed without any `read` operations being performed. For example,
+consider a pipe `P` with capacity 3, and two kernels `K1` and `K2` using
+`P`. Assume that `K1` performed the following sequence of operations:
+
+ `write(1)`, `write(2)`, `write(3)`
+
+In this situation, the pipe is full because three (the `capacity` of
+`P`) `write` operations were performed without any `read` operation. In this
+situation, a `read` must occur before any other `write` is allowed.
+
+If a `write` is attempted to a full pipe, one of two behaviors occurs:
+
+  * If the operation is non-blocking, it returns immediately, and its
+  boolean parameter is set to `false`. The `write` does not have any effect.
+  * If the operation is blocking, it does not return until a `read` is
+  performed by the other endpoint. Once the `read` is performed, the `write`
+  takes place.
+
+The blocking and non-blocking `read` operations have analogous behaviors when
+the pipe is empty.
+
+### Defining a Pipe in DPC++
+
+In DPC++, pipes are defined as a class with static members. To declare a pipe that
+transfers integer data and has  `capacity=4`, use a type alias:
+
+```c++
+using ProducerToConsumerPipe = pipe<  // Defined in the DPC++ headers.
+  class ProducerConsumerPipe,         // An identifier for the pipe.
+  int,                                // The type of data in the pipe.
+  4>;                                 // The capacity of the pipe.
+```
+
+The `class ProducerConsumerPipe` template parameter is important to the
+uniqueness of the pipe. This class need not be defined but must be distinct
+for each pipe. Consider another type alias with the exact same parameters:
+
+```c++
+using ProducerToConsumerPipe2 = pipe<  // Defined in the DPC++ headers.
+  class ProducerConsumerPipe,          // An identifier for the pipe.
+  int,                                 // The type of data in the pipe.
+  4>;                                  // The capacity of the pipe.
+```
+
+The uniqueness of a pipe is derived from a combination of all three template
+parameters. Since `ProducerToConsumerPipe` and `ProducerToConsumerPipe2` have
+the same template parameters, they define the same pipe.
+
+### Using a Pipe in DPC++
+
+This code sample defines a `Consumer` and a `Producer` kernel connected
+by the pipe `ProducerToConsumerPipe`. Kernels use the
+`ProducerToConsumerPipe::write` and `ProducerToConsumerPipe::read` methods for
+communication.
+
+The `Producer` kernel reads integers from the global memory and writes those integers
+into `ProducerToConsumerPipe`, as shown in the following code snippet:
+
+```c++
+void Producer(queue &q, buffer<int, 1> &input_buffer) {
+  std::cout << "Enqueuing producer...\n";
+
+  auto e = q.submit([&](handler &h) {
+    accessor input_accessor(input_buffer, h, read_only);
+    auto num_elements = input_buffer.get_count();
+
+    h.single_task<ProducerTutorial>([=]() {
+      for (size_t i = 0; i < num_elements; ++i) {
+        ProducerToConsumerPipe::write(input_accessor[i]);
+      }
+    });
+  });
+}
+```
+
+The `Consumer` kernel reads integers from `ProducerToConsumerPipe`, processes
+the integers (`ConsumerWork(i)`), and writes the result into the global memory.
+
+```c++
+void Consumer(queue &q, buffer<int, 1> &output_buffer) {
+  std::cout << "Enqueuing consumer...\n";
+
+  auto e = q.submit([&](handler &h) {
+    accessor output_accessor(output_buffer, h, write_only, no_init);
+    size_t num_elements = output_buffer.get_count();
+
+    h.single_task<ConsumerTutorial>([=]() {
+      for (size_t i = 0; i < num_elements; ++i) {
+        int input = ProducerToConsumerPipe::read();
+        int answer = ConsumerWork(input);
+        output_accessor[i] = answer;
+      }
+    });
+  });
+}
+```
+
+**NOTE:** The `read` and `write` operations used are blocking. If
+`ConsumerWork` is an expensive operation, then `Producer` might fill
+`ProducerToConsumerPipe` faster than `Consumer` can read from it, causing
+`Producer` to block occasionally.
+
+## Key Concepts
+* The basics of the DPC++ pipes extension for FPGA
+* How to declare and use pipes in a DPC++ program
+
+## License
+Code samples are licensed under the MIT license. See
+[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
+
+Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt)
+
+## Building the `data_bundle` Tutorial
+
+### Include Files
+The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system.
+
+### Running Samples in DevCloud
+If running a sample in the Intel DevCloud, remember that you must specify the type of compute node and whether to run in batch or interactive mode. Compiles to FPGA are only supported on fpga_compile nodes. Executing programs on FPGA hardware is only supported on fpga_runtime nodes of the appropriate type, such as fpga_runtime:arria10 or fpga_runtime:stratix10.  Neither compiling nor executing programs on FPGA hardware is supported on the login nodes. For more information, see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/documentation/base-toolkit/](https://devcloud.intel.com/oneapi/documentation/base-toolkit/)).
+
+When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h.
+
+
+### Using Visual Studio Code*  (Optional)
+
+You can use Visual Studio Code (VS Code) extensions to set your environment, create launch configurations,
+and browse and download samples.
+
+The basic steps to build and run a sample using VS Code include:
+ - Download a sample using the extension **Code Sample Browser for Intel oneAPI Toolkits**.
+ - Configure the oneAPI environment with the extension **Environment Configurator for Intel oneAPI Toolkits**.
+ - Open a Terminal in VS Code (**Terminal>New Terminal**).
+ - Run the sample in the VS Code terminal using the instructions below.
+
+To learn more about the extensions and how to configure the oneAPI environment, see
+[Using Visual Studio Code with Intel® oneAPI Toolkits](https://software.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html).
+
+After learning how to use the extensions for Intel oneAPI Toolkits, return to this readme for instructions on how to build and run a sample.
+
+### On a Linux* System
+
+1. Generate the `Makefile` by running `cmake`.
+     ```
+   mkdir build
+   cd build
+   ```
+   To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command:
+    ```
+    cmake ..
+   ```
+   Alternatively, to compile for the Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX), run `cmake` using the command:
+
+   ```
+   cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10
+   ```
+   You can also compile for a custom FPGA platform. Ensure that the board support package is installed on your system. Then run `cmake` using the command:
+   ```
+   cmake .. -DFPGA_BOARD=<board-support-package>:<board-variant>
+   ```
+
+2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow:
+
+   * Compile for emulation (fast compile time, targets emulated FPGA device):
+      ```
+      make fpga_emu
+      ```
+   * Generate the optimization report:
+     ```
+     make report
+     ```
+   * Compile for FPGA hardware (longer compile time, targets FPGA device):
+     ```
+     make fpga
+     ```
+3. (Optional) As the above hardware compile may take several hours to complete, FPGA precompiled binaries (compatible with Linux* Ubuntu* 18.04) can be downloaded <a href="https://iotdk.intel.com/fpga-precompiled-binaries/latest/pipes.fpga.tar.gz" download>here</a>.
+
+### On a Windows* System
+
+1. Generate the `Makefile` by running `cmake`.
+     ```
+   mkdir build
+   cd build
+   ```
+   To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command:
+    ```
+    cmake -G "NMake Makefiles" ..
+   ```
+   Alternatively, to compile for the Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX), run `cmake` using the command:
+
+   ```
+   cmake -G "NMake Makefiles" .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10
+   ```
+   You can also compile for a custom FPGA platform. Ensure that the board support package is installed on your system. Then run `cmake` using the command:
+   ```
+   cmake -G "NMake Makefiles" .. -DFPGA_BOARD=<board-support-package>:<board-variant>
+   ```
+
+2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow:
+
+   * Compile for emulation (fast compile time, targets emulated FPGA device):
+     ```
+     nmake fpga_emu
+     ```
+   * Generate the optimization report:
+     ```
+     nmake report
+     ```
+   * Compile for FPGA hardware (longer compile time, targets FPGA device):
+     ```
+     nmake fpga
+     ```
+
+*Note:* The Intel® PAC with Intel Arria® 10 GX FPGA and Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX) do not support Windows*. Compiling to FPGA hardware on Windows* requires a third-party or custom Board Support Package (BSP) with Windows* support.<br>
+*Note:* If you encounter any issues with long paths when compiling under Windows*, you may have to create your ‘build’ directory in a shorter path, for example c:\samples\build.  You can then run cmake from that directory, and provide cmake with the full path to your sample directory.
+
+ ### In Third-Party Integrated Development Environments (IDEs)
+
+You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide)
+
+## Examining the Reports
+Locate `report.html` in the `data_bundle_report.prj/reports/` directory. Open the report in any of Chrome*, Firefox*, Edge*, or Internet Explorer*.
+
+Navigate to the "System Viewer" to visualize the structure of the kernel system. Identify the pipe connecting the two kernels.
+
+## Running the Sample
+
+ 1. Run the sample on the FPGA emulator (the kernel executes on the CPU):
+     ```
+     ./data_bundle.fpga_emu     (Linux)
+     data_bundle.fpga_emu.exe   (Windows)
+     ```
+2. Run the sample on the FPGA device:
+     ```
+     ./data_bundle.fpga         (Linux)
+     ```
+
+### Example of Output
+You should see the following output in the console:
+
+1. When running on the FPGA emulator
+    ```
+    Input Array Size: 8192
+    Enqueuing producer...
+    Enqueuing consumer...
+
+    Profiling Info
+      Producer:
+        Start time: 0 ms
+        End time: +8.18174 ms
+        Kernel Duration: 8.18174 ms
+      Consumer:
+        Start time: +7.05307 ms
+        End time: +8.18231 ms
+        Kernel Duration: 1.12924 ms
+      Design Duration: 8.18231 ms
+      Design Throughput: 4.00474 MB/s
+
+    PASSED: The results are correct
+    ```
+    NOTE: The FPGA emulator does not accurately represent the performance nor the kernels' relative timing (i.e., the start and end times).
+
+2. When running on the FPGA device
+    ```
+    Input Array Size: 1048576
+    Enqueuing producer...
+    Enqueuing consumer...
+
+    Profiling Info
+      Producer:
+        Start time: 0 ms
+        End time: +4.481 ms
+        Kernel Duration: 4.481 ms
+      Consumer:
+        Start time: +0.917 ms
+        End time: +4.484 ms
+        Kernel Duration: 3.568 ms
+      Design Duration: 4.484 ms
+      Design Throughput: 935.348 MB/s
+
+    PASSED: The results are correct
+    ```
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/data_bundle.sln b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/data_bundle.sln
new file mode 100755
index 0000000000..7d001912ad
--- /dev/null
+++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/data_bundle.sln
@@ -0,0 +1,25 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+VisualStudioVersion = 15.0.28307.705
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "data_bundle", "data_bundle.vcxproj", "{BE9E5E70-F644-4119-9A1F-E2B75C85B9E2}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{BE9E5E70-F644-4119-9A1F-E2B75C85B9E2}.Debug|x64.ActiveCfg = Debug|x64
+		{BE9E5E70-F644-4119-9A1F-E2B75C85B9E2}.Debug|x64.Build.0 = Debug|x64
+		{BE9E5E70-F644-4119-9A1F-E2B75C85B9E2}.Release|x64.ActiveCfg = Release|x64
+		{BE9E5E70-F644-4119-9A1F-E2B75C85B9E2}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {47B77939-C7AE-44EC-AD38-EF8459A9C41A}
+	EndGlobalSection
+EndGlobal
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/data_bundle.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/data_bundle.vcxproj
new file mode 100755
index 0000000000..3d069c443e
--- /dev/null
+++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/data_bundle.vcxproj
@@ -0,0 +1,160 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="src\data_bundle.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="README.md" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>15.0</VCProjectVersion>
+    <ProjectGuid>{be9e5e70-f644-4119-9a1f-e2b75c85b9e2}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>data_bundle</RootNamespace>
+    <WindowsTargetPlatformVersion>$(WindowsSDKVersion.Replace("\",""))</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>Intel(R) oneAPI DPC++ Compiler 2022</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>Intel(R) oneAPI DPC++ Compiler 2022</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>Intel(R) oneAPI DPC++ Compiler 2022</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>Intel(R) oneAPI DPC++ Compiler 2022</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
+      <AdditionalIncludeDirectories>$(ONEAPI_ROOT)dev-utilities\latest\include</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
+      <EnableFPGACompilationAhead>true</EnableFPGACompilationAhead>
+      <AdditionalOptions>-DFPGA_EMULATOR %(AdditionalOptions)</AdditionalOptions>
+      <ObjectFileName>$(IntDir)data_bundle.obj</ObjectFileName>
+      <AdditionalIncludeDirectories>$(ONEAPI_ROOT)dev-utilities\latest\include</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
+      <AdditionalIncludeDirectories>$(ONEAPI_ROOT)dev-utilities\latest\include</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <ConformanceMode>true</ConformanceMode>
+      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
+      <EnableFPGACompilationAhead>true</EnableFPGACompilationAhead>
+      <AdditionalOptions>-DFPGA_EMULATOR %(AdditionalOptions)</AdditionalOptions>
+      <ObjectFileName>$(IntDir)data_bundle.obj</ObjectFileName>
+      <AdditionalIncludeDirectories>$(ONEAPI_ROOT)dev-utilities\latest\include</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/sample.json
new file mode 100755
index 0000000000..1bbfd84306
--- /dev/null
+++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/sample.json
@@ -0,0 +1,61 @@
+{
+  "guid": "58CF1ABA-5D08-40B7-ACC2-5CB904261413",
+  "name": "Pipes",
+  "categories": ["Toolkit/oneAPI Direct Programming/DPC++ FPGA/Tutorials/Features"],
+  "description": "How to use Pipes to transfer data between kernels on an Intel® FPGA",
+  "toolchain": ["dpcpp"],
+  "os": ["linux", "windows"],
+  "targetDevice": ["FPGA"],
+  "builder": ["ide", "cmake"],
+  "languages": [{"cpp":{}}],
+  "ciTests": {
+    "linux": [
+      {
+        "id": "fpga_emu",
+        "steps": [
+          "dpcpp --version",
+          "mkdir build",
+          "cd build",
+          "cmake ..",
+          "make fpga_emu",
+          "./data_bundle.fpga_emu"
+        ]
+      },
+      {
+        "id": "report",
+        "steps": [
+          "dpcpp --version",
+          "mkdir build",
+          "cd build",
+          "cmake ..",
+          "make report"
+        ]
+      }
+    ],
+    "windows": [
+      {
+        "id": "fpga_emu",
+        "steps": [
+          "dpcpp --version",
+          "cd ../../..",
+          "mkdir build",
+          "cd build",
+          "cmake -G \"NMake Makefiles\" ../Tutorials/DesignPatterns/data_bundle",
+          "nmake fpga_emu",
+          "data_bundle.fpga_emu.exe"
+        ]
+      },
+      {
+        "id": "report",
+        "steps": [
+          "dpcpp --version",
+          "cd ../../..",
+          "mkdir build",
+          "cd build",
+          "cmake -G \"NMake Makefiles\" ../Tutorials/DesignPatterns/data_bundle",
+          "nmake report"
+        ]
+      }
+    ]
+  }
+}
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/src/CMakeLists.txt
new file mode 100755
index 0000000000..6d501e9c84
--- /dev/null
+++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/src/CMakeLists.txt
@@ -0,0 +1,76 @@
+# To see a Makefile equivalent of this build system:
+# https://github.com/oneapi-src/oneAPI-samples/blob/master/DirectProgramming/DPC++/ProjectTemplates/makefile-fpga
+
+set(SOURCE_FILE data_bundle.cpp)
+set(TARGET_NAME data_bundle)
+set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu)
+set(FPGA_TARGET ${TARGET_NAME}.fpga)
+
+# FPGA board selection
+if(NOT DEFINED FPGA_BOARD)
+    set(FPGA_BOARD "intel_a10gx_pac:pac_a10")
+    message(STATUS "FPGA_BOARD was not specified.\
+                    \nConfiguring the design to run on the default FPGA board ${FPGA_BOARD} (Intel(R) PAC with Intel Arria(R) 10 GX FPGA). \
+                    \nPlease refer to the README for information on board selection.")
+else()
+    message(STATUS "Configuring the design to run on FPGA board ${FPGA_BOARD}")
+endif()
+
+# This is a Windows-specific flag that enables exception handling in host code
+if(WIN32)
+    set(WIN_FLAG "/EHsc")
+endif()
+
+# A DPC++ ahead-of-time (AoT) compile processes the device code in two stages.
+# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V).
+# 2. The "link" stage invokes the compiler's FPGA backend before linking.
+#    For this reason, FPGA backend flags must be passed as link flags in CMake.
+set(EMULATOR_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga -DFPGA_EMULATOR")
+set(EMULATOR_LINK_FLAGS "-fintelfpga")
+set(HARDWARE_COMPILE_FLAGS "-Wall ${WIN_FLAG} -fintelfpga")
+set(HARDWARE_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${FPGA_BOARD} ${USER_HARDWARE_FLAGS}")
+# use cmake -D USER_HARDWARE_FLAGS=<flags> to set extra flags for FPGA backend compilation
+
+###############################################################################
+### FPGA Emulator
+###############################################################################
+# To compile in a single command:
+#    dpcpp -fintelfpga -DFPGA_EMULATOR data_bundle.cpp -o data_bundle.fpga_emu
+# CMake executes:
+#    [compile] dpcpp -fintelfpga -DFPGA_EMULATOR -o data_bundle.cpp.o -c data_bundle.cpp
+#    [link]    dpcpp -fintelfpga data_bundle.cpp.o -o data_bundle.fpga_emu
+add_executable(${EMULATOR_TARGET} ${SOURCE_FILE})
+target_include_directories(${EMULATOR_TARGET} PRIVATE ../../../../include)
+set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}")
+set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}")
+add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET})
+
+###############################################################################
+### Generate Report
+###############################################################################
+# To compile manually:
+#   dpcpp -fintelfpga -Xshardware -Xsboard=<FPGA_BOARD> -fsycl-link=early data_bundle.cpp -o data_bundle_report.a
+set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a)
+# The compile output is not an executable, but an intermediate compilation result unique to DPC++.
+add_executable(${FPGA_EARLY_IMAGE} ${SOURCE_FILE})
+target_include_directories(${FPGA_EARLY_IMAGE} PRIVATE ../../../../include)
+add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE})
+set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}")
+set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -fsycl-link=early")
+# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus®
+
+###############################################################################
+### FPGA Hardware
+###############################################################################
+# To compile in a single command:
+#   dpcpp -fintelfpga -Xshardware -Xsboard=<FPGA_BOARD> data_bundle.cpp -o data_bundle.fpga
+# CMake executes:
+#   [compile] dpcpp -fintelfpga -o data_bundle.cpp.o -c data_bundle.cpp
+#   [link]    dpcpp -fintelfpga -Xshardware -Xsboard=<FPGA_BOARD> data_bundle.cpp.o -o data_bundle.fpga
+add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE})
+target_include_directories(${FPGA_TARGET} PRIVATE ../../../../include)
+add_custom_target(fpga DEPENDS ${FPGA_TARGET})
+set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}")
+set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${FPGA_TARGET}")
+# The -reuse-exe flag enables rapid recompilation of host-only code changes.
+# See DPC++FPGA/GettingStarted/fast_recompile for details.
diff --git a/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/src/data_bundle.cpp b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/src/data_bundle.cpp
new file mode 100644
index 0000000000..fd55dc8584
--- /dev/null
+++ b/DirectProgramming/DPC++FPGA/Tutorials/DesignPatterns/data_bundle/src/data_bundle.cpp
@@ -0,0 +1,187 @@
+#include <iomanip>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <CL/sycl.hpp>
+#include <sycl/ext/intel/fpga_extensions.hpp>
+
+// dpc_common.hpp can be found in the dev-utilities include folder.
+// e.g., $ONEAPI_ROOT/dev-utilities/<version>/include/dpc_common.hpp
+#include "dpc_common.hpp"
+
+#include "memory_tools.hpp"
+#include "metaprogramming_tools.hpp"
+#include "unrolled_loop.hpp"
+
+using namespace sycl;
+
+////////////////////////////////////////////////////////////////////////////////
+template <typename T, int n>  // T: element type, n: number of elements in the bundle
+struct DataBundle {  // fixed-size group of n values of type T, copied element by element
+  static constexpr size_t size = n;  // number of elements, available at compile time
+  using value_type = T;  // element type
+
+  DataBundle() {}  // elements are default-initialized (indeterminate for scalar T)
+
+  DataBundle(const T v) {  // fill constructor (not explicit): every element is set to v
+    #pragma unroll
+    for (int idx = 0; idx < n; idx++) {  // n is a compile-time constant
+      data_[idx] = v;
+    }
+  }
+
+  DataBundle(const DataBundle &v) {  // copy constructor: element-wise copy of v
+    #pragma unroll
+    for (int idx = 0; idx < n; idx++) {
+      data_[idx] = v.data_[idx];
+    }
+  }
+
+  DataBundle &operator=(const DataBundle &op) {  // copy assignment: element-wise copy of op
+    #pragma unroll
+    for (int idx = 0; idx < n; idx++) {
+      data_[idx] = op.data_[idx];
+    }
+    return *this;  // allows chained assignment
+  }
+
+  // get a reference to element i of the bundle (i is not bounds-checked)
+  T& operator[](int i) { return data_[i]; }
+  const T& operator[](int i) const { return data_[i]; }
+
+  // get a raw pointer to the first element of the underlying array
+  T *Data() { return &data_[0]; }
+
+private:
+  T data_[n];  // storage for the n elements
+};
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef ELEMENTS_PER_CYCLE
+#define ELEMENTS_PER_CYCLE 4
+#endif  // ELEMENTS_PER_CYCLE: defaults to 4 unless already defined (e.g. on the command line)
+constexpr int kElementsPerCycle = ELEMENTS_PER_CYCLE;  // number of ints packed into one bundle
+static_assert(kElementsPerCycle > 0);  // a bundle needs at least one element
+using PipeType = DataBundle<int, kElementsPerCycle>;  // payload of a single pipe read/write
+
+using BufferPipe = ext::intel::pipe<class BufferPipeID, PipeType>;  // links the buffer producer and consumer
+using USMPipe = ext::intel::pipe<class USMPipeID, PipeType>;  // links the USM producer and consumer
+
+class BufferProducerID;  // kernel name used by SubmitBufferProducer
+class BufferConsumerID;  // kernel name used by SubmitBufferConsumer
+class USMProducerID;  // kernel name used by SubmitUSMProducer
+class USMConsumerID;  // kernel name used by SubmitUSMConsumer
+
+event SubmitBufferProducer(queue &q, buffer<int, 1> &in_buf, size_t count) {  // producer kernel: in_buf -> BufferPipe
+  return q.submit([&](handler &h) {  // the returned event tracks the submitted kernel
+    accessor input_accessor(in_buf, h, read_only);  // the kernel only reads in_buf
+    h.single_task<BufferProducerID>([=] {  // sends 'count' bundles of kElementsPerCycle ints each
+      fpga_tools::MemoryToPipe<BufferPipe, kElementsPerCycle>(input_accessor, count);
+    });
+  });
+}
+
+event SubmitBufferConsumer(queue &q, buffer<int, 1> &out_buf, size_t count) {  // consumer kernel: BufferPipe -> out_buf
+  return  q.submit([&](handler &h) {  // the returned event tracks the submitted kernel
+    accessor out_accessor(out_buf, h, write_only, no_init);  // write-only; no_init: old contents are not needed
+    h.single_task<BufferConsumerID>([=] {  // receives 'count' bundles of kElementsPerCycle ints each
+      fpga_tools::PipeToMemory<BufferPipe, kElementsPerCycle>(out_accessor, count);
+    });
+  });
+}
+
+event SubmitUSMProducer(queue &q, device_ptr<int> in_ptr, size_t count) {  // producer kernel: in_ptr -> USMPipe
+  return q.single_task<USMProducerID>([=] {  // reads count * kElementsPerCycle ints starting at in_ptr
+    fpga_tools::MemoryToPipe<USMPipe, kElementsPerCycle>(in_ptr, count);
+  });  // the returned event tracks the submitted kernel
+}
+
+event SubmitUSMConsumer(queue &q, device_ptr<int> out_ptr, size_t count) {  // consumer kernel: USMPipe -> out_ptr
+  return q.single_task<USMConsumerID>([=] {  // writes count * kElementsPerCycle ints starting at out_ptr
+    fpga_tools::PipeToMemory<USMPipe, kElementsPerCycle>(out_ptr, count);
+  });  // the returned event tracks the submitted kernel
+}
+
+int main(int argc, char *argv[]) {
+  // Streams the same input through a buffer-based and a USM-based
+  // producer/consumer kernel pair and validates both outputs. Returns 0 or 1.
+#if defined(FPGA_EMULATOR)
+  size_t count_bundles = 4096;
+#else
+  size_t count_bundles = 1 << 16;
+#endif
+  size_t count_total = count_bundles * kElementsPerCycle;  // total number of ints
+  std::vector<int> producer_input(count_total, -1);
+  std::vector<int> buffer_output(count_total, -1);
+  std::vector<int> usm_output(count_total, -1);
+  std::iota(producer_input.begin(), producer_input.end(), 1);  // input is 1, 2, 3, ...
+
+#if defined(FPGA_EMULATOR)
+  ext::intel::fpga_emulator_selector device_selector;
+#else
+  ext::intel::fpga_selector device_selector;
+#endif
+
+  try {
+    queue q(device_selector, dpc_common::exception_handler);
+
+    // testing buffers
+    std::cout << "Running buffer test" << std::endl;
+    {
+      buffer producer_buffer(producer_input);
+      buffer consumer_buffer(buffer_output);
+      SubmitBufferProducer(q, producer_buffer, count_bundles);
+      SubmitBufferConsumer(q, consumer_buffer, count_bundles);
+    }  // buffer destruction waits for the kernels and copies results to the host
+
+    // testing USM
+    std::cout << "Running USM test" << std::endl;
+    {
+      int* producer_ptr = malloc_device<int>(count_total, q);
+      int* consumer_ptr = malloc_device<int>(count_total, q);
+      // malloc_device returns nullptr on failure; never hand that to a kernel
+      if (producer_ptr == nullptr || consumer_ptr == nullptr) {
+        std::cerr << "ERROR: failed to allocate device memory\n";
+        if (producer_ptr != nullptr) free(producer_ptr, q);
+        if (consumer_ptr != nullptr) free(consumer_ptr, q);
+        std::cout << "FAILED" << std::endl;
+        return 1;
+      }
+      device_ptr<int> producer_ptr_d(producer_ptr);
+      device_ptr<int> consumer_ptr_d(consumer_ptr);
+
+      q.memcpy(producer_ptr, producer_input.data(), count_total * sizeof(int)).wait();
+      auto pe = SubmitUSMProducer(q, producer_ptr_d, count_bundles);
+      auto ce = SubmitUSMConsumer(q, consumer_ptr_d, count_bundles);
+      pe.wait();
+      ce.wait();
+      q.memcpy(usm_output.data(), consumer_ptr, count_total * sizeof(int)).wait();
+
+      free(producer_ptr, q);
+      free(consumer_ptr, q);
+    }
+  } catch (exception const &e) {
+    // Catches exceptions in the host code
+    std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n";
+    std::terminate();
+  }
+
+  // Compares one output vector with the input and reports the first mismatch
+  auto validate = [&](const std::vector<int> &output, const char *label) {
+    std::cout << "Validating " << label << " output" << std::endl;
+    for (size_t i = 0; i < count_total; i++) {
+      if (output[i] != producer_input[i]) {
+        std::cerr << "ERROR: at index " << i << " expected: "
+                  << producer_input[i] << ", got: " << output[i] << "\n";
+        std::cout << "FAILED" << std::endl;
+        return false;
+      }
+    }
+    return true;
+  };
+  if (!validate(buffer_output, "buffer") || !validate(usm_output, "USM")) return 1;
+
+  std::cout << "PASSED\n";
+  return 0;
+}
diff --git a/DirectProgramming/DPC++FPGA/include/memory_tools.hpp b/DirectProgramming/DPC++FPGA/include/memory_tools.hpp
new file mode 100644
index 0000000000..dfdd4fc69f
--- /dev/null
+++ b/DirectProgramming/DPC++FPGA/include/memory_tools.hpp
@@ -0,0 +1,69 @@
+#ifndef __MEMORY_TOOLS_HPP__
+#define __MEMORY_TOOLS_HPP__
+
+#include <type_traits>
+
+#include "metaprogramming_tools.hpp"
+
+namespace fpga_tools {
+
+template<typename Pipe, typename PtrT>  // scalar variant: one element of in_ptr per pipe write
+void MemoryToPipe(PtrT in_ptr, size_t count_div_elements_per_cycle) {  // in this overload: number of elements to send
+  using PipeT = decltype(Pipe::read());  // type carried by the pipe
+  static_assert(fpga_tools::has_subscript_v<PtrT>);  // in_ptr must support operator[]
+  static_assert(std::is_same_v<PipeT, std::remove_const_t<std::remove_reference_t<decltype(std::declval<PtrT>()[0])>>>);  // in_ptr's element type must be the pipe type
+
+  for (size_t i = 0; i < count_div_elements_per_cycle; i++) {
+    Pipe::write(in_ptr[i]);  // forward element i to the pipe
+  }
+}
+
+template<typename Pipe, int elements_per_cycle, typename PtrT>  // bundled variant: elements_per_cycle elements per pipe write
+void MemoryToPipe(PtrT in_ptr, size_t count_div_elements_per_cycle) {  // second argument: number of bundles to send
+  using PipeT = decltype(Pipe::read());  // bundle type carried by the pipe
+  static_assert(fpga_tools::has_subscript_v<PipeT>);  // the bundle must support operator[]
+  static_assert(fpga_tools::has_subscript_v<PtrT>);  // in_ptr must support operator[]
+  static_assert(PipeT::size == elements_per_cycle);  // bundle width must match elements_per_cycle
+  static_assert(std::is_same_v<std::remove_reference_t<decltype(std::declval<PipeT>()[0])>, std::remove_const_t<std::remove_reference_t<decltype(std::declval<PtrT>()[0])>>>);  // bundle and memory element types must match
+
+  for (size_t i = 0; i < count_div_elements_per_cycle; i++) {  // one bundle per iteration
+    PipeT pipe_data;
+    #pragma unroll
+    for (int j = 0; j < elements_per_cycle; j++) {
+      pipe_data[j] = in_ptr[i * elements_per_cycle + j];  // bundle i covers elements [i*epc, (i+1)*epc)
+    }
+    Pipe::write(pipe_data);  // send the filled bundle
+  }
+}
+
+template<typename Pipe, typename PtrT>  // scalar variant: one pipe read per element of out_ptr
+void PipeToMemory(PtrT out_ptr, size_t count_div_elements_per_cycle) {  // in this overload: number of elements to receive
+  using PipeT = decltype(Pipe::read());  // type carried by the pipe
+  static_assert(fpga_tools::has_subscript_v<PtrT>);  // out_ptr must support operator[]
+  static_assert(std::is_same_v<PipeT, std::remove_const_t<std::remove_reference_t<decltype(std::declval<PtrT>()[0])>>>);  // out_ptr's element type must be the pipe type
+
+  for (size_t i = 0; i < count_div_elements_per_cycle; i++) {
+    out_ptr[i] = Pipe::read();  // store the next pipe value at index i
+  }
+}
+
+template<typename Pipe, int elements_per_cycle, typename PtrT>  // bundled variant: elements_per_cycle elements per pipe read
+void PipeToMemory(PtrT out_ptr, size_t count_div_elements_per_cycle) {  // second argument: number of bundles to receive
+  using PipeT = decltype(Pipe::read());  // bundle type carried by the pipe
+  static_assert(fpga_tools::has_subscript_v<PipeT>);  // the bundle must support operator[]
+  static_assert(fpga_tools::has_subscript_v<PtrT>);  // out_ptr must support operator[]
+  static_assert(PipeT::size == elements_per_cycle);  // bundle width must match elements_per_cycle
+  static_assert(std::is_same_v<std::remove_reference_t<decltype(std::declval<PipeT>()[0])>, std::remove_const_t<std::remove_reference_t<decltype(std::declval<PtrT>()[0])>>>);  // bundle and memory element types must match
+
+  for (size_t i = 0; i < count_div_elements_per_cycle; i++) {  // one bundle per iteration
+    auto pipe_data = Pipe::read();  // receive the next bundle
+    #pragma unroll
+    for (int j = 0; j < elements_per_cycle; j++) {
+      out_ptr[i * elements_per_cycle + j] = pipe_data[j];  // bundle i covers elements [i*epc, (i+1)*epc)
+    }
+  }
+}
+
+} // namespace fpga_tools
+
+#endif /* __MEMORY_TOOLS_HPP__ */
\ No newline at end of file
diff --git a/DirectProgramming/DPC++FPGA/include/metaprogramming_tools.hpp b/DirectProgramming/DPC++FPGA/include/metaprogramming_tools.hpp
new file mode 100644
index 0000000000..c34137554e
--- /dev/null
+++ b/DirectProgramming/DPC++FPGA/include/metaprogramming_tools.hpp
@@ -0,0 +1,117 @@
+#ifndef __METAPROGRAMMING_TOOLS_HPP__
+#define __METAPROGRAMMING_TOOLS_HPP__
+
+#include <type_traits>
+
+namespace fpga_tools {
+
+//
+// The code below creates the alias templates 'make_integer_range'
+// and 'make_index_range'. These are akin to 'std::make_integer_sequence'
+// and 'std::make_index_sequence', respectively.
+// However, they allow you to specify a range and can either increment
+// or decrement, rather than only produce a strictly increasing sequence.
+//
+template <typename T, typename, T begin, bool increase>
+struct integer_range_impl;  // primary template; only the two specializations below are defined
+
+// incrementing case
+template <typename T, T... N, T begin>
+struct integer_range_impl<T, std::integer_sequence<T, N...>, begin, true> {
+  using type = std::integer_sequence<T, N + begin...>;  // begin + 0, begin + 1, ...
+};
+
+// decrementing case
+template <typename T, T... N, T begin>
+struct integer_range_impl<T, std::integer_sequence<T, N...>, begin, false> {
+  using type = std::integer_sequence<T, begin - N...>;  // begin - 0, begin - 1, ...
+};
+
+// integer_range: |end - begin| values starting at 'begin'; 'end' itself is excluded
+template <typename T, T begin, T end>
+using integer_range = typename integer_range_impl<
+    T, std::make_integer_sequence<T, (begin < end) ? end - begin : begin - end>,
+    begin, (begin < end)>::type;
+
+//
+// make_integer_range: std::integer_sequence<T, ...> running from 'begin' towards 'end'
+//
+// USAGE ('end' itself is never part of the sequence):
+//    make_integer_range<int,1,10>{} ==> 1,2,...,9
+//    make_integer_range<int,10,1>{} ==> 10,9,...,2
+//
+template <class T, T begin, T end>
+using make_integer_range = integer_range<T, begin, end>;
+
+//
+// make_index_range: same as make_integer_range with T fixed to std::size_t
+//
+// USAGE ('end' itself is never part of the sequence):
+//    make_index_range<1,10>{} ==> 1,2,...,9
+//    make_index_range<10,1>{} ==> 10,9,...,2
+//
+template <std::size_t begin, std::size_t end>
+using make_index_range = integer_range<std::size_t, begin, end>;
+
+//
+// The code below creates the alias templates 'make_integer_pow2_sequence'
+// and 'make_index_pow2_sequence'. These generate the sequence
+// 2^0, 2^1, 2^2, ... , 2^(N-1) = 1,2,4,...,2^(N-1)
+//
+template <typename T, typename>
+struct integer_pow2_sequence_impl;  // primary template; only the specialization below is defined
+
+template <typename T, T... Pows>
+struct integer_pow2_sequence_impl<T, std::integer_sequence<T, Pows...>> {
+  using type = std::integer_sequence<T, (T(1) << Pows)...>;  // 1 << 0, 1 << 1, ...
+};
+
+// integer_pow2_sequence: the first N powers of two as a std::integer_sequence<T, ...>
+template <typename T, T N>
+using integer_pow2_sequence =
+    typename integer_pow2_sequence_impl<T,
+                                        std::make_integer_sequence<T, N>>::type;
+
+//
+// make_integer_pow2_sequence: the first N powers of two with element type T
+//
+// USAGE:
+//    make_integer_pow2_sequence<int,5>{} ==> 1,2,4,8,16
+//
+template <class T, T N>
+using make_integer_pow2_sequence = integer_pow2_sequence<T, N>;
+
+//
+// make_index_pow2_sequence: same as above with T fixed to std::size_t
+//
+// USAGE:
+//    make_index_pow2_sequence<5>{} ==> 1,2,4,8,16
+//
+template <std::size_t N>
+using make_index_pow2_sequence = integer_pow2_sequence<std::size_t, N>;
+
+//
+// Checks at compile time whether a type T provides a subscript operator
+//
+template <typename... >
+using void_t = void;  // always void; used below so an invalid expression removes the specialization
+
+template<class T, typename = void>
+struct has_subscript_operator : std::false_type { };  // fallback: T has no usable operator[]
+
+template<class T>
+struct has_subscript_operator<T, void_t<decltype(std::declval<T>()[1])>> 
+  : std::true_type { };  // selected when indexing a T with an int is a valid expression
+
+template <class T>
+struct has_subscript {  // value is true exactly when the true_type specialization above was selected
+  static constexpr bool value =
+    std::is_same_v<typename has_subscript_operator<T>::type, std::true_type>;
+};
+
+template <class T>
+inline constexpr bool has_subscript_v = has_subscript<T>::value;  // variable-template shorthand
+
+} // namespace fpga_tools
+
+#endif  /* __METAPROGRAMMING_TOOLS_HPP__ */
\ No newline at end of file
diff --git a/DirectProgramming/DPC++FPGA/include/unrolled_loop.hpp b/DirectProgramming/DPC++FPGA/include/unrolled_loop.hpp
index 33e7e32e39..d657e003d5 100755
--- a/DirectProgramming/DPC++FPGA/include/unrolled_loop.hpp
+++ b/DirectProgramming/DPC++FPGA/include/unrolled_loop.hpp
@@ -4,93 +4,9 @@
 #include <type_traits>
 #include <utility>
 
-namespace fpga_tools {
-
-//
-// The code below creates the constexprs 'make_integer_range'
-// and 'make_index_range' these are akin to 'std::make_integer_sequence'
-// and 'std::make_index_sequence', respectively.
-// However they allow you to specificy a range and can either increment
-// or decrement, rather than a strict increasing sequence
-//
-template <typename T, typename, T begin, bool increase>
-struct integer_range_impl;
-
-// incrementing case
-template <typename T, T... N, T begin>
-struct integer_range_impl<T, std::integer_sequence<T, N...>, begin, true> {
-  using type = std::integer_sequence<T, N + begin...>;
-};
-
-// decrementing case
-template <typename T, T... N, T begin>
-struct integer_range_impl<T, std::integer_sequence<T, N...>, begin, false> {
-  using type = std::integer_sequence<T, begin - N...>;
-};
-
-// integer_range
-template <typename T, T begin, T end>
-using integer_range = typename integer_range_impl<
-    T, std::make_integer_sequence<T, (begin < end) ? end - begin : begin - end>,
-    begin, (begin < end)>::type;
-
-//
-// make_integer_range
-//
-// USAGE:
-//    make_integer_range<int,1,10>{} ==> 1,2,...,9
-//    make_integer_range<int,10,1>{} ==> 10,9,...,2
-//
-template <class T, T begin, T end>
-using make_integer_range = integer_range<T, begin, end>;
-
-//
-// make_index_range
-//
-// USAGE:
-//    make_index_range<1,10>{} ==> 1,2,...,9
-//    make_index_range<10,1>{} ==> 10,9,...,2
-//
-template <std::size_t begin, std::size_t end>
-using make_index_range = integer_range<std::size_t, begin, end>;
-
-//
-// The code below creates the constexprs 'make_integer_pow2_sequence'
-// and 'make_index_pow2_sequence'. These generate the sequence
-// 2^0, 2^1, 2^2, ... , 2^(N-1) = 1,2,4,...,2^(N-1)
-//
-template <typename T, typename>
-struct integer_pow2_sequence_impl;
-
-template <typename T, T... Pows>
-struct integer_pow2_sequence_impl<T, std::integer_sequence<T, Pows...>> {
-  using type = std::integer_sequence<T, (T(1) << Pows)...>;
-};
-
-// integer_pow2_sequence
-template <typename T, T N>
-using integer_pow2_sequence =
-    typename integer_pow2_sequence_impl<T,
-                                        std::make_integer_sequence<T, N>>::type;
-
-//
-// make_integer_pow2_sequence
-//
-// USAGE:
-//    make_integer_pow2_sequence<int,5>{} ==> 1,2,4,8,16
-//
-template <class T, T N>
-using make_integer_pow2_sequence = integer_pow2_sequence<T, N>;
-
-//
-// make_index_pow2_sequence
-//
-// USAGE:
-//    make_index_pow2_sequence<5>{} ==> 1,2,4,8,16
-//
-template <std::size_t N>
-using make_index_pow2_sequence = integer_pow2_sequence<std::size_t, N>;
+#include "metaprogramming_tools.hpp"
 
+namespace fpga_tools {
 ///////////////////////////////////////////////////////////////////////////////
 //
 // Example usage for UnrolledLoop constexpr: