diff --git a/tree/ntuple/CMakeLists.txt b/tree/ntuple/CMakeLists.txt index 5ceb4e3115345..e7d510f943ff4 100644 --- a/tree/ntuple/CMakeLists.txt +++ b/tree/ntuple/CMakeLists.txt @@ -27,6 +27,8 @@ HEADERS ROOT/RNTupleAttrUtils.hxx ROOT/RNTupleAttrWriting.hxx ROOT/RNTupleDescriptor.hxx + ROOT/RNTupleComposer.hxx + ROOT/RNTupleComposerEntry.hxx ROOT/RNTupleFillContext.hxx ROOT/RNTupleFillStatus.hxx ROOT/RNTupleImtTaskScheduler.hxx @@ -36,7 +38,6 @@ HEADERS ROOT/RNTupleModel.hxx ROOT/RNTupleParallelWriter.hxx ROOT/RNTupleProcessor.hxx - ROOT/RNTupleProcessorEntry.hxx ROOT/RNTupleRange.hxx ROOT/RNTupleReadOptions.hxx ROOT/RNTupleReader.hxx @@ -71,6 +72,8 @@ SOURCES src/RNTuple.cxx src/RNTupleAttrReading.cxx src/RNTupleAttrWriting.cxx + src/RNTupleComposer.cxx + src/RNTupleComposerEntry.cxx src/RNTupleDescriptor.cxx src/RNTupleDescriptorFmt.cxx src/RNTupleFillContext.cxx @@ -79,8 +82,6 @@ SOURCES src/RNTupleMetrics.cxx src/RNTupleModel.cxx src/RNTupleParallelWriter.cxx - src/RNTupleProcessor.cxx - src/RNTupleProcessorEntry.cxx src/RNTupleReader.cxx src/RNTupleSerialize.cxx src/RNTupleTypes.cxx diff --git a/tree/ntuple/inc/ROOT/RNTupleComposer.hxx b/tree/ntuple/inc/ROOT/RNTupleComposer.hxx new file mode 100644 index 0000000000000..69501cc1f9f81 --- /dev/null +++ b/tree/ntuple/inc/ROOT/RNTupleComposer.hxx @@ -0,0 +1,803 @@ +/// \file ROOT/RNTupleComposer.hxx +/// \author Florine de Geus +/// \date 2024-03-26 +/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback +/// is welcome! + +/************************************************************************* + * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. * + * All rights reserved. * + * * + * For the licensing terms see $ROOTSYS/LICENSE. * + * For the list of contributors see $ROOTSYS/README/CREDITS. 
* + *************************************************************************/ + +#ifndef ROOT_RNTupleComposer +#define ROOT_RNTupleComposer + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace ROOT { +namespace Experimental { + +namespace Internal { +struct RNTupleComposerEntryLoader; +} // namespace Internal + +// clang-format off +/** +\class ROOT::Experimental::RNTupleOpenSpec +\ingroup NTuple +\brief Specification of the name and location of an RNTuple, used for creating a new RNTupleComposer. + +An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the +TDirectory (or any of its subclasses) that contains the RNTuple. + +Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python. +*/ +// clang-format on +class RNTupleOpenSpec { + friend class RNTupleComposer; + friend class RNTupleSingleComposer; + friend class RNTupleJoinComposer; + +private: + std::string fNTupleName; + std::variant fStorage; + +public: + RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {} + RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {} + + std::unique_ptr CreatePageSource() const; +}; + +// clang-format off +/** +\class ROOT::Experimental::RNTupleComposerOptionalPtr +\ingroup NTuple +\brief The RNTupleComposerOptionalPtr provides access to values from fields present in an RNTupleComposer, with support +and checks for missing values. 
+*/ +// clang-format on +template +class RNTupleComposerOptionalPtr { + friend class RNTupleComposer; + +private: + Internal::RNTupleComposerEntry *fComposerEntry; + Internal::RNTupleComposerEntry::FieldIndex_t fFieldIndex; + + RNTupleComposerOptionalPtr(Internal::RNTupleComposerEntry *composerEntry, + Internal::RNTupleComposerEntry::FieldIndex_t fieldIdx) + : fComposerEntry(composerEntry), fFieldIndex(fieldIdx) + { + } + +public: + ///////////////////////////////////////////////////////////////////////////// + /// \brief Check if the pointer currently holds a valid value. + bool HasValue() const { return fComposerEntry->IsValidField(fFieldIndex); } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Get a shared pointer to the field value managed by the composition's entry. + /// + /// \return A `std::shared_ptr` if the field is valid in the current entry, or a `nullptr` otherwise. + std::shared_ptr GetPtr() const + { + if (fComposerEntry->IsValidField(fFieldIndex)) { + const auto &value = fComposerEntry->GetValue(fFieldIndex); + return value.template GetPtr(); + } + + return nullptr; + } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Get a non-owning pointer to the field value managed by the composition's entry. + /// + /// \return A `T*` if the field is valid in the current entry, or a `nullptr` otherwise. + T *GetRawPtr() const { return GetPtr().get(); } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Bind the value to `valuePtr`. + /// + /// \param[in] valuePtr Pointer to bind the value to. + /// + /// \warning Use this function with care! Values may not always be valid for every entry during reading, for + /// example when a field is not present in one of the chained compositions or when during a join operation, no + /// matching entry in the auxiliary composition can be found. 
Reading `valuePtr` as-is therefore comes with the risk + /// of reading invalid data. After binding a pointer to an `RNTupleComposerOptionalPtr`, we *strongly* recommend only + /// accessing its data through this interface, to ensure that only valid data can be read. + void BindRawPtr(T *valuePtr) { fComposerEntry->BindRawPtr(fFieldIndex, valuePtr); } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Get a reference to the field value managed by the composition's entry. + /// + /// Throws an exception if the field is invalid in the composition's current entry. + const T &operator*() const + { + if (auto ptr = GetPtr()) + return *ptr; + else + throw RException(R__FAIL("cannot read \"" + fComposerEntry->FindFieldName(fFieldIndex) + + "\" because it has no value for the current entry")); + } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Access the field value managed by the composition's entry. + /// + /// Throws an exception if the field is invalid in the composition's current entry. + const T *operator->() const + { + if (auto ptr = GetPtr()) + return ptr.get(); + else + throw RException(R__FAIL("cannot read \"" + fComposerEntry->FindFieldName(fFieldIndex) + + "\" because it has no value for the current entry")); + } +}; + +// clang-format off +/** +\class ROOT::Experimental::RNTupleComposerOptionalPtr +\ingroup NTuple +\brief Specialization of RNTupleComposerOptionalPtr for `void`-type pointers. 
+*/ +// clang-format on +template <> +class RNTupleComposerOptionalPtr { + friend class RNTupleComposer; + +private: + Internal::RNTupleComposerEntry *fComposerEntry; + Internal::RNTupleComposerEntry::FieldIndex_t fFieldIndex; + + RNTupleComposerOptionalPtr(Internal::RNTupleComposerEntry *composerEntry, + Internal::RNTupleComposerEntry::FieldIndex_t fieldIdx) + : fComposerEntry(composerEntry), fFieldIndex(fieldIdx) + { + } + +public: + ///////////////////////////////////////////////////////////////////////////// + /// \brief Check if the pointer currently holds a valid value. + bool HasValue() const { return fComposerEntry->IsValidField(fFieldIndex); } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Get the pointer to the field value managed by the composition's entry. + /// + /// \return A `std::shared_ptr` if the field is valid in the current entry, or a `nullptr` otherwise. + std::shared_ptr GetPtr() const + { + if (fComposerEntry->IsValidField(fFieldIndex)) { + const auto &value = fComposerEntry->GetValue(fFieldIndex); + return value.template GetPtr(); + } + + return nullptr; + } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Get a non-owning pointer to the field value managed by the composition's entry. + /// + /// \return A `void*` if the field is valid in the current entry, or a `nullptr` otherwise. + void *GetRawPtr() const { return GetPtr().get(); } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Bind the value to `valuePtr`. + /// + /// \param[in] valuePtr Pointer to bind the value to. + /// + /// \warning Use this function with care! Values may not always be valid for every entry, for example when a field + /// is not present in one of the chained composition or when during a join operation, no matching entry in the + /// auxiliary composition can be found. 
Reading `valuePtr` as-is therefore comes with the risk of reading invalid + /// data. After binding a pointer to an `RNTupleComposerOptionalPtr`, we *strongly* recommend only accessing its data + /// through this interface, to ensure that only valid data can be read. + void BindRawPtr(void *valuePtr) { fComposerEntry->BindRawPtr(fFieldIndex, valuePtr); } +}; + +// clang-format off +/** +\class ROOT::Experimental::RNTupleComposer +\ingroup NTuple +\brief Interface for composing combinations of RNTuples at runtime, either vertically ("chains") or horizontally ("joins"). + +Iteration over entries of composed RNTuples is provided via the RNTupleProcessor. + +Example usage (see ntpl012_processor_chain.C and ntpl015_processor_join.C for bigger examples): + +~~~{.cpp} +#include +#include +using ROOT::Experimental::RNTupleComposer; +using ROOT::Experimental::RNTupleProcessor; +using ROOT::Experimental::RNTupleOpenSpec; + +std::vector ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}}; +auto composer = RNTupleComposer::CreateChain(ntuples); + +auto pt = composer->RequestField("pt"); + +for (const auto idx : RNTupleProcessor(*composer)) { + std::cout << "event = " << idx << ", pt = " << *pt << std::endl; +} +~~~ + +An RNTupleComposer is created either: +1. By providing one or more RNTupleOpenSpecs, each of which contains the name and storage location of a single RNTuple; +2. By providing one or more previously created RNTupleComposers. + +Because the schemas of the RNTuples that are part of an RNTupleComposer are not necessarily identical, and because +entries may be only partially complete in a join-based composition, field values may be marked as +"invalid", at which point their data should not be read. This is handled by the RNTupleComposerOptionalPtr +that is returned by RequestField().
+*/ +// clang-format on +class RNTupleComposer { + friend struct ROOT::Experimental::Internal::RNTupleComposerEntryLoader; // for unit tests + friend class RNTupleSingleComposer; + friend class RNTupleChainComposer; + friend class RNTupleJoinComposer; + friend class RNTupleProcessor; + +protected: + std::string fCompositionName; + std::shared_ptr fEntry = nullptr; + std::unordered_set fFieldIdxs; + + /// Total number of entries. Only to be used internally by the composer, not meant to be exposed in the public + /// interface. + ROOT::NTupleSize_t fNEntries = ROOT::kInvalidNTupleIndex; + + /// Current entry number. Starts out as the invalid *entry index* sentinel (not the descriptor-ID sentinel), + /// matching the type of the member. + ROOT::NTupleSize_t fCurrentEntryNumber = ROOT::kInvalidNTupleIndex; + std::size_t fCurrentChainIndex = 0; ///< Index of the currently connected composition in the composer chain + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Initialize the composer by creating an (initially empty) `fEntry`, or setting an existing one. + virtual void Initialize(std::shared_ptr entry) = 0; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Check if the composer already has been initialized. + bool IsInitialized() const { return fEntry != nullptr; } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Connect fields to the page source of the composition's underlying RNTuple(s). + /// + /// \param[in] fieldIdxs Indices of the fields to connect. + /// \param[in] provenance Provenance of the composition. + /// \param[in] updateFields Whether the fields in the entry need to be updated, because the current underlying + /// RNTuple source changed. + virtual void Connect(const std::unordered_set &fieldIdxs, + const Internal::RNTupleCompositionProvenance &provenance, bool updateFields) = 0; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Load the entry identified by the provided entry number.
+ /// + /// \param[in] entryNumber Entry number to load + /// + /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise. + virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) = 0; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Get the total number of entries in this composition. + virtual ROOT::NTupleSize_t GetNEntries() = 0; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Check if a field exists on-disk and can be read by the composition. + /// + /// \param[in] fieldName Name of the field to check. + virtual bool CanReadFieldFromDisk(std::string_view fieldName) = 0; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Add a field to the entry. + /// + /// + /// \param[in] fieldName Name of the field to add. + /// \param[in] typeName Type of the field to add. + /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be + /// created. + /// \param[in] provenance Provenance of the composition. + /// + /// \return The index of the newly added field in the entry. + /// + /// In case the field was already present in the entry, the index of the existing field is returned. + virtual Internal::RNTupleComposerEntry::FieldIndex_t + AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr, + const Internal::RNTupleCompositionProvenance &provenance) = 0; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Add the entry mappings for this composition to the provided join table. + /// + /// \param[in] joinTable the join table to map the entries to. + /// \param[in] entryOffset In case the entry mapping is added from a chain, the offset of the entry indexes to use + /// with respect to the composition's position in the chain. 
+ virtual void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) = 0; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Composer-specific implementation for printing its structure, called by PrintStructure(). + /// + /// \param[in,out] output Output stream to print to. + virtual void PrintStructureImpl(std::ostream &output) const = 0; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Create a new base RNTupleComposer. + /// + /// \param[in] compositionName Name of the composed RNTuple. By default, this is the name of the underlying RNTuple + /// for RNTupleSingleComposer, the name of the first composition in the chain for RNTupleChainComposer, or the name + /// of the primary RNTuple for RNTupleJoinComposer. + RNTupleComposer(std::string_view compositionName) : fCompositionName(compositionName) {} + +public: + RNTupleComposer(const RNTupleComposer &) = delete; + RNTupleComposer(RNTupleComposer &&) = delete; + RNTupleComposer &operator=(const RNTupleComposer &) = delete; + RNTupleComposer &operator=(RNTupleComposer &&) = delete; + virtual ~RNTupleComposer() = default; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Get the entry number that is currently loaded. + ROOT::NTupleSize_t GetCurrentEntryNumber() const { return fCurrentEntryNumber; } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Get the number of the current position in the chain of (composed) RNTuples. + /// + /// This method is only relevant for the RNTupleChainComposer. For the other compositions, 0 is always returned. + std::size_t GetCurrentChainIndex() const { return fCurrentChainIndex; } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Get the name of the composed RNTuple. 
+ /// + /// Unless this name was explicitly specified during creation of the composition, this is the name of the underlying + /// RNTuple for RNTupleSingleComposer, the name of the first composition in the chain for RNTupleChainComposer, or + /// the name of the primary RNTuple for RNTupleJoinComposer. + const std::string &GetCompositionName() const { return fCompositionName; } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Request access to a field in a composed RNTuple (for reading). + /// + /// \tparam T Type of the requested field. + /// + /// \param[in] fieldName Name of the requested field. + /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be + /// created. + /// + /// \return An RNTupleComposerOptionalPtr of type `T`, which provides access to the field's value. + /// + /// \warning Provide a `valuePtr` with care! Values may not always be valid for every entry, for example when a field + /// is not present in one of the chained composition or when during a join operation, no matching entry in the + /// auxiliary composition can be found. Reading `valuePtr` as-is therefore comes with the risk of reading invalid + /// data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through the + /// interface of the returned `RNTupleComposerOptionalPtr`, to ensure that only valid data can be read. 
+ template + RNTupleComposerOptionalPtr RequestField(const std::string &fieldName, void *valuePtr = nullptr) + { + Initialize(fEntry); + std::string typeName{}; + if constexpr (!std::is_void_v) { + typeName = ROOT::Internal::GetRenormalizedTypeName(typeid(T)); + } + auto fieldIdx = AddFieldToEntry(fieldName, typeName, valuePtr, Internal::RNTupleCompositionProvenance()); + return RNTupleComposerOptionalPtr(fEntry.get(), fieldIdx); + } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Request access to a field for reading during processing. + /// + /// \param[in] fieldName Name of the requested field. + /// \param[in] typeName Type of the requested field. + /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be + /// created. + /// + /// \return A void-type RNTupleComposerOptionalPtr, which provides access to the field's value. + /// + /// \warning Provide a `valuePtr` with care! Values may not always be valid for every entry, for example when a field + /// is not present in one of the chained compositions or when, during a join operation, no matching entry in the + /// auxiliary composition can be found. Reading `valuePtr` as-is therefore comes with the risk of reading invalid + /// data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through the + /// interface of the returned `RNTupleComposerOptionalPtr`, to ensure that only valid data can be read. + RNTupleComposerOptionalPtr + RequestField(const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr) + { + Initialize(fEntry); + auto fieldIdx = AddFieldToEntry(fieldName, typeName, valuePtr, Internal::RNTupleCompositionProvenance()); + return RNTupleComposerOptionalPtr(fEntry.get(), fieldIdx); + } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Print a graphical representation of the composition.
+ /// + /// \param[in,out] output Stream to print to (default is stdout). + /// + /// ### Example: + /// The structure of a composition representing a join between a single primary RNTuple and a chain of two auxiliary + /// RNTuples will be printed as follows: + /// ~~~ + /// +-----------------------------+ +-----------------------------+ + /// | ntuple | | ntuple_aux | + /// | ntuple.root | | ntuple_aux1.root | + /// +-----------------------------+ +-----------------------------+ + /// +-----------------------------+ + /// | ntuple_aux | + /// | ntuple_aux2.root | + /// +-----------------------------+ + /// ~~~ + /// + /// This is a thin, const wrapper around the composer-specific PrintStructureImpl(). + void PrintStructure(std::ostream &output = std::cout) const { PrintStructureImpl(output); } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Create an RNTupleComposer for a single RNTuple. + /// + /// \param[in] ntuple The name and storage location of the RNTuple to process. + /// \param[in] compositionName The name to give to the composition. If empty, the name of the input RNTuple is used. + /// + /// \return A pointer to the newly created RNTupleComposer. + static std::unique_ptr Create(RNTupleOpenSpec ntuple, std::string_view compositionName = ""); + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Create an RNTupleComposer for a *chain* (i.e., a vertical combination) of RNTuples. + /// + /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process. + /// \param[in] compositionName The name to give to the composition. If empty, the name of the first RNTuple is used. + /// + /// \return A pointer to the newly created RNTupleComposer. + static std::unique_ptr + CreateChain(std::vector ntuples, std::string_view compositionName = ""); + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Create an RNTupleComposer for a *chain* (i.e., a vertical combination) of other RNTupleComposers.
+ /// + /// \param[in] innerCompositions A list with the composers to chain. + /// \param[in] compositionName The name to give to the composition. If empty, the name of the first inner composition + /// is used. + /// + /// \return A pointer to the newly created RNTupleComposer. + static std::unique_ptr + CreateChain(std::vector> innerCompositions, std::string_view compositionName = ""); + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Create an RNTupleComposer for a *join* (i.e., a horizontal composition) of RNTuples. + /// + /// \param[in] primaryNTuple The name and location of the primary RNTuple. + /// \param[in] auxNTuple The name and location of the RNTuple to join the primary RNTuple with. + /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned. + /// The join is made based on the combined join field values, and therefore each field has to be present in each + /// specified RNTuple. If an empty list is provided, it is assumed that the specified RNTuples are fully aligned. + /// \param[in] compositionName The name to give to the composition. If empty, the name of the primary RNTuple is + /// used. + /// + /// \return A pointer to the newly created RNTupleComposer. + static std::unique_ptr CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple, + const std::vector &joinFields, + std::string_view compositionName = ""); + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Create an RNTupleComposer for a *join* (i.e., a horizontal composition) of other RNTupleComposers. + /// + /// \param[in] primaryComposition The primary composition. + /// \param[in] auxComposition The composition to join the primary composition with. + /// \param[in] joinFields The names of the fields on which to join, in case the entries of the primary and auxiliary + /// compositions are unaligned.
The join is made based on the combined join field values, and therefore each + /// field has to be present in each specified composition. If an empty list is provided, it is assumed that the + /// compositions are fully aligned. + /// \param[in] compositionName Name of the composed RNTuple. Unless specified otherwise in + /// RNTupleComposer::CreateJoin, this is the name of the primary composition. + /// + /// \return A pointer to the newly created RNTupleComposer. + static std::unique_ptr + CreateJoin(std::unique_ptr primaryComposition, std::unique_ptr auxComposition, + const std::vector &joinFields, std::string_view compositionName = ""); +}; + +// clang-format off +/** +\class ROOT::Experimental::RNTupleSingleComposer +\ingroup NTuple +\brief Composer specialization for processing a single RNTuple. +*/ +// clang-format on +class RNTupleSingleComposer : public RNTupleComposer { + friend class RNTupleComposer; + +private: + RNTupleOpenSpec fNTupleSpec; + std::unique_ptr fPageSource; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Create a new field and connect it to the composition's page source. + /// + /// \param[in] qualifiedFieldName Name of the field to add, prefixed with its parent fields, if applicable. + /// \param[in] typeName Type of the field to add. + /// + /// \return The newly created field. + /// \throws ROOT::RException In case the requested field cannot be found on disk. + std::unique_ptr + CreateAndConnectField(const std::string &qualifiedFieldName, const std::string &typeName); + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Initialize the composer by creating an (initially empty) `fEntry`, or setting an existing one. + /// + /// At this point, the page source for the underlying RNTuple will be created and opened. 
+ void Initialize(std::shared_ptr entry = nullptr) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Connect the provided field indices in the entry to their on-disk fields. + void Connect(const std::unordered_set &fieldIdxs, + const Internal::RNTupleCompositionProvenance &provenance = Internal::RNTupleCompositionProvenance(), + bool updateFields = false) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in the + /// (chained) composition). + /// + /// \sa ROOT::Experimental::RNTupleComposer::LoadEntry + ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Get the total number of entries in this composition. + /// + /// Initializes the composer if necessary and, as long as `fNEntries` still holds the invalid sentinel, connects + /// the currently requested fields before returning `fNEntries`. + ROOT::NTupleSize_t GetNEntries() final + { + Initialize(); + if (fNEntries == ROOT::kInvalidNTupleIndex) + Connect(fFieldIdxs); + return fNEntries; + } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Check if a field exists on disk in the underlying RNTuple. + /// + /// \sa RNTupleComposer::CanReadFieldFromDisk() + bool CanReadFieldFromDisk(std::string_view fieldName) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Add a field to the entry. + /// + /// \sa RNTupleComposer::AddFieldToEntry() + Internal::RNTupleComposerEntry::FieldIndex_t AddFieldToEntry( + const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr, + const Internal::RNTupleCompositionProvenance &provenance = Internal::RNTupleCompositionProvenance()) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Add the entry mappings for this composition to the provided join table.
+ /// + /// \sa ROOT::Experimental::RNTupleComposer::AddEntriesToJoinTable + void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Composer-specific implementation for printing its structure, called by PrintStructure(). + /// + /// \sa ROOT::Experimental::RNTupleComposer::PrintStructureImpl + void PrintStructureImpl(std::ostream &output) const final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Construct a new RNTupleComposer for processing a single RNTuple. + /// + /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process. + /// \param[in] compositionName Name of the RNTuple to use in the composition. Unless specified otherwise in + /// RNTupleComposer::Create, this is the on-disk name of the underlying RNTuple. + RNTupleSingleComposer(RNTupleOpenSpec ntuple, std::string_view compositionName); + +public: + RNTupleSingleComposer(const RNTupleSingleComposer &) = delete; + RNTupleSingleComposer(RNTupleSingleComposer &&) = delete; + RNTupleSingleComposer &operator=(const RNTupleSingleComposer &) = delete; + RNTupleSingleComposer &operator=(RNTupleSingleComposer &&) = delete; + ~RNTupleSingleComposer() override + { + // The entry's fields need to be deleted before fPageSource. + if (fEntry) + fEntry->Clear(); + }; +}; + +// clang-format off +/** +\class ROOT::Experimental::RNTupleChainComposer +\ingroup NTuple +\brief Composer specialization for vertically combined (*chained*) RNTupleComposers. 
+*/ +// clang-format on +class RNTupleChainComposer : public RNTupleComposer { + friend class RNTupleComposer; + +private: + std::vector> fInnerCompositions; + std::vector fInnerNEntries; + + Internal::RNTupleCompositionProvenance fProvenance; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Initialize the composer by creating an (initially empty) `fEntry`, or setting an existing one. + void Initialize(std::shared_ptr entry = nullptr) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Connect the provided fields indices in the entry to their on-disk fields. + /// + /// \sa RNTupleComposer::Connect() + void Connect(const std::unordered_set &fieldIdxs, + const Internal::RNTupleCompositionProvenance &provenance = Internal::RNTupleCompositionProvenance(), + bool updateFields = false) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Update the entry to read values from the composition at the provided index. + void ConnectInnerComposition(std::size_t chainIdx); + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all compositions in + /// the chain). + /// + /// \sa ROOT::Experimental::RNTupleComposer::LoadEntry + ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Get the total number of entries in this composition. + /// + /// \note This requires opening all underlying RNTuples being processed in the chain, and could become costly! + ROOT::NTupleSize_t GetNEntries() final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Check if a field exists on-disk and can be read by the composition. 
+ /// + /// Only the inner composition at the current chain index is consulted. + /// + /// \sa RNTupleComposer::CanReadFieldFromDisk() + bool CanReadFieldFromDisk(std::string_view fieldName) final + { + return fInnerCompositions[fCurrentChainIndex]->CanReadFieldFromDisk(fieldName); + } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Add a field to the entry. + /// + /// \sa RNTupleComposer::AddFieldToEntry() + Internal::RNTupleComposerEntry::FieldIndex_t AddFieldToEntry( + const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr, + const Internal::RNTupleCompositionProvenance &provenance = Internal::RNTupleCompositionProvenance()) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Add the entry mappings for this composition to the provided join table. + /// + /// \sa ROOT::Experimental::RNTupleComposer::AddEntriesToJoinTable + void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Composer-specific implementation for printing its structure, called by PrintStructure(). + /// + /// \sa ROOT::Experimental::RNTupleComposer::PrintStructureImpl + void PrintStructureImpl(std::ostream &output) const final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Construct a new RNTupleChainComposer. + /// + /// \param[in] compositions The inner composers to chain. + /// \param[in] compositionName Name of the composed RNTuple. Unless specified otherwise in + /// RNTupleComposer::CreateChain, this is the name of the first composer in the chain. + /// + /// Compositions are processed in the order in which they are specified.
+ RNTupleChainComposer(std::vector> compositions, std::string_view compositionName); + +public: + RNTupleChainComposer(const RNTupleChainComposer &) = delete; + RNTupleChainComposer(RNTupleChainComposer &&) = delete; + RNTupleChainComposer &operator=(const RNTupleChainComposer &) = delete; + RNTupleChainComposer &operator=(RNTupleChainComposer &&) = delete; + ~RNTupleChainComposer() override = default; +}; + +// clang-format off +/** +\class ROOT::Experimental::RNTupleJoinComposer +\ingroup NTuple +\brief Composer specialization for horizontally combined (*joined*) RNTupleComposers. +*/ +// clang-format on +class RNTupleJoinComposer : public RNTupleComposer { + friend class RNTupleComposer; + +private: + std::unique_ptr fPrimaryComposition; + std::unique_ptr fAuxiliaryComposition; + + std::vector fJoinFieldNames; + std::set fJoinFieldIdxs; + + std::unique_ptr fJoinTable; + bool fJoinTableIsBuilt = false; + + std::unordered_set fAuxiliaryFieldIdxs; + + /// \brief Initialize the composition by creating an (initially empty) `fEntry`, or setting an existing one. + void Initialize(std::shared_ptr entry = nullptr) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Connect the provided fields indices in the entry to their on-disk fields. + /// + /// \sa RNTupleComposer::Connect() + void Connect(const std::unordered_set &fieldIdxs, + const Internal::RNTupleCompositionProvenance &provenance = Internal::RNTupleCompositionProvenance(), + bool updateFields = false) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Load the entry identified by the provided entry number of the primary composition. + /// + /// \sa ROOT::Experimental::RNTupleComposer::LoadEntry + ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Get the total number of entries in this composition. 
+ ROOT::NTupleSize_t GetNEntries() final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Set the validity for all fields in the auxiliary composition at once. + void SetAuxiliaryFieldValidity(bool validity); + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Check if a field exists on-disk and can be read by the composition. + /// + /// \sa RNTupleComposer::CanReadFieldFromDisk() + bool CanReadFieldFromDisk(std::string_view fieldName) final + { + if (!fPrimaryComposition->CanReadFieldFromDisk(fieldName)) { + if (fieldName.find(fAuxiliaryComposition->GetCompositionName()) == 0) + fieldName = fieldName.substr(fAuxiliaryComposition->GetCompositionName().size() + 1); + return fAuxiliaryComposition->CanReadFieldFromDisk(fieldName); + } + + return true; + } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Add a field to the entry. + /// + /// \sa RNTupleComposer::AddFieldToEntry() + Internal::RNTupleComposerEntry::FieldIndex_t AddFieldToEntry( + const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr, + const Internal::RNTupleCompositionProvenance &provenance = Internal::RNTupleCompositionProvenance()) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Add the entry mappings for this composition to the provided join table. + /// + /// \sa ROOT::Experimental::RNTupleComposer::AddEntriesToJoinTable + void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Composer-specific implementation for printing its structure, called by PrintStructure(). 
+ /// + /// \sa ROOT::Experimental::RNTupleComposer::PrintStructureImpl + void PrintStructureImpl(std::ostream &output) const final; + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Construct a new RNTupleJoinComposer. + /// \param[in] primaryComposition The primary composition. + /// \param[in] auxComposition The composition to join the primary compostion with. + /// \param[in] joinFields The names of the fields on which to join, in case the entries of the primary and auxiliary + /// compositions are unaligned. The join is made based on the combined join field values, and therefore each + /// field has to be present in each specified composition. If an empty list is provided, it is assumed that the + /// compositions are fully aligned. + /// \param[in] compositionName Name of the composed RNTuple. Unless specified otherwise in + /// RNTupleComposer::CreateJoin, this is the name of the primary composition. + RNTupleJoinComposer(std::unique_ptr primaryComposition, + std::unique_ptr auxComposition, const std::vector &joinFields, + std::string_view compositionName); + +public: + RNTupleJoinComposer(const RNTupleJoinComposer &) = delete; + RNTupleJoinComposer operator=(const RNTupleJoinComposer &) = delete; + RNTupleJoinComposer(RNTupleJoinComposer &&) = delete; + RNTupleJoinComposer operator=(RNTupleJoinComposer &&) = delete; + ~RNTupleJoinComposer() override = default; +}; + +} // namespace Experimental +} // namespace ROOT + +#endif // ROOT_RNTupleComposer diff --git a/tree/ntuple/inc/ROOT/RNTupleProcessorEntry.hxx b/tree/ntuple/inc/ROOT/RNTupleComposerEntry.hxx similarity index 74% rename from tree/ntuple/inc/ROOT/RNTupleProcessorEntry.hxx rename to tree/ntuple/inc/ROOT/RNTupleComposerEntry.hxx index d72ac8f282247..d6ef5bc241a1f 100644 --- a/tree/ntuple/inc/ROOT/RNTupleProcessorEntry.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleComposerEntry.hxx @@ -1,4 +1,4 @@ -/// \file ROOT/RNTupleProcessor.hxx +/// \file 
ROOT/RNTupleComposerEntry.hxx /// \author Florine de Geus /// \date 2025-06-25 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback @@ -12,8 +12,8 @@ * For the list of contributors see $ROOTSYS/README/CREDITS. * *************************************************************************/ -#ifndef ROOT_RNTupleProcessorEntry -#define ROOT_RNTupleProcessorEntry +#ifndef ROOT_RNTupleComposerEntry +#define ROOT_RNTupleComposerEntry #include @@ -27,45 +27,45 @@ namespace ROOT { namespace Experimental { namespace Internal { /** -\class ROOT::Experimental::RNTupleProcessorProvenance +\class ROOT::Experimental::Internal::RNTupleCompositionProvenance \ingroup NTuple -\brief Identifies how a processor is composed. +\brief Identifies the provenance of a composed RNTuple. -The processor provenance is used in RNTupleProcessorEntry to identify how an (auxiliary) field in a composed processor +The composition provenance is used in RNTupleComposerEntry to identify how an (auxiliary) field in a composed RNTuple can be accessed. */ // clang-format on -class RNTupleProcessorProvenance { +class RNTupleCompositionProvenance { private: std::string fProvenance{}; public: - RNTupleProcessorProvenance() = default; - RNTupleProcessorProvenance(const std::string &provenance) : fProvenance(provenance) {} + RNTupleCompositionProvenance() = default; + RNTupleCompositionProvenance(const std::string &provenance) : fProvenance(provenance) {} ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the full processor provenance, in the form of "x.y.z". + /// \brief Get the full composer provenance, in the form of "x.y.z". std::string Get() const { return fProvenance; } ///////////////////////////////////////////////////////////////////////////// - /// \brief Add a new processor to the provenance. + /// \brief Add a new composer to the provenance. /// - /// \param[in] processorName Name of the processor to add.
+ /// \param[in] ntupleName Name of the composed RNTuple to add. /// /// \return The updated provenance. - RNTupleProcessorProvenance Evolve(const std::string &processorName) const + RNTupleCompositionProvenance Evolve(const std::string &ntupleName) const { if (fProvenance.empty()) - return RNTupleProcessorProvenance(processorName); + return RNTupleCompositionProvenance(ntupleName); - return RNTupleProcessorProvenance(fProvenance + "." + processorName); + return RNTupleCompositionProvenance(fProvenance + "." + ntupleName); } ///////////////////////////////////////////////////////////////////////////// /// \brief Check whether the provenance subsumes the provenance in `other`. /// /// \param[in] other The other provenance - bool Contains(const RNTupleProcessorProvenance &other) const + bool Contains(const RNTupleCompositionProvenance &other) const { return fProvenance.rfind(other.fProvenance) != std::string::npos; } @@ -82,37 +82,37 @@ public: // clang-format off /** -\class ROOT::Experimental::Internal::RNTupleProcessorEntry +\class ROOT::Experimental::Internal::RNTupleComposerEntry \ingroup NTuple -\brief Collection of values in an RNTupleProcessor, analogous to REntry, with checks and support for missing values. +\brief Collection of values in an RNTupleComposer, analogous to REntry, with checks and support for missing values. */ // clang-format on -class RNTupleProcessorEntry { +class RNTupleComposerEntry { public: // We don't use RFieldTokens here, because it (semantically) does not make sense for the entry to be fixed to the // schema ID of a particular model. 
using FieldIndex_t = std::uint64_t; private: - struct RProcessorValue { + struct RComposerValue { std::unique_ptr fField; std::string fQualifiedFieldName; ROOT::RFieldBase::RValue fValue; bool fIsValid; - RNTupleProcessorProvenance fProcessorProvenance; + RNTupleCompositionProvenance fCompositionProvenance; - RProcessorValue(std::unique_ptr field, std::string_view qualifiedFieldName, - ROOT::RFieldBase::RValue &&value, bool isValid, RNTupleProcessorProvenance provenance) + RComposerValue(std::unique_ptr field, std::string_view qualifiedFieldName, + ROOT::RFieldBase::RValue &&value, bool isValid, RNTupleCompositionProvenance provenance) : fField(std::move(field)), fQualifiedFieldName(qualifiedFieldName), fValue(std::move(value)), fIsValid(isValid), - fProcessorProvenance(provenance) + fCompositionProvenance(provenance) { } }; - std::vector fProcessorValues; + std::vector fComposerValues; // Maps from the field name to all type alternatives for that field that have been added to the entry. std::unordered_map> fFieldName2Index; @@ -121,7 +121,7 @@ public: /// \brief Clear all fields from the entry. void Clear() { - fProcessorValues.clear(); + fComposerValues.clear(); fFieldName2Index.clear(); } @@ -132,8 +132,8 @@ public: /// \param[in] isValid The new validity of the field. void SetFieldValidity(FieldIndex_t fieldIdx, bool isValid) { - assert(fieldIdx < fProcessorValues.size()); - fProcessorValues[fieldIdx].fIsValid = isValid; + assert(fieldIdx < fComposerValues.size()); + fComposerValues[fieldIdx].fIsValid = isValid; } ///////////////////////////////////////////////////////////////////////////// @@ -142,8 +142,8 @@ public: /// \param[in] fieldIdx The index of the field in the entry. 
bool IsValidField(FieldIndex_t fieldIdx) const { - assert(fieldIdx < fProcessorValues.size()); - return fProcessorValues[fieldIdx].fIsValid; + assert(fieldIdx < fComposerValues.size()); + return fComposerValues[fieldIdx].fIsValid; } ///////////////////////////////////////////////////////////////////////////// @@ -157,7 +157,7 @@ public: ///////////////////////////////////////////////////////////////////////////// /// \brief Find the field index of the provided field in the entry. /// - /// \param[in] canonicalFieldName The name of the field in the entry, including its processor name prefixes and + /// \param[in] canonicalFieldName The name of the field in the entry, including its composition prefixes and /// parent field names, if applicable. /// /// \return A `std::optional` containing the field index if it was found. @@ -170,11 +170,11 @@ public: /// \param[in] field Reference to the field to add, used to to create its corresponding RValue. /// \param[in] valuePtr Pointer to an object corresponding to the field's type to bind to its value. If this is a /// `nullptr`, a pointer will be created. - /// \param[in] provenance Processor provenance of the field. + /// \param[in] provenance Composition provenance of the field. /// /// \return The field index of the newly added field. FieldIndex_t AddField(const std::string &qualifiedFieldName, std::unique_ptr field, void *valuePtr, - const RNTupleProcessorProvenance &provenance); + const RNTupleCompositionProvenance &provenance); ///////////////////////////////////////////////////////////////////////////// /// \brief Update a field in the entry, preserving the value pointer. 
@@ -199,18 +199,18 @@ public: const ROOT::RFieldBase::RValue &GetValue(FieldIndex_t fieldIdx) const { - assert(fieldIdx < fProcessorValues.size()); - return fProcessorValues[fieldIdx].fValue; + assert(fieldIdx < fComposerValues.size()); + return fComposerValues[fieldIdx].fValue; } ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the processor provenance of a field in the entry. + /// \brief Get the composition provenance of a field in the entry. /// /// \param[in] fieldIdx The index of the field in the entry. - const RNTupleProcessorProvenance &GetFieldProvenance(FieldIndex_t fieldIdx) const + const RNTupleCompositionProvenance &GetCompositionProvenance(FieldIndex_t fieldIdx) const { - assert(fieldIdx < fProcessorValues.size()); - return fProcessorValues[fieldIdx].fProcessorProvenance; + assert(fieldIdx < fComposerValues.size()); + return fComposerValues[fieldIdx].fCompositionProvenance; } ///////////////////////////////////////////////////////////////////////////// @@ -219,8 +219,8 @@ public: /// \param[in] fieldIdx The index of the field in the entry. 
std::string GetQualifiedFieldName(FieldIndex_t fieldIdx) const { - assert(fieldIdx < fProcessorValues.size()); - return fProcessorValues[fieldIdx].fQualifiedFieldName; + assert(fieldIdx < fComposerValues.size()); + return fComposerValues[fieldIdx].fQualifiedFieldName; } ///////////////////////////////////////////////////////////////////////////// @@ -231,4 +231,4 @@ public: } // namespace Experimental } // namespace ROOT -#endif // ROOT_RNTupleProcessorEntry +#endif // ROOT_RNTupleComposerEntry diff --git a/tree/ntuple/inc/ROOT/RNTupleProcessor.hxx b/tree/ntuple/inc/ROOT/RNTupleProcessor.hxx index 5598260ad3ae5..28b270e2bba51 100644 --- a/tree/ntuple/inc/ROOT/RNTupleProcessor.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleProcessor.hxx @@ -15,440 +15,35 @@ #ifndef ROOT_RNTupleProcessor #define ROOT_RNTupleProcessor -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include +#include namespace ROOT { namespace Experimental { -namespace Internal { -struct RNTupleProcessorEntryLoader; -} // namespace Internal - -// clang-format off -/** -\class ROOT::Experimental::RNTupleOpenSpec -\ingroup NTuple -\brief Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor. - -An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the -TDirectory (or any of its subclasses) that contains the RNTuple. - -Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python. 
-*/ -// clang-format on -class RNTupleOpenSpec { - friend class RNTupleProcessor; - friend class RNTupleSingleProcessor; - friend class RNTupleJoinProcessor; - -private: - std::string fNTupleName; - std::variant fStorage; - -public: - RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {} - RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {} - - std::unique_ptr CreatePageSource() const; -}; - -// clang-format off -/** -\class ROOT::Experimental::RNTupleProcessorOptionalPtr -\ingroup NTuple -\brief The RNTupleProcessorOptionalPtr provides access to values from fields present in an RNTupleProcessor, with support -and checks for missing values. -*/ -// clang-format on -template -class RNTupleProcessorOptionalPtr { - friend class RNTupleProcessor; - -private: - Internal::RNTupleProcessorEntry *fProcessorEntry; - Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex; - - RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, - Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx) - : fProcessorEntry(processorEntry), fFieldIndex(fieldIdx) - { - } - -public: - ///////////////////////////////////////////////////////////////////////////// - /// \brief Check if the pointer currently holds a valid value. - bool HasValue() const { return fProcessorEntry->IsValidField(fFieldIndex); } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get a shared pointer to the field value managed by the processor's entry. - /// - /// \return A `std::shared_ptr` if the field is valid in the current entry, or a `nullptr` otherwise. 
- std::shared_ptr GetPtr() const - { - if (fProcessorEntry->IsValidField(fFieldIndex)) { - const auto &value = fProcessorEntry->GetValue(fFieldIndex); - return value.template GetPtr(); - } - - return nullptr; - } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get a non-owning pointer to the field value managed by the processor's entry. - /// - /// \return A `T*` if the field is valid in the current entry, or a `nullptr` otherwise. - T *GetRawPtr() const { return GetPtr().get(); } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Bind the value to `valuePtr`. - /// - /// \param[in] valuePtr Pointer to bind the value to. - /// - /// \warning Use this function with care! Values may not always be valid for every entry during processing, for - /// example when a field is not present in one of the chained processors or when during a join operation, no matching - /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading - /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing - /// its data through this interface, to ensure that only valid data can be read. - void BindRawPtr(T *valuePtr) { fProcessorEntry->BindRawPtr(fFieldIndex, valuePtr); } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get a reference to the field value managed by the processor's entry. - /// - /// Throws an exception if the field is invalid in the processor's current entry. 
- const T &operator*() const - { - if (auto ptr = GetPtr()) - return *ptr; - else - throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) + - "\" because it has no value for the current entry")); - } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Access the field value managed by the processor's entry. - /// - /// Throws an exception if the field is invalid in the processor's current entry. - const T *operator->() const - { - if (auto ptr = GetPtr()) - return ptr.get(); - else - throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) + - "\" because it has no value for the current entry")); - } -}; - -// clang-format off -/** -\class ROOT::Experimental::RNTupleProcessorOptionalPtr -\ingroup NTuple -\brief Specialization of RNTupleProcessorOptionalPtr for `void`-type pointers. -*/ -// clang-format on -template <> -class RNTupleProcessorOptionalPtr { - friend class RNTupleProcessor; - -private: - Internal::RNTupleProcessorEntry *fProcessorEntry; - Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex; - - RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, - Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx) - : fProcessorEntry(processorEntry), fFieldIndex(fieldIdx) - { - } - -public: - ///////////////////////////////////////////////////////////////////////////// - /// \brief Check if the pointer currently holds a valid value. - bool HasValue() const { return fProcessorEntry->IsValidField(fFieldIndex); } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the pointer to the field value managed by the processor's entry. - /// - /// \return A `std::shared_ptr` if the field is valid in the current entry, or a `nullptr` otherwise. 
- std::shared_ptr GetPtr() const - { - if (fProcessorEntry->IsValidField(fFieldIndex)) { - const auto &value = fProcessorEntry->GetValue(fFieldIndex); - return value.template GetPtr(); - } - - return nullptr; - } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get a non-owning pointer to the field value managed by the processor's entry. - /// - /// \return A `void*` if the field is valid in the current entry, or a `nullptr` otherwise. - void *GetRawPtr() const { return GetPtr().get(); } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Bind the value to `valuePtr`. - /// - /// \param[in] valuePtr Pointer to bind the value to. - /// - /// \warning Use this function with care! Values may not always be valid for every entry during processing, for - /// example when a field is not present in one of the chained processors or when during a join operation, no matching - /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading - /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing - /// its data through this interface, to ensure that only valid data can be read. - void BindRawPtr(void *valuePtr) { fProcessorEntry->BindRawPtr(fFieldIndex, valuePtr); } -}; - -// clang-format off -/** -\class ROOT::Experimental::RNTupleProcessor -\ingroup NTuple -\brief Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combined RNTuples. 
- -Example usage (see ntpl012_processor_chain.C and ntpl015_processor_join.C for bigger examples): - -~~~{.cpp} -#include -using ROOT::Experimental::RNTupleProcessor; -using ROOT::Experimental::RNTupleOpenSpec; - -std::vector ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}}; -auto processor = RNTupleProcessor::CreateChain(ntuples); - -auto pt = processor->RequestField("pt"); - -for (const auto idx : *processor) { - std::cout << "event = " << idx << ", pt = " << *pt << std::endl; -} -~~~ - -An RNTupleProcessor is created either: -1. By providing one or more RNTupleOpenSpecs, each of which contains the name and storage location of a single RNTuple; -2. By providing a previously created RNTupleProcessor. - -The RNTupleProcessor provides an iterator which gives access to the index of the current *global* entry of the -processor, i.e. taking into account previously processed RNTuples. - -Because the schemas of each RNTuple that are part of an RNTupleProcessor may not necessarily be identical, or because -it can occur that entries are only partially complete in a join-based processor, field values may be marked as -"invalid", at which point their data should not be read. This is handled by the RNTupleProcessorOptionalPtr -that is returned by RequestField(). -*/ -// clang-format on class RNTupleProcessor { - friend struct ROOT::Experimental::Internal::RNTupleProcessorEntryLoader; // for unit tests - friend class RNTupleSingleProcessor; - friend class RNTupleChainProcessor; - friend class RNTupleJoinProcessor; - -protected: - std::string fProcessorName; - std::shared_ptr fEntry = nullptr; - std::unordered_set fFieldIdxs; - - /// Total number of entries. Only to be used internally by the processor, not meant to be exposed in the public - /// interface. 
- ROOT::NTupleSize_t fNEntries = kInvalidNTupleIndex; - - ROOT::NTupleSize_t fNEntriesProcessed = 0; //< Total number of entries processed so far - ROOT::NTupleSize_t fCurrentEntryNumber = 0; //< Current processor entry number - std::size_t fCurrentProcessorNumber = 0; //< Number of the currently open inner processor - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one. - virtual void Initialize(std::shared_ptr entry) = 0; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Check if the processor already has been initialized. - bool IsInitialized() const { return fEntry != nullptr; } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Connect fields to the page source of the processor's underlying RNTuple(s). - /// - /// \param[in] fieldIdxs Indices of the fields to connect. - /// \param[in] provenance Provenance of the processor. - /// \param[in] updateFields Whether the fields in the entry need to be updated, because the current underlying - /// RNTuple source changed. - virtual void Connect(const std::unordered_set &fieldIdxs, - const Internal::RNTupleProcessorProvenance &provenance, bool updateFields) = 0; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Load the entry identified by the provided entry number. - /// - /// \param[in] entryNumber Entry number to load - /// - /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise. 
- virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) = 0; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the total number of entries in this processor - virtual ROOT::NTupleSize_t GetNEntries() = 0; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Check if a field exists on-disk and can be read by the processor. - /// - /// \param[in] fieldName Name of the field to check. - virtual bool CanReadFieldFromDisk(std::string_view fieldName) = 0; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Add a field to the entry. - /// - /// - /// \param[in] fieldName Name of the field to add. - /// \param[in] typeName Type of the field to add. - /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be - /// created. - /// \param[in] provenance Provenance of the processor. - /// - /// \return The index of the newly added field in the entry. - /// - /// In case the field was already present in the entry, the index of the existing field is returned. - virtual Internal::RNTupleProcessorEntry::FieldIndex_t - AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr, - const Internal::RNTupleProcessorProvenance &provenance) = 0; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Add the entry mappings for this processor to the provided join table. - /// - /// \param[in] joinTable the join table to map the entries to. - /// \param[in] entryOffset In case the entry mapping is added from a chain, the offset of the entry indexes to use - /// with respect to the processor's position in the chain. 
- virtual void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) = 0; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Processor-specific implementation for printing its structure, called by PrintStructure(). - /// - /// \param[in,out] output Output stream to print to. - virtual void PrintStructureImpl(std::ostream &output) const = 0; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Create a new base RNTupleProcessor. - /// - /// \param[in] processorName Name of the processor. By default, this is the name of the underlying RNTuple for - /// RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the primary - /// RNTuple for RNTupleJoinProcessor. - RNTupleProcessor(std::string_view processorName) : fProcessorName(processorName) {} +private: + RNTupleComposer *fComposer; + ROOT::NTupleSize_t fNEntriesProcessed = 0; public: - RNTupleProcessor(const RNTupleProcessor &) = delete; - RNTupleProcessor(RNTupleProcessor &&) = delete; - RNTupleProcessor &operator=(const RNTupleProcessor &) = delete; - RNTupleProcessor &operator=(RNTupleProcessor &&) = delete; - virtual ~RNTupleProcessor() = default; + RNTupleProcessor(RNTupleComposer &composer) : fComposer(&composer) {} - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the total number of entries processed so far. ROOT::NTupleSize_t GetNEntriesProcessed() const { return fNEntriesProcessed; } - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the entry number that is currently being processed. - ROOT::NTupleSize_t GetCurrentEntryNumber() const { return fCurrentEntryNumber; } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the number of the inner processor currently being read. 
- /// - /// This method is only relevant for the RNTupleChainProcessor. For the other processors, 0 is always returned. - std::size_t GetCurrentProcessorNumber() const { return fCurrentProcessorNumber; } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the name of the processor. - /// - /// Unless this name was explicitly specified during creation of the processor, this is the name of the underlying - /// RNTuple for RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the - /// primary processor for RNTupleJoinProcessor. - const std::string &GetProcessorName() const { return fProcessorName; } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Request access to a field for reading during processing. - /// - /// \tparam T Type of the requested field. - /// - /// \param[in] fieldName Name of the requested field. - /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be - /// created. - /// - /// \return An RNTupleProcessorOptionalPtr of type `T`, which provides access to the field's value. - /// - /// \warning Provide a `valuePtr` with care! Values may not always be valid for every entry during processing, for - /// example when a field is not present in one of the chained processors or when during a join operation, no matching - /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading - /// invalid data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through - /// the interface of the returned `RNTupleProcessorOptionalPtr`, to ensure that only valid data can be read. 
- template - RNTupleProcessorOptionalPtr RequestField(const std::string &fieldName, void *valuePtr = nullptr) - { - Initialize(fEntry); - std::string typeName{}; - if constexpr (!std::is_void_v) { - typeName = ROOT::Internal::GetRenormalizedTypeName(typeid(T)); - } - auto fieldIdx = AddFieldToEntry(fieldName, typeName, valuePtr, Internal::RNTupleProcessorProvenance()); - return RNTupleProcessorOptionalPtr(fEntry.get(), fieldIdx); - } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Request access to a field for reading during processing. - /// - /// \param[in] fieldName Name of the requested field. - /// \param[in] typeName Type of the requested field. - /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be - /// created. - /// - /// \return An void-type RNTupleProcessorOptionalPtr, which provides access to the field's value. - /// - /// \warning Provide a `valuePtr` with care! Values may not always be valid for every entry during processing, for - /// example when a field is not present in one of the chained processors or when during a join operation, no matching - /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading - /// invalid data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through - /// the interface of the returned `RNTupleProcessorOptionalPtr`, to ensure that only valid data can be read. 
- RNTupleProcessorOptionalPtr - RequestField(const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr) - { - Initialize(fEntry); - auto fieldIdx = AddFieldToEntry(fieldName, typeName, valuePtr, Internal::RNTupleProcessorProvenance()); - return RNTupleProcessorOptionalPtr(fEntry.get(), fieldIdx); - } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Print a graphical representation of the processor composition. - /// - /// \param[in,out] output Stream to print to (default is stdout). - /// - /// ### Example: - /// The structure of a processor representing a join between a single primary RNTuple and a chain of two auxiliary - /// RNTuples will be printed as follows: - /// ~~~ - /// +-----------------------------+ +-----------------------------+ - /// | ntuple | | ntuple_aux | - /// | ntuple.root | | ntuple_aux1.root | - /// +-----------------------------+ +-----------------------------+ - /// +-----------------------------+ - /// | ntuple_aux | - /// | ntuple_aux2.root | - /// +-----------------------------+ - /// ~~~ - void PrintStructure(std::ostream &output = std::cout) { PrintStructureImpl(output); } + RNTupleComposer &GetComposer() { return *fComposer; } + const RNTupleComposer &GetComposer() const { return *fComposer; } // clang-format off /** - \class ROOT::Experimental::RNTupleProcessor::RIterator + \class ROOT::Experimental::RNTupleComposer::RIterator \ingroup NTuple - \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof. + \brief Iterator over the entries of a composed RNTuple. 
*/ // clang-format on class RIterator { private: RNTupleProcessor &fProcessor; + RNTupleComposer &fComposer; ROOT::NTupleSize_t fCurrentEntryNumber; public: @@ -460,23 +55,27 @@ public: using reference = ROOT::NTupleSize_t &; RIterator(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber) - : fProcessor(processor), fCurrentEntryNumber(entryNumber) + : fProcessor(processor), fComposer(fProcessor.GetComposer()), fCurrentEntryNumber(entryNumber) { - if (!fProcessor.fEntry) { + if (!fComposer.fEntry) { fCurrentEntryNumber = ROOT::kInvalidNTupleIndex; } - // This constructor is called with kInvalidNTupleIndex for RNTupleProcessor::end(). In that case, we already + // This constructor is called with kInvalidNTupleIndex for RNTupleComposer::end(). In that case, we already // know there is nothing to load. if (fCurrentEntryNumber != ROOT::kInvalidNTupleIndex) { - fProcessor.Connect(fProcessor.fEntry->GetFieldIndices(), Internal::RNTupleProcessorProvenance(), - /*updateFields=*/false); - fCurrentEntryNumber = fProcessor.LoadEntry(fCurrentEntryNumber); + fComposer.Connect(fComposer.fEntry->GetFieldIndices(), Internal::RNTupleCompositionProvenance(), + /*updateFields=*/false); + fCurrentEntryNumber = fComposer.LoadEntry(fCurrentEntryNumber); + if (fCurrentEntryNumber != ROOT::kInvalidNTupleIndex) + fProcessor.fNEntriesProcessed++; } } iterator operator++() { - fCurrentEntryNumber = fProcessor.LoadEntry(fCurrentEntryNumber + 1); + fCurrentEntryNumber = fComposer.LoadEntry(fCurrentEntryNumber + 1); + if (fCurrentEntryNumber != ROOT::kInvalidNTupleIndex) + fProcessor.fNEntriesProcessed++; return *this; } @@ -501,369 +100,7 @@ public: RIterator begin() { return RIterator(*this, 0); } RIterator end() { return RIterator(*this, ROOT::kInvalidNTupleIndex); } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Create an RNTupleProcessor for a single RNTuple. 
- /// - /// \param[in] ntuple The name and storage location of the RNTuple to process. - /// \param[in] processorName The name to give to the processor. If empty, the name of the input RNTuple is used. - /// - /// \return A pointer to the newly created RNTupleProcessor. - static std::unique_ptr Create(RNTupleOpenSpec ntuple, std::string_view processorName = ""); - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples. - /// - /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process. - /// \param[in] processorName The name to give to the processor. If empty, the name of the first RNTuple is used. - /// - /// \return A pointer to the newly created RNTupleProcessor. - static std::unique_ptr - CreateChain(std::vector ntuples, std::string_view processorName = ""); - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors. - /// - /// \param[in] innerProcessors A list with the processors to chain. - /// \param[in] processorName The name to give to the processor. If empty, the name of the first inner processor is - /// used. - /// - /// \return A pointer to the newly created RNTupleProcessor. - static std::unique_ptr - CreateChain(std::vector> innerProcessors, std::string_view processorName = ""); - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples. - /// - /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential - /// order. - /// \param[in] auxNTuple The name and location of the RNTuple to join the primary RNTuple with. 
The order in which - /// its entries are processed is determined by the primary RNTuple and doesn't necessarily have to be sequential. - /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned. - /// The join is made based on the combined join field values, and therefore each field has to be present in each - /// specified RNTuple. If an empty list is provided, it is assumed that the specified ntuple are fully aligned. - /// \param[in] processorName The name to give to the processor. If empty, the name of the primary RNTuple is used. - /// - /// \return A pointer to the newly created RNTupleProcessor. - static std::unique_ptr CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple, - const std::vector &joinFields, - std::string_view processorName = ""); - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples. - /// - /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order. - /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are - /// processed is determined by the primary processor and doesn't necessarily have to be sequential. - /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned. - /// The join is made based on the combined join field values, and therefore each field has to be present in each - /// specified processors. If an empty list is provided, it is assumed that the specified processors are fully - /// aligned. - /// \param[in] processorName The name to give to the processor. If empty, the name of the primary processor is used. - /// - /// \return A pointer to the newly created RNTupleProcessor. 
- static std::unique_ptr - CreateJoin(std::unique_ptr primaryProcessor, std::unique_ptr auxProcessor, - const std::vector &joinFields, std::string_view processorName = ""); -}; - -// clang-format off -/** -\class ROOT::Experimental::RNTupleSingleProcessor -\ingroup NTuple -\brief Processor specialization for processing a single RNTuple. -*/ -// clang-format on -class RNTupleSingleProcessor : public RNTupleProcessor { - friend class RNTupleProcessor; - -private: - RNTupleOpenSpec fNTupleSpec; - std::unique_ptr fPageSource; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Create a new field and connect it to the processor's page source. - /// - /// \param[in] qualifiedFieldName Name of the field to add, prefixed with its parent fields, if applicable. - /// \param[in] typeName Type of the field to add. - /// - /// \return The newly created field. - /// \throws ROOT::RException In case the requested field cannot be found on disk. - std::unique_ptr - CreateAndConnectField(const std::string &qualifiedFieldName, const std::string &typeName); - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one. - /// - /// At this point, the page source for the underlying RNTuple of the processor will be created and opened. - void Initialize(std::shared_ptr entry = nullptr) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Connect the provided fields indices in the entry to their on-disk fields. 
- void Connect(const std::unordered_set &fieldIdxs, - const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance(), - bool updateFields = false) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this - /// processor). - /// - /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry - ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the total number of entries in this processor. - ROOT::NTupleSize_t GetNEntries() final - { - Initialize(); - if (fNEntries == ROOT::kInvalidNTupleIndex) - Connect(fFieldIdxs); - return fNEntries; - } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Check if a field exists on-disk and can be read by the processor. - /// - /// \sa RNTupleProcessor::CanReadFieldFromDisk() - bool CanReadFieldFromDisk(std::string_view fieldName) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Add a field to the entry. - /// - /// \sa RNTupleProcessor::AddFieldToEntry() - Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry( - const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr, - const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Add the entry mappings for this processor to the provided join table. 
- /// - /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable - void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Processor-specific implementation for printing its structure, called by PrintStructure(). - /// - /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl - void PrintStructureImpl(std::ostream &output) const final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Construct a new RNTupleProcessor for processing a single RNTuple. - /// - /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process. - /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is - /// the name of the underlying RNTuple. - RNTupleSingleProcessor(RNTupleOpenSpec ntuple, std::string_view processorName); - -public: - RNTupleSingleProcessor(const RNTupleSingleProcessor &) = delete; - RNTupleSingleProcessor(RNTupleSingleProcessor &&) = delete; - RNTupleSingleProcessor &operator=(const RNTupleSingleProcessor &) = delete; - RNTupleSingleProcessor &operator=(RNTupleSingleProcessor &&) = delete; - ~RNTupleSingleProcessor() override - { - // The entry's fields need to be deleted before fPageSource. - if (fEntry) - fEntry->Clear(); - }; }; - -// clang-format off -/** -\class ROOT::Experimental::RNTupleChainProcessor -\ingroup NTuple -\brief Processor specialization for vertically combined (*chained*) RNTupleProcessors. 
-*/ -// clang-format on -class RNTupleChainProcessor : public RNTupleProcessor { - friend class RNTupleProcessor; - -private: - std::vector> fInnerProcessors; - std::vector fInnerNEntries; - - Internal::RNTupleProcessorProvenance fProvenance; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one. - void Initialize(std::shared_ptr entry = nullptr) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Connect the provided fields indices in the entry to their on-disk fields. - /// - /// \sa RNTupleProcessor::Connect() - void Connect(const std::unordered_set &fieldIdxs, - const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance(), - bool updateFields = false) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Update the entry to reflect any missing fields in the current inner processor. - void ConnectInnerProcessor(std::size_t processorNumber); - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this - /// processor). - /// - /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry - ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the total number of entries in this processor. - /// - /// \note This requires opening all underlying RNTuples being processed in the chain, and could become costly! - ROOT::NTupleSize_t GetNEntries() final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Check if a field exists on-disk and can be read by the processor. 
- /// - /// \sa RNTupleProcessor::CanReadFieldFromDisk() - bool CanReadFieldFromDisk(std::string_view fieldName) final - { - return fInnerProcessors[fCurrentProcessorNumber]->CanReadFieldFromDisk(fieldName); - } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Add a field to the entry. - /// - /// \sa RNTupleProcessor::AddFieldToEntry() - Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry( - const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr, - const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Add the entry mappings for this processor to the provided join table. - /// - /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable - void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Processor-specific implementation for printing its structure, called by PrintStructure(). - /// - /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl - void PrintStructureImpl(std::ostream &output) const final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Construct a new RNTupleChainProcessor. - /// - /// \param[in] ntuples The source specification (name and storage location) for each RNTuple to process. - /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateChain, this - /// is the name of the first inner processor. - /// - /// RNTuples are processed in the order in which they are specified. 
- RNTupleChainProcessor(std::vector> processors, std::string_view processorName); - -public: - RNTupleChainProcessor(const RNTupleChainProcessor &) = delete; - RNTupleChainProcessor(RNTupleChainProcessor &&) = delete; - RNTupleChainProcessor &operator=(const RNTupleChainProcessor &) = delete; - RNTupleChainProcessor &operator=(RNTupleChainProcessor &&) = delete; - ~RNTupleChainProcessor() override = default; -}; - -// clang-format off -/** -\class ROOT::Experimental::RNTupleJoinProcessor -\ingroup NTuple -\brief Processor specialization for horizontally combined (*joined*) RNTupleProcessors. -*/ -// clang-format on -class RNTupleJoinProcessor : public RNTupleProcessor { - friend class RNTupleProcessor; - -private: - std::unique_ptr fPrimaryProcessor; - std::unique_ptr fAuxiliaryProcessor; - - std::vector fJoinFieldNames; - std::set fJoinFieldIdxs; - - std::unique_ptr fJoinTable; - bool fJoinTableIsBuilt = false; - - std::unordered_set fAuxiliaryFieldIdxs; - - /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one. - void Initialize(std::shared_ptr entry = nullptr) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Connect the provided fields indices in the entry to their on-disk fields. - /// - /// \sa RNTupleProcessor::Connect() - void Connect(const std::unordered_set &fieldIdxs, - const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance(), - bool updateFields = false) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Load the entry identified by the provided entry number of the primary processor. - /// - /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry - ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the total number of entries in this processor. 
- ROOT::NTupleSize_t GetNEntries() final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Set the validity for all fields in the auxiliary processor at once. - void SetAuxiliaryFieldValidity(bool validity); - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Check if a field exists on-disk and can be read by the processor. - /// - /// \sa RNTupleProcessor::CanReadFieldFromDisk() - bool CanReadFieldFromDisk(std::string_view fieldName) final - { - if (!fPrimaryProcessor->CanReadFieldFromDisk(fieldName)) { - if (fieldName.find(fAuxiliaryProcessor->GetProcessorName()) == 0) - fieldName = fieldName.substr(fAuxiliaryProcessor->GetProcessorName().size() + 1); - return fAuxiliaryProcessor->CanReadFieldFromDisk(fieldName); - } - - return true; - } - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Add a field to the entry. - /// - /// \sa RNTupleProcessor::AddFieldToEntry() - Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry( - const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr, - const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Add the entry mappings for this processor to the provided join table. - /// - /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable - void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Processor-specific implementation for printing its structure, called by PrintStructure(). 
- /// - /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl - void PrintStructureImpl(std::ostream &output) const final; - - ///////////////////////////////////////////////////////////////////////////// - /// \brief Construct a new RNTupleJoinProcessor. - /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order. - /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are - /// processed is determined by the primary processor and doesn't necessarily have to be sequential. - /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned. - /// The join is made based on the combined join field values, and therefore each field has to be present in each - /// specified processor. If an empty list is provided, it is assumed that the processors are fully aligned. - /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this - /// is the name of the primary processor. 
- RNTupleJoinProcessor(std::unique_ptr primaryProcessor, - std::unique_ptr auxProcessor, const std::vector &joinFields, - std::string_view processorName); - -public: - RNTupleJoinProcessor(const RNTupleJoinProcessor &) = delete; - RNTupleJoinProcessor operator=(const RNTupleJoinProcessor &) = delete; - RNTupleJoinProcessor(RNTupleJoinProcessor &&) = delete; - RNTupleJoinProcessor operator=(RNTupleJoinProcessor &&) = delete; - ~RNTupleJoinProcessor() override = default; -}; - } // namespace Experimental } // namespace ROOT diff --git a/tree/ntuple/src/RNTupleProcessor.cxx b/tree/ntuple/src/RNTupleComposer.cxx similarity index 52% rename from tree/ntuple/src/RNTupleProcessor.cxx rename to tree/ntuple/src/RNTupleComposer.cxx index 6cfdd8d628862..da3c7b048f795 100644 --- a/tree/ntuple/src/RNTupleProcessor.cxx +++ b/tree/ntuple/src/RNTupleComposer.cxx @@ -1,4 +1,4 @@ -/// \file RNTupleProcessor.cxx +/// \file RNTupleComposer.cxx /// \author Florine de Geus /// \date 2024-03-26 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback @@ -12,7 +12,7 @@ * For the list of contributors see $ROOTSYS/README/CREDITS. 
* *************************************************************************/ -#include +#include #include #include @@ -33,42 +33,43 @@ std::unique_ptr ROOT::Experimental::RNTupleOpenSpec return ROOT::Internal::RPageSourceFile::CreateFromAnchor(*ntuple); } -std::unique_ptr -ROOT::Experimental::RNTupleProcessor::Create(RNTupleOpenSpec ntuple, std::string_view processorName) +std::unique_ptr +ROOT::Experimental::RNTupleComposer::Create(RNTupleOpenSpec ntuple, std::string_view compositionName) { - return std::unique_ptr(new RNTupleSingleProcessor(std::move(ntuple), processorName)); + return std::unique_ptr(new RNTupleSingleComposer(std::move(ntuple), compositionName)); } -std::unique_ptr -ROOT::Experimental::RNTupleProcessor::CreateChain(std::vector ntuples, std::string_view processorName) +std::unique_ptr +ROOT::Experimental::RNTupleComposer::CreateChain(std::vector ntuples, std::string_view compositionName) { if (ntuples.empty()) throw RException(R__FAIL("at least one RNTuple must be provided")); - std::vector> innerProcessors; - innerProcessors.reserve(ntuples.size()); + std::vector> innerCompositions; + innerCompositions.reserve(ntuples.size()); for (auto &ntuple : ntuples) { - innerProcessors.emplace_back(Create(std::move(ntuple))); + innerCompositions.emplace_back(Create(std::move(ntuple))); } - return CreateChain(std::move(innerProcessors), processorName); + return CreateChain(std::move(innerCompositions), compositionName); } -std::unique_ptr -ROOT::Experimental::RNTupleProcessor::CreateChain(std::vector> innerProcessors, - std::string_view processorName) +std::unique_ptr +ROOT::Experimental::RNTupleComposer::CreateChain(std::vector> innerCompositions, + std::string_view compositionName) { - if (innerProcessors.empty()) - throw RException(R__FAIL("at least one inner processor must be provided")); + if (innerCompositions.empty()) + throw RException(R__FAIL("at least one inner composition must be provided")); - return std::unique_ptr(new 
RNTupleChainProcessor(std::move(innerProcessors), processorName)); + return std::unique_ptr( + new RNTupleChainComposer(std::move(innerCompositions), compositionName)); } -std::unique_ptr -ROOT::Experimental::RNTupleProcessor::CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple, - const std::vector &joinFields, - std::string_view processorName) +std::unique_ptr +ROOT::Experimental::RNTupleComposer::CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple, + const std::vector &joinFields, + std::string_view compositionName) { if (joinFields.size() > 4) { throw RException(R__FAIL("a maximum of four join fields is allowed")); @@ -78,18 +79,18 @@ ROOT::Experimental::RNTupleProcessor::CreateJoin(RNTupleOpenSpec primaryNTuple, throw RException(R__FAIL("join fields must be unique")); } - std::unique_ptr primaryProcessor = Create(primaryNTuple, processorName); + std::unique_ptr primaryComposition = Create(primaryNTuple, compositionName); - std::unique_ptr auxProcessor = Create(auxNTuple); + std::unique_ptr auxComposition = Create(auxNTuple); - return CreateJoin(std::move(primaryProcessor), std::move(auxProcessor), joinFields, processorName); + return CreateJoin(std::move(primaryComposition), std::move(auxComposition), joinFields, compositionName); } -std::unique_ptr -ROOT::Experimental::RNTupleProcessor::CreateJoin(std::unique_ptr primaryProcessor, - std::unique_ptr auxProcessor, - const std::vector &joinFields, - std::string_view processorName) +std::unique_ptr +ROOT::Experimental::RNTupleComposer::CreateJoin(std::unique_ptr primaryComposition, + std::unique_ptr auxComposition, + const std::vector &joinFields, + std::string_view compositionName) { if (joinFields.size() > 4) { throw RException(R__FAIL("a maximum of four join fields is allowed")); @@ -99,30 +100,30 @@ ROOT::Experimental::RNTupleProcessor::CreateJoin(std::unique_ptr( - new RNTupleJoinProcessor(std::move(primaryProcessor), std::move(auxProcessor), joinFields, processorName)); + 
return std::unique_ptr( + new RNTupleJoinComposer(std::move(primaryComposition), std::move(auxComposition), joinFields, compositionName)); } //------------------------------------------------------------------------------ -ROOT::Experimental::RNTupleSingleProcessor::RNTupleSingleProcessor(RNTupleOpenSpec ntuple, - std::string_view processorName) - : RNTupleProcessor(processorName), fNTupleSpec(std::move(ntuple)) +ROOT::Experimental::RNTupleSingleComposer::RNTupleSingleComposer(RNTupleOpenSpec ntuple, + std::string_view compositionName) + : RNTupleComposer(compositionName), fNTupleSpec(std::move(ntuple)) { - if (fProcessorName.empty()) { - fProcessorName = fNTupleSpec.fNTupleName; + if (fCompositionName.empty()) { + fCompositionName = fNTupleSpec.fNTupleName; } } -void ROOT::Experimental::RNTupleSingleProcessor::Initialize( - std::shared_ptr entry) +void ROOT::Experimental::RNTupleSingleComposer::Initialize( + std::shared_ptr entry) { - // The processor has already been initialized. + // The composer has already been initialized. 
if (IsInitialized()) return; if (!entry) - fEntry = std::make_shared(); + fEntry = std::make_shared(); else fEntry = entry; @@ -132,7 +133,7 @@ void ROOT::Experimental::RNTupleSingleProcessor::Initialize( fNEntries = fPageSource->GetNEntries(); } -bool ROOT::Experimental::RNTupleSingleProcessor::CanReadFieldFromDisk(std::string_view fieldName) +bool ROOT::Experimental::RNTupleSingleComposer::CanReadFieldFromDisk(std::string_view fieldName) { Initialize(); auto desc = fPageSource->GetSharedDescriptorGuard(); @@ -143,8 +144,8 @@ bool ROOT::Experimental::RNTupleSingleProcessor::CanReadFieldFromDisk(std::strin } std::unique_ptr -ROOT::Experimental::RNTupleSingleProcessor::CreateAndConnectField(const std::string &qualifiedFieldName, - const std::string &typeName) +ROOT::Experimental::RNTupleSingleComposer::CreateAndConnectField(const std::string &qualifiedFieldName, + const std::string &typeName) { assert(fPageSource); @@ -185,14 +186,14 @@ ROOT::Experimental::RNTupleSingleProcessor::CreateAndConnectField(const std::str return std::move(fieldZero.ReleaseSubfields()[0]); } -ROOT::Experimental::Internal::RNTupleProcessorEntry::FieldIndex_t -ROOT::Experimental::RNTupleSingleProcessor::AddFieldToEntry(const std::string &fieldName, const std::string &typeName, - void *valuePtr, - const Internal::RNTupleProcessorProvenance &provenance) +ROOT::Experimental::Internal::RNTupleComposerEntry::FieldIndex_t +ROOT::Experimental::RNTupleSingleComposer::AddFieldToEntry(const std::string &fieldName, const std::string &typeName, + void *valuePtr, + const Internal::RNTupleCompositionProvenance &provenance) { auto fieldIdx = fEntry->FindFieldIndex(fieldName, typeName); if (!fieldIdx) { - // Strip the processor name prefix(es), if present. + // Strip the composition name prefix(es), if present. 
std::string qualifiedFieldName = fieldName; if (provenance.IsPresentInFieldName(qualifiedFieldName)) { qualifiedFieldName = qualifiedFieldName.substr(provenance.Get().size() + 1); @@ -203,7 +204,7 @@ ROOT::Experimental::RNTupleSingleProcessor::AddFieldToEntry(const std::string &f if (!field) { throw RException(R__FAIL("cannot register field with name \"" + qualifiedFieldName + "\" because it is not present in the on-disk information of the RNTuple(s) this " - "processor is created from")); + "composition is created from")); } fieldIdx = fEntry->AddField(qualifiedFieldName, std::move(field), valuePtr, provenance); @@ -212,7 +213,7 @@ ROOT::Experimental::RNTupleSingleProcessor::AddFieldToEntry(const std::string &f return *fieldIdx; } -ROOT::NTupleSize_t ROOT::Experimental::RNTupleSingleProcessor::LoadEntry(ROOT::NTupleSize_t entryNumber) +ROOT::NTupleSize_t ROOT::Experimental::RNTupleSingleComposer::LoadEntry(ROOT::NTupleSize_t entryNumber) { if (entryNumber >= fNEntries || !fEntry) return kInvalidNTupleIndex; @@ -221,14 +222,13 @@ ROOT::NTupleSize_t ROOT::Experimental::RNTupleSingleProcessor::LoadEntry(ROOT::N fEntry->ReadValue(fieldIdx, entryNumber); } - fNEntriesProcessed++; fCurrentEntryNumber = entryNumber; return entryNumber; } -void ROOT::Experimental::RNTupleSingleProcessor::Connect( - const std::unordered_set &fieldIdxs, - const Internal::RNTupleProcessorProvenance & /* provenance */, bool updateFields) +void ROOT::Experimental::RNTupleSingleComposer::Connect( + const std::unordered_set &fieldIdxs, + const Internal::RNTupleCompositionProvenance & /* provenance */, bool updateFields) { Initialize(); @@ -244,14 +244,14 @@ void ROOT::Experimental::RNTupleSingleProcessor::Connect( } } -void ROOT::Experimental::RNTupleSingleProcessor::AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, - ROOT::NTupleSize_t entryOffset) +void ROOT::Experimental::RNTupleSingleComposer::AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, + ROOT::NTupleSize_t 
entryOffset) { Connect(fFieldIdxs); joinTable.Add(*fPageSource, Internal::RNTupleJoinTable::kDefaultPartitionKey, entryOffset); } -void ROOT::Experimental::RNTupleSingleProcessor::PrintStructureImpl(std::ostream &output) const +void ROOT::Experimental::RNTupleSingleComposer::PrintStructureImpl(std::ostream &output) const { static constexpr int width = 32; @@ -277,40 +277,40 @@ void ROOT::Experimental::RNTupleSingleProcessor::PrintStructureImpl(std::ostream //------------------------------------------------------------------------------ -ROOT::Experimental::RNTupleChainProcessor::RNTupleChainProcessor( - std::vector> processors, std::string_view processorName) - : RNTupleProcessor(processorName), fInnerProcessors(std::move(processors)) +ROOT::Experimental::RNTupleChainComposer::RNTupleChainComposer( + std::vector> compositions, std::string_view compositionName) + : RNTupleComposer(compositionName), fInnerCompositions(std::move(compositions)) { - if (fProcessorName.empty()) { - // `CreateChain` ensures there is at least one inner processor. - fProcessorName = fInnerProcessors[0]->GetProcessorName(); + if (fCompositionName.empty()) { + // `CreateChain` ensures there is at least one inner composition. 
+ fCompositionName = fInnerCompositions[0]->GetCompositionName(); } - fInnerNEntries.assign(fInnerProcessors.size(), kInvalidNTupleIndex); + fInnerNEntries.assign(fInnerCompositions.size(), kInvalidNTupleIndex); } -void ROOT::Experimental::RNTupleChainProcessor::Initialize( - std::shared_ptr entry) +void ROOT::Experimental::RNTupleChainComposer::Initialize( + std::shared_ptr entry) { if (IsInitialized()) return; if (!entry) - fEntry = std::make_shared(); + fEntry = std::make_shared(); else fEntry = entry; - fInnerProcessors[0]->Initialize(fEntry); + fInnerCompositions[0]->Initialize(fEntry); } -ROOT::NTupleSize_t ROOT::Experimental::RNTupleChainProcessor::GetNEntries() +ROOT::NTupleSize_t ROOT::Experimental::RNTupleChainComposer::GetNEntries() { if (fNEntries == kInvalidNTupleIndex) { fNEntries = 0; - for (unsigned i = 0; i < fInnerProcessors.size(); ++i) { + for (unsigned i = 0; i < fInnerCompositions.size(); ++i) { if (fInnerNEntries[i] == kInvalidNTupleIndex) { - fInnerNEntries[i] = fInnerProcessors[i]->GetNEntries(); + fInnerNEntries[i] = fInnerCompositions[i]->GetNEntries(); } fNEntries += fInnerNEntries[i]; @@ -320,127 +320,127 @@ ROOT::NTupleSize_t ROOT::Experimental::RNTupleChainProcessor::GetNEntries() return fNEntries; } -void ROOT::Experimental::RNTupleChainProcessor::Connect( - const std::unordered_set &fieldIdxs, - const Internal::RNTupleProcessorProvenance &provenance, bool /* updateFields */) +void ROOT::Experimental::RNTupleChainComposer::Connect( + const std::unordered_set &fieldIdxs, + const Internal::RNTupleCompositionProvenance &provenance, bool /* updateFields */) { Initialize(); fFieldIdxs = fieldIdxs; fProvenance = provenance; - ConnectInnerProcessor(fCurrentProcessorNumber); + ConnectInnerComposition(fCurrentChainIndex); } -void ROOT::Experimental::RNTupleChainProcessor::ConnectInnerProcessor(std::size_t processorNumber) +void ROOT::Experimental::RNTupleChainComposer::ConnectInnerComposition(std::size_t chainIdx) { - auto &innerProc = 
fInnerProcessors[processorNumber]; + auto &innerProc = fInnerCompositions[chainIdx]; innerProc->Initialize(fEntry); innerProc->Connect(fFieldIdxs, fProvenance, /*updateFields=*/true); } -ROOT::Experimental::Internal::RNTupleProcessorEntry::FieldIndex_t -ROOT::Experimental::RNTupleChainProcessor::AddFieldToEntry(const std::string &fieldName, const std::string &typeName, - void *valuePtr, - const Internal::RNTupleProcessorProvenance &provenance) +ROOT::Experimental::Internal::RNTupleComposerEntry::FieldIndex_t +ROOT::Experimental::RNTupleChainComposer::AddFieldToEntry(const std::string &fieldName, const std::string &typeName, + void *valuePtr, + const Internal::RNTupleCompositionProvenance &provenance) { - return fInnerProcessors[fCurrentProcessorNumber]->AddFieldToEntry(fieldName, typeName, valuePtr, provenance); + return fInnerCompositions[fCurrentChainIndex]->AddFieldToEntry(fieldName, typeName, valuePtr, provenance); } -ROOT::NTupleSize_t ROOT::Experimental::RNTupleChainProcessor::LoadEntry(ROOT::NTupleSize_t entryNumber) +ROOT::NTupleSize_t ROOT::Experimental::RNTupleChainComposer::LoadEntry(ROOT::NTupleSize_t entryNumber) { ROOT::NTupleSize_t localEntryNumber = entryNumber; - std::size_t currProcessorNumber = 0; + std::size_t currChainIdx = 0; if (entryNumber < fCurrentEntryNumber) { - fCurrentProcessorNumber = 0; - ConnectInnerProcessor(fCurrentProcessorNumber); + fCurrentChainIndex = 0; + ConnectInnerComposition(fCurrentChainIndex); } - // As long as the entry fails to load from the current processor, we decrement the local entry number with the number - // of entries in this processor and try with the next processor until we find the correct local entry number. 
- while (fInnerProcessors[currProcessorNumber]->LoadEntry(localEntryNumber) == kInvalidNTupleIndex) { - if (fInnerNEntries[currProcessorNumber] == kInvalidNTupleIndex) { - fInnerNEntries[currProcessorNumber] = fInnerProcessors[currProcessorNumber]->GetNEntries(); + // As long as the entry fails to load from the current composition, we decrement the local entry number with the + // number of entries in this composition and try with the next composition until we find the correct local entry + // number. + while (fInnerCompositions[currChainIdx]->LoadEntry(localEntryNumber) == kInvalidNTupleIndex) { + if (fInnerNEntries[currChainIdx] == kInvalidNTupleIndex) { + fInnerNEntries[currChainIdx] = fInnerCompositions[currChainIdx]->GetNEntries(); } - localEntryNumber -= fInnerNEntries[currProcessorNumber]; + localEntryNumber -= fInnerNEntries[currChainIdx]; // The provided global entry number is larger than the number of available entries. - if (++currProcessorNumber >= fInnerProcessors.size()) + if (++currChainIdx >= fInnerCompositions.size()) return kInvalidNTupleIndex; - ConnectInnerProcessor(currProcessorNumber); + ConnectInnerComposition(currChainIdx); } - fCurrentProcessorNumber = currProcessorNumber; - fNEntriesProcessed++; + fCurrentChainIndex = currChainIdx; fCurrentEntryNumber = entryNumber; return entryNumber; } -void ROOT::Experimental::RNTupleChainProcessor::AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, - ROOT::NTupleSize_t entryOffset) +void ROOT::Experimental::RNTupleChainComposer::AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, + ROOT::NTupleSize_t entryOffset) { - for (unsigned i = 0; i < fInnerProcessors.size(); ++i) { - const auto &innerProc = fInnerProcessors[i]; - // TODO can this be done (more) lazily? I.e. only when a match cannot be found in the current inner proc? - // At this stage, we don't want to fully initialize (i.e. 
set the entry of) the inner processor yet + for (unsigned i = 0; i < fInnerCompositions.size(); ++i) { + const auto &innerProc = fInnerCompositions[i]; + // TODO can this be done (more) lazily? I.e. only when a match cannot be found in the current inner composition? + // At this stage, we don't want to fully initialize (i.e. set the entry of) the inner composition yet innerProc->Initialize(nullptr); innerProc->AddEntriesToJoinTable(joinTable, entryOffset); entryOffset += innerProc->GetNEntries(); } } -void ROOT::Experimental::RNTupleChainProcessor::PrintStructureImpl(std::ostream &output) const +void ROOT::Experimental::RNTupleChainComposer::PrintStructureImpl(std::ostream &output) const { - for (const auto &innerProc : fInnerProcessors) { + for (const auto &innerProc : fInnerCompositions) { innerProc->PrintStructure(output); } } //------------------------------------------------------------------------------ -ROOT::Experimental::RNTupleJoinProcessor::RNTupleJoinProcessor(std::unique_ptr primaryProcessor, - std::unique_ptr auxProcessor, - const std::vector &joinFields, - std::string_view processorName) - : RNTupleProcessor(processorName), - fPrimaryProcessor(std::move(primaryProcessor)), - fAuxiliaryProcessor(std::move(auxProcessor)), +ROOT::Experimental::RNTupleJoinComposer::RNTupleJoinComposer(std::unique_ptr primaryComposition, + std::unique_ptr auxComposition, + const std::vector &joinFields, + std::string_view compositionName) + : RNTupleComposer(compositionName), + fPrimaryComposition(std::move(primaryComposition)), + fAuxiliaryComposition(std::move(auxComposition)), fJoinFieldNames(joinFields) { - if (fProcessorName.empty()) { - fProcessorName = fPrimaryProcessor->GetProcessorName(); + if (fCompositionName.empty()) { + fCompositionName = fPrimaryComposition->GetCompositionName(); } } -void ROOT::Experimental::RNTupleJoinProcessor::Initialize( - std::shared_ptr entry) +void ROOT::Experimental::RNTupleJoinComposer::Initialize( + std::shared_ptr entry) { if 
(IsInitialized()) return; if (!entry) - fEntry = std::make_shared(); + fEntry = std::make_shared(); else fEntry = entry; - fPrimaryProcessor->Initialize(fEntry); - fAuxiliaryProcessor->Initialize(fEntry); + fPrimaryComposition->Initialize(fEntry); + fAuxiliaryComposition->Initialize(fEntry); if (!fJoinFieldNames.empty()) { for (const auto &joinField : fJoinFieldNames) { - if (!fPrimaryProcessor->CanReadFieldFromDisk(joinField)) { - throw RException(R__FAIL("could not find join field \"" + joinField + "\" in primary processor \"" + - fPrimaryProcessor->GetProcessorName() + "\"")); + if (!fPrimaryComposition->CanReadFieldFromDisk(joinField)) { + throw RException(R__FAIL("could not find join field \"" + joinField + "\" in primary composition \"" + + fPrimaryComposition->GetCompositionName() + "\"")); } - if (!fAuxiliaryProcessor->CanReadFieldFromDisk(joinField)) { - throw RException(R__FAIL("could not find join field \"" + joinField + "\" in auxiliary processor \"" + - fAuxiliaryProcessor->GetProcessorName() + "\"")); + if (!fAuxiliaryComposition->CanReadFieldFromDisk(joinField)) { + throw RException(R__FAIL("could not find join field \"" + joinField + "\" in auxiliary composition \"" + + fAuxiliaryComposition->GetCompositionName() + "\"")); } - // We prepend the name of the primary processor in this case to prevent reading from the wrong join field in + // We prepend the name of the primary composition in this case to prevent reading from the wrong join field in // composed join operations. - auto fieldIdx = AddFieldToEntry(fProcessorName + "._join." + joinField, "std::uint64_t", nullptr, - Internal::RNTupleProcessorProvenance(fProcessorName)); + auto fieldIdx = AddFieldToEntry(fCompositionName + "._join." 
+ joinField, "std::uint64_t", nullptr, + Internal::RNTupleCompositionProvenance(fCompositionName)); fJoinFieldIdxs.insert(fieldIdx); } @@ -448,66 +448,66 @@ void ROOT::Experimental::RNTupleJoinProcessor::Initialize( } } -void ROOT::Experimental::RNTupleJoinProcessor::Connect( - const std::unordered_set &fieldIdxs, - const Internal::RNTupleProcessorProvenance &provenance, bool updateFields) +void ROOT::Experimental::RNTupleJoinComposer::Connect( + const std::unordered_set &fieldIdxs, + const Internal::RNTupleCompositionProvenance &provenance, bool updateFields) { Initialize(); - auto auxProvenance = provenance.Evolve(fAuxiliaryProcessor->GetProcessorName()); + auto auxProvenance = provenance.Evolve(fAuxiliaryComposition->GetCompositionName()); for (const auto &fieldIdx : fieldIdxs) { - const auto &fieldProvenance = fEntry->GetFieldProvenance(fieldIdx); + const auto &fieldProvenance = fEntry->GetCompositionProvenance(fieldIdx); if (fieldProvenance.Contains(auxProvenance)) fAuxiliaryFieldIdxs.insert(fieldIdx); else fFieldIdxs.insert(fieldIdx); } - fPrimaryProcessor->Connect(fFieldIdxs, provenance, updateFields); - fAuxiliaryProcessor->Connect(fAuxiliaryFieldIdxs, auxProvenance, updateFields); + fPrimaryComposition->Connect(fFieldIdxs, provenance, updateFields); + fAuxiliaryComposition->Connect(fAuxiliaryFieldIdxs, auxProvenance, updateFields); } -ROOT::Experimental::Internal::RNTupleProcessorEntry::FieldIndex_t -ROOT::Experimental::RNTupleJoinProcessor::AddFieldToEntry(const std::string &fieldName, const std::string &typeName, - void *valuePtr, - const Internal::RNTupleProcessorProvenance &provenance) +ROOT::Experimental::Internal::RNTupleComposerEntry::FieldIndex_t +ROOT::Experimental::RNTupleJoinComposer::AddFieldToEntry(const std::string &fieldName, const std::string &typeName, + void *valuePtr, + const Internal::RNTupleCompositionProvenance &provenance) { - auto auxProvenance = provenance.Evolve(fAuxiliaryProcessor->GetProcessorName()); + auto auxProvenance = 
provenance.Evolve(fAuxiliaryComposition->GetCompositionName()); if (auxProvenance.IsPresentInFieldName(fieldName)) { - // If the primaryProcessor has a field with the name of the auxProcessor (either as a "proper" field or because - // the primary processor itself is a join where its auxProcessor bears the same name as the current auxProcessor), - // there will be name conflicts, so error out. - if (fPrimaryProcessor->CanReadFieldFromDisk(fieldName)) { + // If the primary composition has a field with the name of the auxiliary composition (either as a "proper" field + // or because the primary composition itself is a join where its auxComposition bears the same name as the + // current aux. composition), there will be name conflicts, so error out. + if (fPrimaryComposition->CanReadFieldFromDisk(fieldName)) { throw RException(R__FAIL("ambiguous field name: \"" + fieldName + - "\" is present in the primary RNTupleProcessor \"" + - fPrimaryProcessor->GetProcessorName() + - "\", but may also refer to a field in the auxiliary RNTupleProcessor named \"" + - fAuxiliaryProcessor->GetProcessorName() + - "\". To avoid this ambiguity, rename the auxiliary RNTupleProcessor.")); + "\" is present in the primary RNTupleComposer \"" + + fPrimaryComposition->GetCompositionName() + + "\", but may also refer to a field in the auxiliary RNTupleComposer named \"" + + fAuxiliaryComposition->GetCompositionName() + + "\". 
To avoid this ambiguity, rename the auxiliary RNTupleComposer.")); } - auto fieldIdx = fAuxiliaryProcessor->AddFieldToEntry(fieldName, typeName, valuePtr, auxProvenance); + auto fieldIdx = fAuxiliaryComposition->AddFieldToEntry(fieldName, typeName, valuePtr, auxProvenance); if (fieldIdx) fAuxiliaryFieldIdxs.insert(fieldIdx); return fieldIdx; } else { - auto fieldIdx = fPrimaryProcessor->AddFieldToEntry(fieldName, typeName, valuePtr, provenance); + auto fieldIdx = fPrimaryComposition->AddFieldToEntry(fieldName, typeName, valuePtr, provenance); if (fieldIdx) fFieldIdxs.insert(fieldIdx); return fieldIdx; } } -void ROOT::Experimental::RNTupleJoinProcessor::SetAuxiliaryFieldValidity(bool isValid) +void ROOT::Experimental::RNTupleJoinComposer::SetAuxiliaryFieldValidity(bool isValid) { for (const auto &fieldIdx : fAuxiliaryFieldIdxs) { fEntry->SetFieldValidity(fieldIdx, isValid); } } -ROOT::NTupleSize_t ROOT::Experimental::RNTupleJoinProcessor::LoadEntry(ROOT::NTupleSize_t entryNumber) +ROOT::NTupleSize_t ROOT::Experimental::RNTupleJoinComposer::LoadEntry(ROOT::NTupleSize_t entryNumber) { - if (fPrimaryProcessor->LoadEntry(entryNumber) == kInvalidNTupleIndex) { + if (fPrimaryComposition->LoadEntry(entryNumber) == kInvalidNTupleIndex) { for (auto fieldIdx : fFieldIdxs) { fEntry->SetFieldValidity(fieldIdx, false); } @@ -516,16 +516,15 @@ ROOT::NTupleSize_t ROOT::Experimental::RNTupleJoinProcessor::LoadEntry(ROOT::NTu } fCurrentEntryNumber = entryNumber; - fNEntriesProcessed++; if (!fJoinTable) { - // The auxiliary processor's fields are valid if the entry could be loaded. - fAuxiliaryProcessor->LoadEntry(entryNumber); + // The auxiliary composition's fields are valid if the entry could be loaded. 
+ fAuxiliaryComposition->LoadEntry(entryNumber); return entryNumber; } if (!fJoinTableIsBuilt) { - fAuxiliaryProcessor->AddEntriesToJoinTable(*fJoinTable); + fAuxiliaryComposition->AddEntriesToJoinTable(*fJoinTable); fJoinTableIsBuilt = true; } @@ -537,7 +536,7 @@ ROOT::NTupleSize_t ROOT::Experimental::RNTupleJoinProcessor::LoadEntry(ROOT::NTu values.push_back(val); } - // Find the entry index corresponding to the join field values for each auxiliary processor and load the + // Find the entry index corresponding to the join field values for each auxiliary composition and load the // corresponding entry. const auto entryIdx = fJoinTable->GetEntryIndex(values); @@ -553,28 +552,28 @@ ROOT::NTupleSize_t ROOT::Experimental::RNTupleJoinProcessor::LoadEntry(ROOT::NTu return entryNumber; } -ROOT::NTupleSize_t ROOT::Experimental::RNTupleJoinProcessor::GetNEntries() +ROOT::NTupleSize_t ROOT::Experimental::RNTupleJoinComposer::GetNEntries() { if (fNEntries == kInvalidNTupleIndex) - fNEntries = fPrimaryProcessor->GetNEntries(); + fNEntries = fPrimaryComposition->GetNEntries(); return fNEntries; } -void ROOT::Experimental::RNTupleJoinProcessor::AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, - ROOT::NTupleSize_t entryOffset) +void ROOT::Experimental::RNTupleJoinComposer::AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, + ROOT::NTupleSize_t entryOffset) { - fPrimaryProcessor->AddEntriesToJoinTable(joinTable, entryOffset); + fPrimaryComposition->AddEntriesToJoinTable(joinTable, entryOffset); } -void ROOT::Experimental::RNTupleJoinProcessor::PrintStructureImpl(std::ostream &output) const +void ROOT::Experimental::RNTupleJoinComposer::PrintStructureImpl(std::ostream &output) const { std::ostringstream primaryStructureStr; - fPrimaryProcessor->PrintStructure(primaryStructureStr); + fPrimaryComposition->PrintStructure(primaryStructureStr); const auto primaryStructure = ROOT::Split(primaryStructureStr.str(), "\n", /*skipEmpty=*/true); const auto 
primaryStructureWidth = primaryStructure.front().size(); std::ostringstream auxStructureStr; - fAuxiliaryProcessor->PrintStructure(auxStructureStr); + fAuxiliaryComposition->PrintStructure(auxStructureStr); const auto auxStructure = ROOT::Split(auxStructureStr.str(), "\n", /*skipEmpty=*/true); const auto maxLength = std::max(primaryStructure.size(), auxStructure.size()); diff --git a/tree/ntuple/src/RNTupleProcessorEntry.cxx b/tree/ntuple/src/RNTupleComposerEntry.cxx similarity index 57% rename from tree/ntuple/src/RNTupleProcessorEntry.cxx rename to tree/ntuple/src/RNTupleComposerEntry.cxx index bfe126d9da9c0..944b70e37616a 100644 --- a/tree/ntuple/src/RNTupleProcessorEntry.cxx +++ b/tree/ntuple/src/RNTupleComposerEntry.cxx @@ -13,11 +13,11 @@ * For the list of contributors see $ROOTSYS/README/CREDITS. * *************************************************************************/ -#include +#include -const std::string &ROOT::Experimental::Internal::RNTupleProcessorEntry::FindFieldName(FieldIndex_t fieldIdx) const +const std::string &ROOT::Experimental::Internal::RNTupleComposerEntry::FindFieldName(FieldIndex_t fieldIdx) const { - assert(fieldIdx < fProcessorValues.size()); + assert(fieldIdx < fComposerValues.size()); for (const auto &[fieldName, index] : fFieldName2Index) { if (std::find(index.begin(), index.end(), fieldIdx) != index.end()) { @@ -30,9 +30,9 @@ const std::string &ROOT::Experimental::Internal::RNTupleProcessorEntry::FindFiel return empty; } -std::optional -ROOT::Experimental::Internal::RNTupleProcessorEntry::FindFieldIndex(std::string_view canonicalFieldName, - std::string_view typeName) const +std::optional +ROOT::Experimental::Internal::RNTupleComposerEntry::FindFieldIndex(std::string_view canonicalFieldName, + std::string_view typeName) const { auto it = fFieldName2Index.find(std::string(canonicalFieldName)); if (it == fFieldName2Index.end()) { @@ -43,7 +43,7 @@ ROOT::Experimental::Internal::RNTupleProcessorEntry::FindFieldIndex(std::string_ 
assert(!fieldIdxs.empty()); for (auto idx : fieldIdxs) { - if (fProcessorValues[idx].fField->GetTypeName() == typeName) { + if (fComposerValues[idx].fField->GetTypeName() == typeName) { return idx; } } @@ -51,10 +51,10 @@ ROOT::Experimental::Internal::RNTupleProcessorEntry::FindFieldIndex(std::string_ return std::nullopt; } -ROOT::Experimental::Internal::RNTupleProcessorEntry::FieldIndex_t -ROOT::Experimental::Internal::RNTupleProcessorEntry::AddField(const std::string &qualifiedFieldName, - std::unique_ptr field, void *valuePtr, - const RNTupleProcessorProvenance &provenance) +ROOT::Experimental::Internal::RNTupleComposerEntry::FieldIndex_t +ROOT::Experimental::Internal::RNTupleComposerEntry::AddField(const std::string &qualifiedFieldName, + std::unique_ptr field, void *valuePtr, + const RNTupleCompositionProvenance &provenance) { auto fieldNameWithProcessorPrefix = qualifiedFieldName; if (const auto &processorPrefix = provenance.Get(); !processorPrefix.empty()) @@ -63,25 +63,25 @@ ROOT::Experimental::Internal::RNTupleProcessorEntry::AddField(const std::string if (FindFieldIndex(fieldNameWithProcessorPrefix, field->GetTypeName())) throw ROOT::RException(R__FAIL("field \"" + fieldNameWithProcessorPrefix + "\" is already present in the entry")); - auto fieldIdx = fProcessorValues.size(); + auto fieldIdx = fComposerValues.size(); fFieldName2Index[fieldNameWithProcessorPrefix].push_back(fieldIdx); assert(field); auto value = field->CreateValue(); if (valuePtr) value.BindRawPtr(valuePtr); - fProcessorValues.emplace_back( - RProcessorValue(std::move(field), qualifiedFieldName, std::move(value), true, provenance)); + fComposerValues.emplace_back( + RComposerValue(std::move(field), qualifiedFieldName, std::move(value), true, provenance)); return fieldIdx; } -void ROOT::Experimental::Internal::RNTupleProcessorEntry::UpdateField(FieldIndex_t fieldIdx, - std::unique_ptr field) +void ROOT::Experimental::Internal::RNTupleComposerEntry::UpdateField(FieldIndex_t fieldIdx, + 
std::unique_ptr field) { - assert(fieldIdx < fProcessorValues.size()); + assert(fieldIdx < fComposerValues.size()); - auto &fieldInfo = fProcessorValues[fieldIdx]; + auto &fieldInfo = fComposerValues[fieldIdx]; if (field) { auto newValue = field->CreateValue(); @@ -95,28 +95,28 @@ void ROOT::Experimental::Internal::RNTupleProcessorEntry::UpdateField(FieldIndex } } -void ROOT::Experimental::Internal::RNTupleProcessorEntry::BindRawPtr(FieldIndex_t fieldIdx, void *valuePtr) +void ROOT::Experimental::Internal::RNTupleComposerEntry::BindRawPtr(FieldIndex_t fieldIdx, void *valuePtr) { - assert(fieldIdx < fProcessorValues.size()); - fProcessorValues[fieldIdx].fValue.BindRawPtr(valuePtr); + assert(fieldIdx < fComposerValues.size()); + fComposerValues[fieldIdx].fValue.BindRawPtr(valuePtr); } -void ROOT::Experimental::Internal::RNTupleProcessorEntry::ReadValue(FieldIndex_t fieldIdx, ROOT::NTupleSize_t entryIdx) +void ROOT::Experimental::Internal::RNTupleComposerEntry::ReadValue(FieldIndex_t fieldIdx, ROOT::NTupleSize_t entryIdx) { - assert(fieldIdx < fProcessorValues.size()); + assert(fieldIdx < fComposerValues.size()); - if (fProcessorValues[fieldIdx].fIsValid) { - fProcessorValues[fieldIdx].fValue.Read(entryIdx); + if (fComposerValues[fieldIdx].fIsValid) { + fComposerValues[fieldIdx].fValue.Read(entryIdx); } } -std::unordered_set -ROOT::Experimental::Internal::RNTupleProcessorEntry::GetFieldIndices() const +std::unordered_set +ROOT::Experimental::Internal::RNTupleComposerEntry::GetFieldIndices() const { // Field indices are sequentially assigned, and the entry (currently) offers no way to remove fields, so we can just - // generate and return a set {0, ..., |fProcessorValues| - 1}. - std::unordered_set fieldIdxs(fProcessorValues.size()); - std::generate_n(std::inserter(fieldIdxs, fieldIdxs.begin()), fProcessorValues.size(), + // generate and return a set {0, ..., |fComposerValues| - 1}. 
+ std::unordered_set fieldIdxs(fComposerValues.size()); + std::generate_n(std::inserter(fieldIdxs, fieldIdxs.begin()), fComposerValues.size(), [i = 0]() mutable { return i++; }); return fieldIdxs; } diff --git a/tree/ntuple/test/CMakeLists.txt b/tree/ntuple/test/CMakeLists.txt index 7bd5824ddfd7d..a91d12be3b725 100644 --- a/tree/ntuple/test/CMakeLists.txt +++ b/tree/ntuple/test/CMakeLists.txt @@ -56,9 +56,9 @@ ROOT_ADD_GTEST(ntuple_multi_stream ntuple_multi_stream.cxx LIBRARIES ROOTNTuple) ROOT_ADD_GTEST(ntuple_packing ntuple_packing.cxx LIBRARIES ROOTNTuple) ROOT_ADD_GTEST(ntuple_pages ntuple_pages.cxx LIBRARIES ROOTNTuple) ROOT_ADD_GTEST(ntuple_print ntuple_print.cxx LIBRARIES ROOTNTuple CustomStruct) -ROOT_ADD_GTEST(ntuple_processor ntuple_processor.cxx LIBRARIES ROOTNTuple) -ROOT_ADD_GTEST(ntuple_processor_chain ntuple_processor_chain.cxx LIBRARIES ROOTNTuple) -ROOT_ADD_GTEST(ntuple_processor_join ntuple_processor_join.cxx LIBRARIES ROOTNTuple) +ROOT_ADD_GTEST(ntuple_composer ntuple_composer.cxx LIBRARIES ROOTNTuple) +ROOT_ADD_GTEST(ntuple_composer_chain ntuple_composer_chain.cxx LIBRARIES ROOTNTuple) +ROOT_ADD_GTEST(ntuple_composer_join ntuple_composer_join.cxx LIBRARIES ROOTNTuple) ROOT_ADD_GTEST(ntuple_project ntuple_project.cxx LIBRARIES ROOTNTuple) ROOT_ADD_GTEST(ntuple_modelext ntuple_modelext.cxx LIBRARIES ROOTNTuple MathCore CustomStruct) ROOT_ADD_GTEST(ntuple_serialize ntuple_serialize.cxx LIBRARIES ROOTNTuple) diff --git a/tree/ntuple/test/ntuple_composer.cxx b/tree/ntuple/test/ntuple_composer.cxx new file mode 100644 index 0000000000000..5ddb870b4be9d --- /dev/null +++ b/tree/ntuple/test/ntuple_composer.cxx @@ -0,0 +1,839 @@ +#include "ntuple_test.hxx" + +#include + +using ROOT::Experimental::RNTupleComposer; + +TEST(RNTupleComposer, EmptyNTuple) +{ + FileRaii fileGuard("test_ntuple_composer_empty .root"); + { + auto model = RNTupleModel::Create(); + model->MakeField("x"); + auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", 
fileGuard.GetPath()); + } + + auto composer = RNTupleComposer::Create({"ntuple", fileGuard.GetPath()}); + auto processor = RNTupleProcessor(*composer); + + int nEntries = 0; + for (auto idx [[maybe_unused]] : processor) { + nEntries++; + } + EXPECT_EQ(0, nEntries); + EXPECT_EQ(nEntries, processor.GetNEntriesProcessed()); +} + +TEST(RNTupleComposer, TMemFile) +{ + TMemFile memFile("test_ntuple_composer_tmemfile.root", "RECREATE"); + { + auto model = RNTupleModel::Create(); + auto fldX = model->MakeField("x"); + auto ntuple = RNTupleWriter::Append(std::move(model), "ntuple", memFile); + + for (unsigned i = 0; i < 5; ++i) { + *fldX = static_cast(i); + ntuple->Fill(); + } + } + + auto composer = RNTupleComposer::Create({"ntuple", &memFile}); + + auto x = composer->RequestField("x"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + + EXPECT_FLOAT_EQ(static_cast(idx), *x); + } + + EXPECT_EQ(5, processor.GetNEntriesProcessed()); +} + +TEST(RNTupleComposer, TDirectory) +{ + FileRaii fileGuard("test_ntuple_composer_tdirectoryfile.root"); + { + auto file = std::unique_ptr(TFile::Open(fileGuard.GetPath().c_str(), "RECREATE")); + auto dir = std::unique_ptr(file->mkdir("a/b")); + auto model = RNTupleModel::Create(); + auto fldX = model->MakeField("x"); + auto ntuple = RNTupleWriter::Append(std::move(model), "ntuple", *dir); + + for (unsigned i = 0; i < 5; ++i) { + *fldX = static_cast(i); + ntuple->Fill(); + } + } + + auto file = std::make_unique(fileGuard.GetPath().c_str()); + auto composer = RNTupleComposer::Create({"a/b/ntuple", file.get()}); + auto x = composer->RequestField("x"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + + EXPECT_FLOAT_EQ(static_cast(idx), *x); + } + + EXPECT_EQ(5, 
processor.GetNEntriesProcessed()); +} + +class RNTupleComposerTest : public testing::Test { +protected: + const std::array fFileNames{"test_ntuple_composer1.root ", "test_ntuple_composer2.root ", + "test_ntuple_composer3.root ", "test_ntuple_composer4.root "}; + const std::array fNTupleNames{"ntuple", "ntuple_aux", "ntuple_aux", "ntuple_aux"}; + + void SetUp() override + { + { + auto model = RNTupleModel::Create(); + auto fldI = model->MakeField("i"); + auto fldX = model->MakeField("x"); + auto fldY = model->MakeField>("y"); + auto fldStruct = model->MakeField("struct"); + auto ntuple = RNTupleWriter::Recreate(std::move(model), fNTupleNames[0], fFileNames[0]); + + for (unsigned i = 0; i < 5; i++) { + *fldI = i; + *fldX = static_cast(i); + *fldY = {static_cast(i), static_cast(i * 2)}; + fldStruct->a = i * 1.f; + ntuple->Fill(); + } + } + { + auto model = RNTupleModel::Create(); + auto fldI = model->MakeField("i"); + auto fldZ = model->MakeField("z"); + auto fldStruct = model->MakeField("struct"); + auto ntuple = RNTupleWriter::Recreate(std::move(model), fNTupleNames[1], fFileNames[1]); + + for (unsigned i = 0; i < 5; ++i) { + *fldI = i; + *fldZ = i * 2.f; + fldStruct->a = i * 2.f; + ntuple->Fill(); + } + } + // Same as above, but entries in reverse order + { + auto model = RNTupleModel::Create(); + auto fldI = model->MakeField("i"); + auto fldZ = model->MakeField("z"); + auto fldStruct = model->MakeField("struct"); + auto ntuple = RNTupleWriter::Recreate(std::move(model), fNTupleNames[2], fFileNames[2]); + + for (int i = 4; i >= 0; --i) { + *fldI = i; + *fldZ = i * 3.f; + fldStruct->a = i * 3.f; + + ntuple->Fill(); + } + } + // Same as above, but the second and fourth entry are missing + { + auto model = RNTupleModel::Create(); + auto fldI = model->MakeField("i"); + auto fldZ = model->MakeField("z"); + auto fldStruct = model->MakeField("struct"); + auto ntuple = RNTupleWriter::Recreate(std::move(model), fNTupleNames[3], fFileNames[3]); + + for (unsigned i = 0; i < 
5; ++i) { + if (i % 2 == 1) + continue; + *fldI = i; + *fldZ = i * 4.f; + fldStruct->a = i * 4.f; + ntuple->Fill(); + } + } + } + + void TearDown() override + { + for (const auto &fileName : fFileNames) { + std::remove(fileName.c_str()); + } + } +}; + +TEST_F(RNTupleComposerTest, Base) +{ + auto composer = RNTupleComposer::Create({fNTupleNames[0], fFileNames[0]}); + + auto x = composer->RequestField("x"); + // Check that `RequestField` also works with `void`. + auto y = composer->RequestField("y"); + + try { + composer->RequestField("z"); + FAIL() << "registering fields that do not exist should not be possible"; + } catch (const ROOT::RException &err) { + EXPECT_THAT(err.what(), + testing::HasSubstr("cannot register field with name \"z\" because it is not present in the on-disk " + "information of the RNTuple(s) this composition is created from")); + } + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + + EXPECT_FLOAT_EQ(static_cast(idx), *x); + + std::vector yExp{static_cast(idx), static_cast((idx) * 2)}; + EXPECT_EQ(yExp, *std::static_pointer_cast>(y.GetPtr())); + } + EXPECT_EQ(5, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, RequestFieldWithPtr) +{ + auto composer = RNTupleComposer::Create({fNTupleNames[0], fFileNames[0]}); + + auto xPtr = std::make_shared(); + auto x = composer->RequestField("x", xPtr.get()); + + auto xNewPtr = std::make_shared(); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_FLOAT_EQ(static_cast(idx), *x); + EXPECT_EQ(x.GetRawPtr(), xPtr.get()); + + if (idx == 2) { + x.BindRawPtr(xNewPtr.get()); + xPtr.swap(xNewPtr); + } + } +} + +TEST_F(RNTupleComposerTest, RequestFieldWithVoidPtr) +{ + auto composer = RNTupleComposer::Create({fNTupleNames[0], fFileNames[0]}); + + auto xPtr = std::make_shared(); + auto x = composer->RequestField("x", 
xPtr.get()); + + auto xNewPtr = std::make_shared(); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_FLOAT_EQ(static_cast(idx), *std::static_pointer_cast(x.GetPtr())); + EXPECT_EQ(x.GetRawPtr(), xPtr.get()); + + if (idx == 2) { + x.BindRawPtr(xNewPtr.get()); + xPtr.swap(xNewPtr); + } + } +} + +TEST_F(RNTupleComposerTest, RequestFieldWithTypeString) +{ + { + auto composer = RNTupleComposer::Create({fNTupleNames[0], fFileNames[0]}); + EXPECT_NO_THROW(composer->RequestField("y", "std::vector")); + } + { + auto composer = RNTupleComposer::Create({fNTupleNames[0], fFileNames[0]}); + EXPECT_NO_THROW(composer->RequestField("y", "std::vector")); + } + { + auto composer = RNTupleComposer::Create({fNTupleNames[0], fFileNames[0]}); + EXPECT_THROW(composer->RequestField("y", "std::vetor"), ROOT::RException); + } + + auto composer = RNTupleComposer::Create({fNTupleNames[0], fFileNames[0]}); + auto x = composer->RequestField("x", "float"); + auto yPtr = std::make_shared>(); + auto y = composer->RequestField("y", "std::vector", yPtr.get()); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + + EXPECT_FLOAT_EQ(static_cast(idx), *std::static_pointer_cast(x.GetPtr())); + + std::vector yExp{static_cast(idx), static_cast((idx) * 2)}; + EXPECT_EQ(yExp, *std::static_pointer_cast>(y.GetPtr())); + } + EXPECT_EQ(5, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, AlternativeTypes) +{ + auto composer = RNTupleComposer::Create({fNTupleNames[0], fFileNames[0]}); + + auto xAsDouble = composer->RequestField("x"); + auto xAsFloat = composer->RequestField("x"); + + try { + composer->RequestField("x"); + } catch (const ROOT::RException &err) { + EXPECT_THAT(err.what(), testing::HasSubstr("in-memory field x of type std::string is incompatible with " + "on-disk field x: incompatible on-disk type 
name float")); + } + + auto yAsRVec = composer->RequestField>("y"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + + EXPECT_FLOAT_EQ(static_cast(idx), *xAsDouble); + EXPECT_FLOAT_EQ(idx, *xAsFloat); + + ROOT::RVec yExp{static_cast(idx), static_cast((idx) * 2)}; + for (std::size_t i = 0ul; i < yAsRVec->size(); ++i) { + EXPECT_FLOAT_EQ(yExp[i], (*yAsRVec)[i]); + } + } +} + +TEST_F(RNTupleComposerTest, Subfields) +{ + auto composer = RNTupleComposer::Create({fNTupleNames[0], fFileNames[0]}); + + auto strct = composer->RequestField("struct"); + auto strct_a = composer->RequestField("struct.a"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_FLOAT_EQ(idx, idx); + EXPECT_FLOAT_EQ(strct->a, *strct_a); + } +} + +TEST_F(RNTupleComposerTest, PrintStructureSingle) +{ + auto composer = RNTupleComposer::Create({fNTupleNames[0], fFileNames[0]}); + + std::ostringstream os; + composer->PrintStructure(os); + + const std::string exp = "+-----------------------------+\n" + "| ntuple |\n" + "| test_ntuple_composer1.root |\n" + "+-----------------------------+\n"; + EXPECT_EQ(exp, os.str()); +} + +TEST_F(RNTupleComposerTest, ChainedChain) +{ + std::vector> innerProcs; + innerProcs.push_back( + RNTupleComposer::CreateChain({{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}})); + innerProcs.push_back( + RNTupleComposer::CreateChain({{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}})); + + auto composer = RNTupleComposer::CreateChain(std::move(innerProcs)); + + auto i = composer->RequestField("i"); + auto z = composer->RequestField("z"); + auto strct_a = composer->RequestField("struct.a"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + if ((idx >= 5 && idx < 10) || idx >= 15) 
{ + EXPECT_EQ(*i, 4 - idx % 5); + EXPECT_EQ(*z, (4 - idx % 5) * 3.f); + } else { + EXPECT_EQ(*i, idx % 5); + EXPECT_EQ(*z, (idx % 5) * 2.f); + } + + EXPECT_EQ(*strct_a, *z); + } + EXPECT_EQ(20, processor.GetNEntriesProcessed()); + + auto zPtr = std::make_shared(); + z.BindRawPtr(zPtr.get()); + auto aPtr = std::make_shared(); + strct_a.BindRawPtr(aPtr.get()); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1 + 20, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + + if ((idx >= 5 && idx < 10) || idx >= 15) { + EXPECT_EQ(*i, 4 - idx % 5); + EXPECT_EQ(*z, (4 - idx % 5) * 3.f); + } else { + EXPECT_EQ(*i, idx % 5); + EXPECT_EQ(*z, (idx % 5) * 2.f); + } + + EXPECT_EQ(*strct_a, *z); + EXPECT_EQ(z.GetPtr().get(), zPtr.get()); + EXPECT_EQ(strct_a.GetPtr().get(), aPtr.get()); + } + EXPECT_EQ(40, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, ChainedJoin) +{ + std::vector> innerProcs; + innerProcs.push_back( + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {})); + innerProcs.push_back( + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {})); + + auto composer = RNTupleComposer::CreateChain(std::move(innerProcs)); + + auto i = composer->RequestField("i"); + auto x = composer->RequestField("x"); + auto z = composer->RequestField("ntuple_aux.z"); + auto strct_a = composer->RequestField("ntuple_aux.struct.a"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + EXPECT_EQ(*i, composer->GetCurrentEntryNumber() % 5); + + EXPECT_EQ(static_cast(*i), *x); + EXPECT_EQ(*x * 2, *z); + EXPECT_EQ(*z, *strct_a); + } + EXPECT_EQ(10, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, ChainedJoinUnaligned) +{ + std::vector> innerProcs; + innerProcs.push_back( + 
RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[2], fFileNames[2]}, {"i"})); + innerProcs.push_back( + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[2], fFileNames[2]}, {"i"})); + + auto composer = RNTupleComposer::CreateChain(std::move(innerProcs)); + + auto i = composer->RequestField("i"); + auto x = composer->RequestField("x"); + auto z = composer->RequestField("ntuple_aux.z"); + auto strct_a = composer->RequestField("ntuple_aux.struct.a"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + EXPECT_EQ(*i, composer->GetCurrentEntryNumber() % 5); + + EXPECT_EQ(static_cast(*i), *x); + EXPECT_EQ(*x * 3, *z); + EXPECT_EQ(*z, *strct_a); + } + EXPECT_EQ(10, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, ChainedJoinMissingEntries) +{ + std::vector> innerProcs; + innerProcs.push_back( + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[3], fFileNames[3]}, {"i"})); + innerProcs.push_back( + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[3], fFileNames[3]}, {"i"})); + + auto composer = RNTupleComposer::CreateChain(std::move(innerProcs)); + + auto i = composer->RequestField("i"); + auto x = composer->RequestField("x"); + auto z = composer->RequestField("ntuple_aux.z"); + auto strct_a = composer->RequestField("ntuple_aux.struct.a"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + EXPECT_EQ(*i, composer->GetCurrentEntryNumber() % 5); + + EXPECT_EQ(static_cast(*i), *x); + + if ((idx % 5) % 2 == 1) { + EXPECT_FALSE(z.HasValue()); + EXPECT_FALSE(strct_a.HasValue()); + } else { + EXPECT_TRUE(z.HasValue()); + EXPECT_TRUE(strct_a.HasValue()); + EXPECT_EQ(*x * 4, *z); 
+ EXPECT_EQ(*z, *strct_a); + } + } + EXPECT_EQ(10, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, JoinedChain) +{ + auto primaryChain = + RNTupleComposer::CreateChain({{fNTupleNames[0], fFileNames[0]}, {fNTupleNames[0], fFileNames[0]}}); + + auto auxiliaryChain = + RNTupleComposer::CreateChain({{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[1], fFileNames[1]}}); + + auto composer = RNTupleComposer::CreateJoin(std::move(primaryChain), std::move(auxiliaryChain), {}); + + auto i = composer->RequestField("i"); + auto x = composer->RequestField("x"); + auto z = composer->RequestField("ntuple_aux.z"); + auto strct_a = composer->RequestField("ntuple_aux.struct.a"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + EXPECT_EQ(*i, composer->GetCurrentEntryNumber() % 5); + + EXPECT_EQ(static_cast(*i), *x); + EXPECT_EQ(*x * 2, *z); + EXPECT_EQ(*z, *strct_a); + } + EXPECT_EQ(10, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, JoinedChainUnaligned) +{ + auto primaryChain = + RNTupleComposer::CreateChain({{fNTupleNames[0], fFileNames[0]}, {fNTupleNames[0], fFileNames[0]}}); + + auto auxiliaryChain = + RNTupleComposer::CreateChain({{fNTupleNames[2], fFileNames[2]}, {fNTupleNames[2], fFileNames[2]}}); + + auto composer = RNTupleComposer::CreateJoin(std::move(primaryChain), std::move(auxiliaryChain), {"i"}); + + auto i = composer->RequestField("i"); + auto x = composer->RequestField("x"); + auto z = composer->RequestField("ntuple_aux.z"); + auto strct_a = composer->RequestField("ntuple_aux.struct.a"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + EXPECT_EQ(*i, composer->GetCurrentEntryNumber() % 5); + + EXPECT_EQ(static_cast(*i), *x); + EXPECT_EQ(*x * 
3, *z); + EXPECT_EQ(*z, *strct_a); + } + EXPECT_EQ(10, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, JoinedChainMissingEntries) +{ + auto primaryChain = + RNTupleComposer::CreateChain({{fNTupleNames[0], fFileNames[0]}, {fNTupleNames[0], fFileNames[0]}}); + + auto auxiliaryChain = + RNTupleComposer::CreateChain({{fNTupleNames[3], fFileNames[3]}, {fNTupleNames[3], fFileNames[3]}}); + + auto composer = RNTupleComposer::CreateJoin(std::move(primaryChain), std::move(auxiliaryChain), {"i"}); + + auto i = composer->RequestField("i"); + auto x = composer->RequestField("x"); + auto z = composer->RequestField("ntuple_aux.z"); + auto strct_a = composer->RequestField("ntuple_aux.struct.a"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + EXPECT_EQ(*i, composer->GetCurrentEntryNumber() % 5); + + EXPECT_EQ(static_cast(*i), *x); + + if ((idx % 5) % 2 == 1) { + EXPECT_FALSE(z.HasValue()); + EXPECT_FALSE(strct_a.HasValue()); + } else { + EXPECT_TRUE(z.HasValue()); + EXPECT_TRUE(strct_a.HasValue()); + EXPECT_EQ(*x * 4, *z); + EXPECT_EQ(*z, *strct_a); + } + } + EXPECT_EQ(10, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, JoinedJoinComposedPrimary) +{ + auto primaryProc = + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {}); + + auto auxProc = RNTupleComposer::Create({fNTupleNames[2], fFileNames[2]}, "ntuple_aux2"); + + auto composer = RNTupleComposer::CreateJoin(std::move(primaryProc), std::move(auxProc), {"i"}, "joined_ntuple"); + + auto i = composer->RequestField("i"); + auto x = composer->RequestField("x"); + auto z1 = composer->RequestField("ntuple_aux.z"); + auto strct_a1 = composer->RequestField("ntuple_aux.struct.a"); + auto z2 = composer->RequestField("ntuple_aux2.z"); + auto strct_a2 = composer->RequestField("ntuple_aux2.struct.a"); + + 
auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + EXPECT_EQ(*i, composer->GetCurrentEntryNumber() % 5); + + EXPECT_EQ(static_cast(*i), *x); + EXPECT_EQ(*x * 2, *z1); + EXPECT_EQ(*x * 2, *strct_a1); + EXPECT_EQ(*x * 3, *z2); + EXPECT_EQ(*x * 3, *strct_a2); + } + EXPECT_EQ(5, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, JoinedJoinComposedPrimaryMissingEntries) +{ + auto primaryProc = + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {}); + + auto auxProc = RNTupleComposer::Create({fNTupleNames[3], fFileNames[3]}, "ntuple_aux2"); + + auto composer = RNTupleComposer::CreateJoin(std::move(primaryProc), std::move(auxProc), {"i"}); + + auto i = composer->RequestField("i"); + auto x = composer->RequestField("x"); + auto z1 = composer->RequestField("ntuple_aux.z"); + auto strct_a1 = composer->RequestField("ntuple_aux.struct.a"); + auto z2 = composer->RequestField("ntuple_aux2.z"); + auto strct_a2 = composer->RequestField("ntuple_aux2.struct.a"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + EXPECT_EQ(*i, composer->GetCurrentEntryNumber() % 5); + + EXPECT_EQ(static_cast(*i), *x); + EXPECT_EQ(*x * 2, *z1); + EXPECT_EQ(*x * 2, *strct_a1); + + if (idx % 2 == 1) { + EXPECT_FALSE(z2.HasValue()); + EXPECT_FALSE(strct_a2.HasValue()); + } else { + EXPECT_TRUE(z2.HasValue()); + EXPECT_TRUE(strct_a2.HasValue()); + EXPECT_EQ(*x * 4, *z2); + EXPECT_EQ(*x * 4, *strct_a2); + } + } + EXPECT_EQ(5, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, JoinedJoinComposedAuxiliary) +{ + auto primaryProc = RNTupleComposer::Create({fNTupleNames[0], fFileNames[0]}); + + auto auxProcIntermediate = 
RNTupleComposer::Create({fNTupleNames[2], fFileNames[2]}, "ntuple_aux2"); + + auto auxProc = RNTupleComposer::CreateJoin(RNTupleComposer::Create({fNTupleNames[1], fFileNames[1]}), + std::move(auxProcIntermediate), {"i"}); + + auto composer = RNTupleComposer::CreateJoin(std::move(primaryProc), std::move(auxProc), {}); + + auto i = composer->RequestField("i"); + auto x = composer->RequestField("x"); + auto z1 = composer->RequestField("ntuple_aux.z"); + auto strct_a1 = composer->RequestField("ntuple_aux.struct.a"); + auto z2 = composer->RequestField("ntuple_aux.ntuple_aux2.z"); + auto strct_a2 = composer->RequestField("ntuple_aux.ntuple_aux2.struct.a"); + + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + EXPECT_EQ(*i, composer->GetCurrentEntryNumber() % 5); + + EXPECT_EQ(static_cast(*i), *x); + EXPECT_EQ(*x * 2, *z1); + EXPECT_EQ(*x * 2, *strct_a1); + EXPECT_EQ(*x * 3, *z2); + EXPECT_EQ(*x * 3, *strct_a2); + } + + EXPECT_EQ(5, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, JoinedJoinComposedAuxiliaryMissingEntries) +{ + auto primaryProc = RNTupleComposer::Create({fNTupleNames[0], fFileNames[0]}); + + auto auxProcIntermediate = RNTupleComposer::Create({fNTupleNames[3], fFileNames[3]}, "ntuple_aux2"); + + auto auxProc = RNTupleComposer::CreateJoin(RNTupleComposer::Create({fNTupleNames[1], fFileNames[1]}), + std::move(auxProcIntermediate), {"i"}); + + auto composer = RNTupleComposer::CreateJoin(std::move(primaryProc), std::move(auxProc), {}); + + auto i = composer->RequestField("i"); + auto x = composer->RequestField("x"); + auto z1 = composer->RequestField("ntuple_aux.z"); + auto strct_a1 = composer->RequestField("ntuple_aux.struct.a"); + auto z2 = composer->RequestField("ntuple_aux.ntuple_aux2.z"); + auto strct_a2 = composer->RequestField("ntuple_aux.ntuple_aux2.struct.a"); + + auto processor = 
RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); + EXPECT_EQ(*i, composer->GetCurrentEntryNumber() % 5); + + EXPECT_EQ(static_cast(*i), *x); + EXPECT_EQ(*x * 2, *z1); + EXPECT_EQ(*x * 2, *strct_a1); + + if (idx % 2 == 1) { + EXPECT_FALSE(z2.HasValue()); + EXPECT_FALSE(strct_a2.HasValue()); + } else { + EXPECT_TRUE(z2.HasValue()); + EXPECT_TRUE(strct_a2.HasValue()); + EXPECT_EQ(*x * 4, *z2); + EXPECT_EQ(*x * 4, *strct_a2); + } + } + + EXPECT_EQ(5, processor.GetNEntriesProcessed()); +} + +TEST_F(RNTupleComposerTest, JoinedJoinComposedSameName) +{ + auto primaryProc = + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {}); + + auto auxProc = RNTupleComposer::Create({fNTupleNames[2], fFileNames[2]}); + auto composer = RNTupleComposer::CreateJoin(std::move(primaryProc), std::move(auxProc), {"i"}); + + try { + composer->RequestField("ntuple_aux.z"); + + FAIL() << "creating an auxiliary composer where its name causes conflicts should throw"; + } catch (const ROOT::RException &err) { + EXPECT_THAT(err.what(), + testing::HasSubstr("ambiguous field name: \"ntuple_aux.z\" is present in the primary RNTupleComposer " + "\"ntuple\", but may also refer to a field in the auxiliary RNTupleComposer named " + "\"ntuple_aux\". 
To avoid this ambiguity, rename the auxiliary RNTupleComposer.")); + } +} + +TEST_F(RNTupleComposerTest, PrintStructureChainedJoin) +{ + std::vector> innerProcs; + innerProcs.push_back( + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {})); + innerProcs.push_back( + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {})); + + auto composer = RNTupleComposer::CreateChain(std::move(innerProcs)); + + std::ostringstream os; + composer->PrintStructure(os); + + const std::string exp = "+-----------------------------+ +-----------------------------+\n" + "| ntuple | | ntuple_aux |\n" + "| test_ntuple_composer1.root | | test_ntuple_composer2.root |\n" + "+-----------------------------+ +-----------------------------+\n" + "+-----------------------------+ +-----------------------------+\n" + "| ntuple | | ntuple_aux |\n" + "| test_ntuple_composer1.root | | test_ntuple_composer2.root |\n" + "+-----------------------------+ +-----------------------------+\n"; + EXPECT_EQ(exp, os.str()); +} + +TEST_F(RNTupleComposerTest, PrintStructureJoinedChain) +{ + auto primaryChain = + RNTupleComposer::CreateChain({{fNTupleNames[0], fFileNames[0]}, {fNTupleNames[0], fFileNames[0]}}); + auto auxiliaryChain = + RNTupleComposer::CreateChain({{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[1], fFileNames[1]}}); + + auto composer = RNTupleComposer::CreateJoin(std::move(primaryChain), std::move(auxiliaryChain), {}); + + std::ostringstream os; + composer->PrintStructure(os); + + const std::string exp = "+-----------------------------+ +-----------------------------+\n" + "| ntuple | | ntuple_aux |\n" + "| test_ntuple_composer1.root | | test_ntuple_composer2.root |\n" + "+-----------------------------+ +-----------------------------+\n" + "+-----------------------------+ +-----------------------------+\n" + "| ntuple | | ntuple_aux |\n" + "| test_ntuple_composer1.root | | test_ntuple_composer2.root |\n" 
+ "+-----------------------------+ +-----------------------------+\n"; + EXPECT_EQ(exp, os.str()); +} + +TEST_F(RNTupleComposerTest, PrintStructureJoinedChainAsymmetric) +{ + auto primaryChain = + RNTupleComposer::CreateChain({{fNTupleNames[0], fFileNames[0]}, {fNTupleNames[0], fFileNames[0]}}); + auto auxiliaryChain = RNTupleComposer::CreateChain({{fNTupleNames[1], fFileNames[1]}}); + + auto proc1 = RNTupleComposer::CreateJoin(std::move(primaryChain), std::move(auxiliaryChain), {}); + + std::ostringstream os1; + proc1->PrintStructure(os1); + + const std::string exp1 = "+-----------------------------+ +-----------------------------+\n" + "| ntuple | | ntuple_aux |\n" + "| test_ntuple_composer1.root | | test_ntuple_composer2.root |\n" + "+-----------------------------+ +-----------------------------+\n" + "+-----------------------------+\n" + "| ntuple |\n" + "| test_ntuple_composer1.root |\n" + "+-----------------------------+\n"; + EXPECT_EQ(exp1, os1.str()); + + primaryChain = RNTupleComposer::CreateChain({{fNTupleNames[0], fFileNames[0]}}); + auxiliaryChain = RNTupleComposer::CreateChain({{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[1], fFileNames[1]}}); + + auto proc2 = RNTupleComposer::CreateJoin(std::move(primaryChain), std::move(auxiliaryChain), {}); + + std::ostringstream os2; + proc2->PrintStructure(os2); + + const std::string exp2 = "+-----------------------------+ +-----------------------------+\n" + "| ntuple | | ntuple_aux |\n" + "| test_ntuple_composer1.root | | test_ntuple_composer2.root |\n" + "+-----------------------------+ +-----------------------------+\n" + " +-----------------------------+\n" + " | ntuple_aux |\n" + " | test_ntuple_composer2.root |\n" + " +-----------------------------+\n"; + EXPECT_EQ(exp2, os2.str()); +} diff --git a/tree/ntuple/test/ntuple_processor_chain.cxx b/tree/ntuple/test/ntuple_composer_chain.cxx similarity index 50% rename from tree/ntuple/test/ntuple_processor_chain.cxx rename to 
tree/ntuple/test/ntuple_composer_chain.cxx index 8e5c390e3f7e9..37b3a4630e613 100644 --- a/tree/ntuple/test/ntuple_processor_chain.cxx +++ b/tree/ntuple/test/ntuple_composer_chain.cxx @@ -1,14 +1,11 @@ #include "ntuple_test.hxx" -#include - #include -class RNTupleChainProcessorTest : public testing::Test { +class RNTupleChainComposerTest : public testing::Test { protected: - const std::array fFileNames{"test_ntuple_chain_processor1.root", "test_ntuple_chain_processor2.root", - "test_ntuple_chain_processor3.root", - "test_ntuple_chain_processor4.root"}; + const std::array fFileNames{"test_ntuple_chain_composer1.root", "test_ntuple_chain_composer2.root", + "test_ntuple_chain_composer3.root", "test_ntuple_chain_composer4.root"}; const std::string fNTupleName = "ntuple"; @@ -71,67 +68,73 @@ class RNTupleChainProcessorTest : public testing::Test { } }; -TEST(RNTupleChainProcessor, EmptySpec) +TEST(RNTupleChainComposer, EmptySpec) { try { - auto proc = RNTupleProcessor::CreateChain(std::vector{}); - FAIL() << "creating a processor without at least one RNTuple should throw"; + auto composer = RNTupleComposer::CreateChain(std::vector{}); + FAIL() << "creating a composer without at least one RNTuple should throw"; } catch (const ROOT::RException &err) { EXPECT_THAT(err.what(), testing::HasSubstr("at least one RNTuple must be provided")); } } -TEST_F(RNTupleChainProcessorTest, SingleNTuple) +TEST_F(RNTupleChainComposerTest, SingleNTuple) { - auto proc = RNTupleProcessor::CreateChain({{fNTupleName, fFileNames[0]}}); + auto composer = RNTupleComposer::CreateChain({{fNTupleName, fFileNames[0]}}); + + auto x = composer->RequestField("x"); - auto x = proc->RequestField("x"); + auto processor = RNTupleProcessor(*composer); - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, 
composer->GetCurrentEntryNumber()); - EXPECT_FLOAT_EQ(static_cast(proc->GetCurrentEntryNumber()), *x); + EXPECT_FLOAT_EQ(static_cast(composer->GetCurrentEntryNumber()), *x); } - EXPECT_EQ(5, proc->GetNEntriesProcessed()); + EXPECT_EQ(5, processor.GetNEntriesProcessed()); } -TEST_F(RNTupleChainProcessorTest, Basic) +TEST_F(RNTupleChainComposerTest, Basic) { - auto proc = RNTupleProcessor::CreateChain({{fNTupleName, fFileNames[0]}, {fNTupleName, fFileNames[1]}}); + auto composer = RNTupleComposer::CreateChain({{fNTupleName, fFileNames[0]}, {fNTupleName, fFileNames[1]}}); - EXPECT_STREQ("ntuple", proc->GetProcessorName().c_str()); + EXPECT_STREQ("ntuple", composer->GetCompositionName().c_str()); { auto namedProc = - RNTupleProcessor::CreateChain({{fNTupleName, fFileNames[0]}, {fNTupleName, fFileNames[1]}}, "my_ntuple"); - EXPECT_STREQ("my_ntuple", namedProc->GetProcessorName().c_str()); + RNTupleComposer::CreateChain({{fNTupleName, fFileNames[0]}, {fNTupleName, fFileNames[1]}}, "my_ntuple"); + EXPECT_STREQ("my_ntuple", namedProc->GetCompositionName().c_str()); } - auto x = proc->RequestField("x"); - auto y = proc->RequestField>("y"); + auto x = composer->RequestField("x"); + auto y = composer->RequestField>("y"); - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); EXPECT_EQ(static_cast(idx), *x); std::vector yExp = {static_cast(idx), static_cast((idx) * 2)}; EXPECT_EQ(yExp, *y); } - EXPECT_EQ(10, proc->GetNEntriesProcessed()); + EXPECT_EQ(10, processor.GetNEntriesProcessed()); } -TEST_F(RNTupleChainProcessorTest, MissingFields) +TEST_F(RNTupleChainComposerTest, MissingFields) { - auto proc = RNTupleProcessor::CreateChain( + auto composer = RNTupleComposer::CreateChain( {{fNTupleName, fFileNames[0]}, 
{fNTupleName, fFileNames[2]}, {fNTupleName, fFileNames[1]}}); - auto x = proc->RequestField("x"); - auto y = proc->RequestField>("y"); + auto x = composer->RequestField("x"); + auto y = composer->RequestField>("y"); + + auto processor = RNTupleProcessor(*composer); - for (auto idx : *proc) { + for (auto idx : processor) { EXPECT_EQ(idx % 5, static_cast(*x) % 5); if (idx < 5 || idx >= 10) { @@ -140,12 +143,12 @@ TEST_F(RNTupleChainProcessorTest, MissingFields) EXPECT_FALSE(y.HasValue()); } } - EXPECT_EQ(15, proc->GetNEntriesProcessed()); + EXPECT_EQ(15, processor.GetNEntriesProcessed()); } -TEST_F(RNTupleChainProcessorTest, EmptyNTuples) +TEST_F(RNTupleChainComposerTest, EmptyNTuples) { - FileRaii fileGuard("test_ntuple_processor_empty_ntuples.root"); + FileRaii fileGuard("test_ntuple_composer_empty_ntuples.root"); { auto model = RNTupleModel::Create(); auto fldX = model->MakeField("x"); @@ -156,64 +159,65 @@ TEST_F(RNTupleChainProcessorTest, EmptyNTuples) std::vector ntuples = {{fNTupleName, fileGuard.GetPath()}, {fNTupleName, fFileNames[0]}}; // Empty ntuples are skipped (as long as their model complies) - auto proc = RNTupleProcessor::CreateChain({{fNTupleName, fileGuard.GetPath()}, - {fNTupleName, fFileNames[0]}, - {fNTupleName, fileGuard.GetPath()}, - {fNTupleName, fFileNames[1]}}); + auto composer = RNTupleComposer::CreateChain({{fNTupleName, fileGuard.GetPath()}, + {fNTupleName, fFileNames[0]}, + {fNTupleName, fileGuard.GetPath()}, + {fNTupleName, fFileNames[1]}}); - auto x = proc->RequestField("x"); + auto x = composer->RequestField("x"); - for (auto idx : *proc) { + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { EXPECT_EQ(static_cast(idx), *x); } - EXPECT_EQ(10, proc->GetNEntriesProcessed()); + EXPECT_EQ(10, processor.GetNEntriesProcessed()); } namespace ROOT::Experimental::Internal { -struct RNTupleProcessorEntryLoader { - static ROOT::NTupleSize_t LoadEntry(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber) 
+struct RNTupleComposerEntryLoader { + static ROOT::NTupleSize_t LoadEntry(RNTupleComposer &composer, ROOT::NTupleSize_t entryNumber) { - processor.Connect(processor.fEntry->GetFieldIndices(), RNTupleProcessorProvenance(), /*updateFields=*/false); - return processor.LoadEntry(entryNumber); + composer.Connect(composer.fEntry->GetFieldIndices(), RNTupleCompositionProvenance(), /*updateFields=*/false); + return composer.LoadEntry(entryNumber); } - static void LoadUnfrozenEntry(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber) + static void LoadUnfrozenEntry(RNTupleComposer &composer, ROOT::NTupleSize_t entryNumber) { - processor.LoadEntry(entryNumber); + composer.LoadEntry(entryNumber); } }; } // namespace ROOT::Experimental::Internal -TEST_F(RNTupleChainProcessorTest, LoadRandomEntry) +TEST_F(RNTupleChainComposerTest, LoadRandomEntry) { - using ROOT::Experimental::Internal::RNTupleProcessorEntryLoader; - - auto proc = RNTupleProcessor::CreateChain({{fNTupleName, fFileNames[0]}, {fNTupleName, fFileNames[1]}}); + using ROOT::Experimental::Internal::RNTupleComposerEntryLoader; + auto composer = RNTupleComposer::CreateChain({{fNTupleName, fFileNames[0]}, {fNTupleName, fFileNames[1]}}); - auto x = proc->RequestField("x"); + auto x = composer->RequestField("x"); - RNTupleProcessorEntryLoader::LoadEntry(*proc, 3); + RNTupleComposerEntryLoader::LoadEntry(*composer, 3); EXPECT_EQ(3.f, *x); - EXPECT_EQ(0, proc->GetCurrentProcessorNumber()); + EXPECT_EQ(0, composer->GetCurrentChainIndex()); - RNTupleProcessorEntryLoader::LoadEntry(*proc, 9); + RNTupleComposerEntryLoader::LoadEntry(*composer, 9); EXPECT_EQ(9.f, *x); - EXPECT_EQ(1, proc->GetCurrentProcessorNumber()); + EXPECT_EQ(1, composer->GetCurrentChainIndex()); - RNTupleProcessorEntryLoader::LoadEntry(*proc, 6); + RNTupleComposerEntryLoader::LoadEntry(*composer, 6); EXPECT_EQ(6.f, *x); - EXPECT_EQ(1, proc->GetCurrentProcessorNumber()); + EXPECT_EQ(1, composer->GetCurrentChainIndex()); - 
RNTupleProcessorEntryLoader::LoadEntry(*proc, 2); + RNTupleComposerEntryLoader::LoadEntry(*composer, 2); EXPECT_EQ(2.f, *x); - EXPECT_EQ(0, proc->GetCurrentProcessorNumber()); + EXPECT_EQ(0, composer->GetCurrentChainIndex()); - EXPECT_EQ(ROOT::kInvalidNTupleIndex, RNTupleProcessorEntryLoader::LoadEntry(*proc, 10)); + EXPECT_EQ(ROOT::kInvalidNTupleIndex, RNTupleComposerEntryLoader::LoadEntry(*composer, 10)); } -TEST_F(RNTupleChainProcessorTest, TMemFile) +TEST_F(RNTupleChainComposerTest, TMemFile) { - TMemFile memFile("test_ntuple_processor_chain_tmemfile_second.root", "RECREATE"); + TMemFile memFile("test_ntuple_composer_chain_tmemfile_second.root", "RECREATE"); { auto model = RNTupleModel::Create(); auto fldX = model->MakeField("x"); @@ -227,38 +231,40 @@ TEST_F(RNTupleChainProcessorTest, TMemFile) } } - auto proc = RNTupleProcessor::CreateChain({{fNTupleName, fFileNames[0]}, {fNTupleName, &memFile}}); + auto composer = RNTupleComposer::CreateChain({{fNTupleName, fFileNames[0]}, {fNTupleName, &memFile}}); + + auto x = composer->RequestField("x"); - auto x = proc->RequestField("x"); + auto processor = RNTupleProcessor(*composer); - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); EXPECT_EQ(static_cast(idx), *x); } - EXPECT_EQ(10, proc->GetNEntriesProcessed()); + EXPECT_EQ(10, processor.GetNEntriesProcessed()); } -TEST_F(RNTupleChainProcessorTest, PrintStructure) +TEST_F(RNTupleChainComposerTest, PrintStructure) { - auto proc = RNTupleProcessor::CreateChain( + auto composer = RNTupleComposer::CreateChain( {{fNTupleName, fFileNames[0]}, {fNTupleName, fFileNames[1]}, {fNTupleName, fFileNames[2]}}); std::ostringstream os; - proc->PrintStructure(os); + composer->PrintStructure(os); const std::string exp = "+-----------------------------+\n" "| ntuple |\n" - 
"| test_ntuple_chain_proces... |\n" + "| test_ntuple_chain_compos... |\n" "+-----------------------------+\n" "+-----------------------------+\n" "| ntuple |\n" - "| test_ntuple_chain_proces... |\n" + "| test_ntuple_chain_compos... |\n" "+-----------------------------+\n" "+-----------------------------+\n" "| ntuple |\n" - "| test_ntuple_chain_proces... |\n" + "| test_ntuple_chain_compos... |\n" "+-----------------------------+\n"; EXPECT_EQ(exp, os.str()); } diff --git a/tree/ntuple/test/ntuple_processor_join.cxx b/tree/ntuple/test/ntuple_composer_join.cxx similarity index 55% rename from tree/ntuple/test/ntuple_processor_join.cxx rename to tree/ntuple/test/ntuple_composer_join.cxx index 5bd235cbae4b1..f4eb38b55c773 100644 --- a/tree/ntuple/test/ntuple_processor_join.cxx +++ b/tree/ntuple/test/ntuple_composer_join.cxx @@ -1,13 +1,11 @@ #include "ntuple_test.hxx" -#include - #include -class RNTupleJoinProcessorTest : public testing::Test { +class RNTupleJoinComposerTest : public testing::Test { protected: - const std::array fFileNames{"test_ntuple_join_processor1.root", "test_ntuple_join_processor2.root", - "test_ntuple_join_processor3.root", "test_ntuple_join_processor4.root"}; + const std::array fFileNames{"test_ntuple_join_composer1.root", "test_ntuple_join_composer2.root", + "test_ntuple_join_composer3.root", "test_ntuple_join_composer4.root"}; const std::array fNTupleNames{"ntuple1", "ntuple2", "ntuple3", "ntuple4"}; @@ -85,19 +83,21 @@ class RNTupleJoinProcessorTest : public testing::Test { } }; -TEST_F(RNTupleJoinProcessorTest, Aligned) +TEST_F(RNTupleJoinComposerTest, Aligned) { - auto proc = RNTupleProcessor::CreateJoin({fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}, {}); + auto composer = RNTupleComposer::CreateJoin({fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}, {}); - auto i = proc->RequestField("i"); - auto y = proc->RequestField>("y"); - auto z = proc->RequestField("ntuple3.z"); + auto i = 
composer->RequestField("i"); + auto y = composer->RequestField>("y"); + auto z = composer->RequestField("ntuple3.z"); std::vector yExpected; - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { + EXPECT_EQ(idx + 1, processor.GetNEntriesProcessed()); + EXPECT_EQ(idx, composer->GetCurrentEntryNumber()); yExpected = {static_cast(*i * 0.2), 3.14, static_cast(*i * 1.3)}; EXPECT_EQ(yExpected, *y); @@ -105,27 +105,29 @@ TEST_F(RNTupleJoinProcessorTest, Aligned) EXPECT_FLOAT_EQ(*i * 2.f, *z); } - EXPECT_EQ(10, proc->GetNEntriesProcessed()); + EXPECT_EQ(10, processor.GetNEntriesProcessed()); } -TEST_F(RNTupleJoinProcessorTest, IdenticalFieldNames) +TEST_F(RNTupleJoinComposerTest, IdenticalFieldNames) { - auto proc = RNTupleProcessor::CreateJoin({fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}, {}); + auto composer = RNTupleComposer::CreateJoin({fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}, {}); + + auto iPrimary = composer->RequestField("i"); + auto iAux = composer->RequestField("ntuple3.i"); - auto iPrimary = proc->RequestField("i"); - auto iAux = proc->RequestField("ntuple3.i"); + auto processor = RNTupleProcessor(*composer); - for (auto it = proc->begin(); it != proc->end(); it++) { + for (auto idx [[maybe_unused]] : processor) { EXPECT_NE(iPrimary.GetPtr(), iAux.GetPtr()); EXPECT_EQ(*iPrimary, *iAux); } - EXPECT_EQ(10, proc->GetNEntriesProcessed()); + EXPECT_EQ(10, processor.GetNEntriesProcessed()); } TEST(RNTupleJoinProcessor, NameConflict) { - FileRaii fileGuard("ntuple_processor_join_name_conflict.root"); + FileRaii fileGuard("ntuple_composer_join_name_conflict.root"); { auto model = RNTupleModel::Create(); auto fldStruct = model->MakeField("struct"); @@ -151,31 +153,34 @@ TEST(RNTupleJoinProcessor, NameConflict) } } - auto proc = RNTupleProcessor::CreateJoin({"ntuple", 
fileGuard.GetPath()}, {"struct", fileGuard.GetPath()}, {}); + auto composer = RNTupleComposer::CreateJoin({"ntuple", fileGuard.GetPath()}, {"struct", fileGuard.GetPath()}, {}); try { - proc->RequestField("struct.a"); + composer->RequestField("struct.a"); } catch (const ROOT::RException &err) { EXPECT_THAT( err.what(), - testing::HasSubstr("ambiguous field name: \"struct.a\" is present in the primary RNTupleProcessor \"ntuple\", " - "but may also refer to a field in the auxiliary RNTupleProcessor named \"struct\". To " - "avoid this ambiguity, rename the auxiliary RNTupleProcessor.")); + testing::HasSubstr("ambiguous field name: \"struct.a\" is present in the primary RNTupleComposer \"ntuple\", " + "but may also refer to a field in the auxiliary RNTupleComposer named \"struct\". To " + "avoid this ambiguity, rename the auxiliary RNTupleComposer.")); } } -TEST_F(RNTupleJoinProcessorTest, UnalignedSingleJoinField) +TEST_F(RNTupleJoinComposerTest, UnalignedSingleJoinField) { - auto proc = RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {"i"}); + auto composer = + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {"i"}); - auto iPrimary = proc->RequestField("i"); - auto iAux = proc->RequestField("ntuple2.i"); - auto x = proc->RequestField("x"); - auto y = proc->RequestField>("ntuple2.y"); + auto iPrimary = composer->RequestField("i"); + auto iAux = composer->RequestField("ntuple2.i"); + auto x = composer->RequestField("x"); + auto y = composer->RequestField>("ntuple2.y"); std::vector yExpected; - for (auto idx : *proc) { + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { EXPECT_EQ(idx * 2, *iPrimary); EXPECT_EQ(*iPrimary, *iAux); EXPECT_FLOAT_EQ(*iPrimary * 0.5f, *x); @@ -184,75 +189,81 @@ TEST_F(RNTupleJoinProcessorTest, UnalignedSingleJoinField) EXPECT_EQ(yExpected, *y); } - EXPECT_EQ(5, proc->GetNEntriesProcessed()); + EXPECT_EQ(5, 
processor.GetNEntriesProcessed()); } -TEST_F(RNTupleJoinProcessorTest, UnalignedMultipleJoinFields) +TEST_F(RNTupleJoinComposerTest, UnalignedMultipleJoinFields) { try { - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[3], fFileNames[3]}, - {"i", "j", "k", "l", "m"}); - FAIL() << "trying to create a join processor with more than four join fields should throw"; + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[3], fFileNames[3]}, + {"i", "j", "k", "l", "m"}); + FAIL() << "trying to create an RNTupleJoinComposer with more than four join fields should throw"; } catch (const ROOT::RException &err) { EXPECT_THAT(err.what(), testing::HasSubstr("a maximum of four join fields is allowed")); } try { - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[3], fFileNames[3]}, {"i", "i"}); - FAIL() << "trying to create a join processor with duplicate join fields should throw"; + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[3], fFileNames[3]}, {"i", "i"}); + FAIL() << "trying to create an RNTupleJoinComposer with duplicate join fields should throw"; } catch (const ROOT::RException &err) { EXPECT_THAT(err.what(), testing::HasSubstr("join fields must be unique")); } try { - auto proc = - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {"l"}); - // Without registering a field, the processor won't be initialized. - proc->RequestField("x"); - FAIL() << "trying to use a join processor where not all join fields are present in the primary processor should " - "throw"; + auto composer = + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {"l"}); + // Without registering a field, the composer won't be initialized.
+ composer->RequestField("x"); + FAIL() << "trying to use an RNTupleJoinComposer where not all join fields are present in the primary composition " + "should throw"; } catch (const ROOT::RException &err) { - EXPECT_THAT(err.what(), testing::HasSubstr("could not find join field \"l\" in primary processor \"ntuple1\"")); + EXPECT_THAT(err.what(), testing::HasSubstr("could not find join field \"l\" in primary composition \"ntuple1\"")); } try { - auto proc = RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, - {"i", "j", "k"}); - // Without registering a field, the processor won't be initialized. - proc->RequestField("x"); - FAIL() << "trying to use a join processor where not all join fields are present in the auxiliary processor " - "should throw"; + auto composer = RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, + {"i", "j", "k"}); + // Without registering a field, the composer won't be initialized. 
+ composer->RequestField("x"); + FAIL() << "trying to use an RNTupleJoinComposer where not all join fields are present in the auxiliary " + "composition should throw"; } catch (const ROOT::RException &err) { - EXPECT_THAT(err.what(), testing::HasSubstr("could not find join field \"j\" in auxiliary processor \"ntuple2\"")); + EXPECT_THAT(err.what(), + testing::HasSubstr("could not find join field \"j\" in auxiliary composition \"ntuple2\"")); } - auto proc = - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[3], fFileNames[3]}, {"i", "j", "k"}); + auto composer = + RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[3], fFileNames[3]}, {"i", "j", "k"}); + + auto i = composer->RequestField("i"); + auto x = composer->RequestField("x"); + auto a = composer->RequestField("ntuple4.a"); - auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); - auto a = proc->RequestField("ntuple4.a"); + auto processor = RNTupleProcessor(*composer); - for (auto idx : *proc) { - EXPECT_EQ(proc->GetCurrentEntryNumber(), idx); + for (auto idx : processor) { + EXPECT_EQ(composer->GetCurrentEntryNumber(), idx); - EXPECT_FLOAT_EQ(proc->GetCurrentEntryNumber() * 2, *i); + EXPECT_FLOAT_EQ(composer->GetCurrentEntryNumber() * 2, *i); EXPECT_FLOAT_EQ(*i * 0.5f, *x); EXPECT_EQ(*i * 0.1f, *a); } - EXPECT_EQ(5, proc->GetNEntriesProcessed()); + EXPECT_EQ(5, processor.GetNEntriesProcessed()); } -TEST_F(RNTupleJoinProcessorTest, MissingEntries) +TEST_F(RNTupleJoinComposerTest, MissingEntries) { - auto proc = RNTupleProcessor::CreateJoin({fNTupleNames[1], fFileNames[1]}, {fNTupleNames[3], fFileNames[3]}, {"i"}); + auto composer = + RNTupleComposer::CreateJoin({fNTupleNames[1], fFileNames[1]}, {fNTupleNames[3], fFileNames[3]}, {"i"}); - auto i = proc->RequestField("i"); - auto a = proc->RequestField("ntuple4.a"); + auto i = composer->RequestField("i"); + auto a = composer->RequestField("ntuple4.a"); std::vector yExpected; - auto procIter = 
proc->begin(); + auto processor = RNTupleProcessor(*composer); + + auto procIter = processor.begin(); EXPECT_TRUE(a.HasValue()); EXPECT_EQ(*i * 0.1f, *a); ++procIter; @@ -269,9 +280,9 @@ TEST_F(RNTupleJoinProcessorTest, MissingEntries) EXPECT_EQ(*i * 0.1f, *a); } -TEST_F(RNTupleJoinProcessorTest, TMemFile) +TEST_F(RNTupleJoinComposerTest, TMemFile) { - TMemFile memFile("test_ntuple_processor_join_tmemfile.root", "RECREATE"); + TMemFile memFile("test_ntuple_composer_join_tmemfile.root", "RECREATE"); { auto model = RNTupleModel::Create(); auto fldI = model->MakeField("i"); @@ -285,15 +296,17 @@ TEST_F(RNTupleJoinProcessorTest, TMemFile) } } - auto proc = RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {"ntuple_aux", &memFile}, {"i"}); + auto composer = RNTupleComposer::CreateJoin({fNTupleNames[0], fFileNames[0]}, {"ntuple_aux", &memFile}, {"i"}); - auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); - auto y = proc->RequestField>("ntuple_aux.y"); + auto i = composer->RequestField("i"); + auto x = composer->RequestField("x"); + auto y = composer->RequestField>("ntuple_aux.y"); std::vector yExpected; - for (auto idx : *proc) { + auto processor = RNTupleProcessor(*composer); + + for (auto idx : processor) { EXPECT_EQ(idx * 2, *i); EXPECT_FLOAT_EQ(*i * 0.5f, *x); @@ -302,19 +315,19 @@ TEST_F(RNTupleJoinProcessorTest, TMemFile) EXPECT_EQ(yExpected, *y); } - EXPECT_EQ(5, proc->GetNEntriesProcessed()); + EXPECT_EQ(5, processor.GetNEntriesProcessed()); } -TEST_F(RNTupleJoinProcessorTest, PrintStructure) +TEST_F(RNTupleJoinComposerTest, PrintStructure) { - auto proc = RNTupleProcessor::CreateJoin({fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}, {}); + auto composer = RNTupleComposer::CreateJoin({fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}, {}); std::ostringstream os; - proc->PrintStructure(os); + composer->PrintStructure(os); const std::string exp = "+-----------------------------+ 
+-----------------------------+\n" "| ntuple2 | | ntuple3 |\n" - "| test_ntuple_join_process... | | test_ntuple_join_process... |\n" + "| test_ntuple_join_compose... | | test_ntuple_join_compose... |\n" "+-----------------------------+ +-----------------------------+\n"; EXPECT_EQ(exp, os.str()); } diff --git a/tree/ntuple/test/ntuple_processor.cxx b/tree/ntuple/test/ntuple_processor.cxx deleted file mode 100644 index e96c6a6b3f9c6..0000000000000 --- a/tree/ntuple/test/ntuple_processor.cxx +++ /dev/null @@ -1,800 +0,0 @@ -#include "ntuple_test.hxx" - -#include - -#include - -TEST(RNTupleProcessor, EmptyNTuple) -{ - FileRaii fileGuard("test_ntuple_processor_empty.root"); - { - auto model = RNTupleModel::Create(); - model->MakeField("x"); - auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath()); - } - - auto proc = RNTupleProcessor::Create({"ntuple", fileGuard.GetPath()}); - - int nEntries = 0; - for (auto it = proc->begin(); it != proc->end(); it++) { - nEntries++; - } - EXPECT_EQ(0, nEntries); - EXPECT_EQ(nEntries, proc->GetNEntriesProcessed()); -} - -TEST(RNTupleProcessor, TMemFile) -{ - TMemFile memFile("test_ntuple_processor_tmemfile.root", "RECREATE"); - { - auto model = RNTupleModel::Create(); - auto fldX = model->MakeField("x"); - auto ntuple = RNTupleWriter::Append(std::move(model), "ntuple", memFile); - - for (unsigned i = 0; i < 5; ++i) { - *fldX = static_cast(i); - ntuple->Fill(); - } - } - - auto proc = RNTupleProcessor::Create({"ntuple", &memFile}); - - auto x = proc->RequestField("x"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - - EXPECT_FLOAT_EQ(static_cast(idx), *x); - } - - EXPECT_EQ(5, proc->GetNEntriesProcessed()); -} - -TEST(RNTupleProcessor, TDirectory) -{ - FileRaii fileGuard("test_ntuple_processor_tdirectoryfile.root"); - { - auto file = std::unique_ptr(TFile::Open(fileGuard.GetPath().c_str(), "RECREATE")); - auto dir = 
std::unique_ptr(file->mkdir("a/b")); - auto model = RNTupleModel::Create(); - auto fldX = model->MakeField("x"); - auto ntuple = RNTupleWriter::Append(std::move(model), "ntuple", *dir); - - for (unsigned i = 0; i < 5; ++i) { - *fldX = static_cast(i); - ntuple->Fill(); - } - } - - auto file = std::make_unique(fileGuard.GetPath().c_str()); - auto proc = RNTupleProcessor::Create({"a/b/ntuple", file.get()}); - auto x = proc->RequestField("x"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - - EXPECT_FLOAT_EQ(static_cast(idx), *x); - } - - EXPECT_EQ(5, proc->GetNEntriesProcessed()); -} - -class RNTupleProcessorTest : public testing::Test { -protected: - const std::array fFileNames{"test_ntuple_processor1.root", "test_ntuple_processor2.root", - "test_ntuple_processor3.root", "test_ntuple_processor4.root"}; - const std::array fNTupleNames{"ntuple", "ntuple_aux", "ntuple_aux", "ntuple_aux"}; - - void SetUp() override - { - { - auto model = RNTupleModel::Create(); - auto fldI = model->MakeField("i"); - auto fldX = model->MakeField("x"); - auto fldY = model->MakeField>("y"); - auto fldStruct = model->MakeField("struct"); - auto ntuple = RNTupleWriter::Recreate(std::move(model), fNTupleNames[0], fFileNames[0]); - - for (unsigned i = 0; i < 5; i++) { - *fldI = i; - *fldX = static_cast(i); - *fldY = {static_cast(i), static_cast(i * 2)}; - fldStruct->a = i * 1.f; - ntuple->Fill(); - } - } - { - auto model = RNTupleModel::Create(); - auto fldI = model->MakeField("i"); - auto fldZ = model->MakeField("z"); - auto fldStruct = model->MakeField("struct"); - auto ntuple = RNTupleWriter::Recreate(std::move(model), fNTupleNames[1], fFileNames[1]); - - for (unsigned i = 0; i < 5; ++i) { - *fldI = i; - *fldZ = i * 2.f; - fldStruct->a = i * 2.f; - ntuple->Fill(); - } - } - // Same as above, but entries in reverse order - { - auto model = RNTupleModel::Create(); - auto fldI = model->MakeField("i"); - auto 
fldZ = model->MakeField("z"); - auto fldStruct = model->MakeField("struct"); - auto ntuple = RNTupleWriter::Recreate(std::move(model), fNTupleNames[2], fFileNames[2]); - - for (int i = 4; i >= 0; --i) { - *fldI = i; - *fldZ = i * 3.f; - fldStruct->a = i * 3.f; - - ntuple->Fill(); - } - } - // Same as above, but the second and fourth entry are missing - { - auto model = RNTupleModel::Create(); - auto fldI = model->MakeField("i"); - auto fldZ = model->MakeField("z"); - auto fldStruct = model->MakeField("struct"); - auto ntuple = RNTupleWriter::Recreate(std::move(model), fNTupleNames[3], fFileNames[3]); - - for (unsigned i = 0; i < 5; ++i) { - if (i % 2 == 1) - continue; - *fldI = i; - *fldZ = i * 4.f; - fldStruct->a = i * 4.f; - ntuple->Fill(); - } - } - } - - void TearDown() override - { - for (const auto &fileName : fFileNames) { - std::remove(fileName.c_str()); - } - } -}; - -TEST_F(RNTupleProcessorTest, Base) -{ - auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); - - auto x = proc->RequestField("x"); - // Check that `RequestField` also works with `void`. 
- auto y = proc->RequestField("y"); - - try { - proc->RequestField("z"); - FAIL() << "registering fields that do not exist should not be possible"; - } catch (const ROOT::RException &err) { - EXPECT_THAT(err.what(), - testing::HasSubstr("cannot register field with name \"z\" because it is not present in the on-disk " - "information of the RNTuple(s) this processor is created from")); - } - - for (auto idx : *proc) { - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - - EXPECT_FLOAT_EQ(static_cast(idx), *x); - - std::vector yExp{static_cast(idx), static_cast((idx) * 2)}; - EXPECT_EQ(yExp, *std::static_pointer_cast>(y.GetPtr())); - } - EXPECT_EQ(5, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, RequestFieldWithPtr) -{ - auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); - - auto xPtr = std::make_shared(); - auto x = proc->RequestField("x", xPtr.get()); - - auto xNewPtr = std::make_shared(); - - for (auto idx : *proc) { - EXPECT_FLOAT_EQ(static_cast(idx), *x); - EXPECT_EQ(x.GetRawPtr(), xPtr.get()); - - if (idx == 2) { - x.BindRawPtr(xNewPtr.get()); - xPtr.swap(xNewPtr); - } - } -} - -TEST_F(RNTupleProcessorTest, RequestFieldWithVoidPtr) -{ - auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); - - auto xPtr = std::make_shared(); - auto x = proc->RequestField("x", xPtr.get()); - - auto xNewPtr = std::make_shared(); - - for (auto idx : *proc) { - EXPECT_FLOAT_EQ(static_cast(idx), *std::static_pointer_cast(x.GetPtr())); - EXPECT_EQ(x.GetRawPtr(), xPtr.get()); - - if (idx == 2) { - x.BindRawPtr(xNewPtr.get()); - xPtr.swap(xNewPtr); - } - } -} - -TEST_F(RNTupleProcessorTest, RequestFieldWithTypeString) -{ - { - auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); - EXPECT_NO_THROW(proc->RequestField("y", "std::vector")); - } - { - auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); - EXPECT_NO_THROW(proc->RequestField("y", 
"std::vector")); - } - { - auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); - EXPECT_THROW(proc->RequestField("y", "std::vetor"), ROOT::RException); - } - - auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); - auto x = proc->RequestField("x", "float"); - auto yPtr = std::make_shared>(); - auto y = proc->RequestField("y", "std::vector", yPtr.get()); - - for (auto idx : *proc) { - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - - EXPECT_FLOAT_EQ(static_cast(idx), *std::static_pointer_cast(x.GetPtr())); - - std::vector yExp{static_cast(idx), static_cast((idx) * 2)}; - EXPECT_EQ(yExp, *std::static_pointer_cast>(y.GetPtr())); - } - EXPECT_EQ(5, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, AlternativeTypes) -{ - auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); - - auto xAsDouble = proc->RequestField("x"); - auto xAsFloat = proc->RequestField("x"); - - try { - proc->RequestField("x"); - } catch (const ROOT::RException &err) { - EXPECT_THAT(err.what(), testing::HasSubstr("in-memory field x of type std::string is incompatible with " - "on-disk field x: incompatible on-disk type name float")); - } - - auto yAsRVec = proc->RequestField>("y"); - - for (auto idx : *proc) { - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - - EXPECT_FLOAT_EQ(static_cast(idx), *xAsDouble); - EXPECT_FLOAT_EQ(idx, *xAsFloat); - - ROOT::RVec yExp{static_cast(idx), static_cast((idx) * 2)}; - for (std::size_t i = 0ul; i < yAsRVec->size(); ++i) { - EXPECT_FLOAT_EQ(yExp[i], (*yAsRVec)[i]); - } - } -} - -TEST_F(RNTupleProcessorTest, Subfields) -{ - auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); - - auto strct = proc->RequestField("struct"); - auto strct_a = proc->RequestField("struct.a"); - - for (auto idx : *proc) { - EXPECT_FLOAT_EQ(idx, idx); - EXPECT_FLOAT_EQ(strct->a, *strct_a); - } -} - -TEST_F(RNTupleProcessorTest, 
PrintStructureSingle) -{ - auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); - - std::ostringstream os; - proc->PrintStructure(os); - - const std::string exp = "+-----------------------------+\n" - "| ntuple |\n" - "| test_ntuple_processor1.root |\n" - "+-----------------------------+\n"; - EXPECT_EQ(exp, os.str()); -} - -TEST_F(RNTupleProcessorTest, ChainedChain) -{ - std::vector> innerProcs; - innerProcs.push_back( - RNTupleProcessor::CreateChain({{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}})); - innerProcs.push_back( - RNTupleProcessor::CreateChain({{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}})); - - auto proc = RNTupleProcessor::CreateChain(std::move(innerProcs)); - - auto i = proc->RequestField("i"); - auto z = proc->RequestField("z"); - auto strct_a = proc->RequestField("struct.a"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - if ((idx >= 5 && idx < 10) || idx >= 15) { - EXPECT_EQ(*i, 4 - idx % 5); - EXPECT_EQ(*z, (4 - idx % 5) * 3.f); - } else { - EXPECT_EQ(*i, idx % 5); - EXPECT_EQ(*z, (idx % 5) * 2.f); - } - - EXPECT_EQ(*strct_a, *z); - } - EXPECT_EQ(20, proc->GetNEntriesProcessed()); - - auto zPtr = std::make_shared(); - z.BindRawPtr(zPtr.get()); - auto aPtr = std::make_shared(); - strct_a.BindRawPtr(aPtr.get()); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1 + 20, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - - if ((idx >= 5 && idx < 10) || idx >= 15) { - EXPECT_EQ(*i, 4 - idx % 5); - EXPECT_EQ(*z, (4 - idx % 5) * 3.f); - } else { - EXPECT_EQ(*i, idx % 5); - EXPECT_EQ(*z, (idx % 5) * 2.f); - } - - EXPECT_EQ(*strct_a, *z); - EXPECT_EQ(z.GetPtr().get(), zPtr.get()); - EXPECT_EQ(strct_a.GetPtr().get(), aPtr.get()); - } - EXPECT_EQ(40, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, ChainedJoin) -{ - std::vector> innerProcs; - innerProcs.push_back( - 
RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {})); - innerProcs.push_back( - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {})); - - auto proc = RNTupleProcessor::CreateChain(std::move(innerProcs)); - - auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); - auto z = proc->RequestField("ntuple_aux.z"); - auto strct_a = proc->RequestField("ntuple_aux.struct.a"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(*i, proc->GetCurrentEntryNumber() % 5); - - EXPECT_EQ(static_cast(*i), *x); - EXPECT_EQ(*x * 2, *z); - EXPECT_EQ(*z, *strct_a); - } - EXPECT_EQ(10, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, ChainedJoinUnaligned) -{ - std::vector> innerProcs; - innerProcs.push_back( - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[2], fFileNames[2]}, {"i"})); - innerProcs.push_back( - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[2], fFileNames[2]}, {"i"})); - - auto proc = RNTupleProcessor::CreateChain(std::move(innerProcs)); - - auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); - auto z = proc->RequestField("ntuple_aux.z"); - auto strct_a = proc->RequestField("ntuple_aux.struct.a"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(*i, proc->GetCurrentEntryNumber() % 5); - - EXPECT_EQ(static_cast(*i), *x); - EXPECT_EQ(*x * 3, *z); - EXPECT_EQ(*z, *strct_a); - } - EXPECT_EQ(10, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, ChainedJoinMissingEntries) -{ - std::vector> innerProcs; - innerProcs.push_back( - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[3], fFileNames[3]}, {"i"})); - innerProcs.push_back( - 
RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[3], fFileNames[3]}, {"i"})); - - auto proc = RNTupleProcessor::CreateChain(std::move(innerProcs)); - - auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); - auto z = proc->RequestField("ntuple_aux.z"); - auto strct_a = proc->RequestField("ntuple_aux.struct.a"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(*i, proc->GetCurrentEntryNumber() % 5); - - EXPECT_EQ(static_cast(*i), *x); - - if ((idx % 5) % 2 == 1) { - EXPECT_FALSE(z.HasValue()); - EXPECT_FALSE(strct_a.HasValue()); - } else { - EXPECT_TRUE(z.HasValue()); - EXPECT_TRUE(strct_a.HasValue()); - EXPECT_EQ(*x * 4, *z); - EXPECT_EQ(*z, *strct_a); - } - } - EXPECT_EQ(10, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, JoinedChain) -{ - auto primaryChain = - RNTupleProcessor::CreateChain({{fNTupleNames[0], fFileNames[0]}, {fNTupleNames[0], fFileNames[0]}}); - - auto auxiliaryChain = - RNTupleProcessor::CreateChain({{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[1], fFileNames[1]}}); - - auto proc = RNTupleProcessor::CreateJoin(std::move(primaryChain), std::move(auxiliaryChain), {}); - - auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); - auto z = proc->RequestField("ntuple_aux.z"); - auto strct_a = proc->RequestField("ntuple_aux.struct.a"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(*i, proc->GetCurrentEntryNumber() % 5); - - EXPECT_EQ(static_cast(*i), *x); - EXPECT_EQ(*x * 2, *z); - EXPECT_EQ(*z, *strct_a); - } - EXPECT_EQ(10, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, JoinedChainUnaligned) -{ - auto primaryChain = - RNTupleProcessor::CreateChain({{fNTupleNames[0], fFileNames[0]}, {fNTupleNames[0], fFileNames[0]}}); - - auto auxiliaryChain = - 
RNTupleProcessor::CreateChain({{fNTupleNames[2], fFileNames[2]}, {fNTupleNames[2], fFileNames[2]}}); - - auto proc = RNTupleProcessor::CreateJoin(std::move(primaryChain), std::move(auxiliaryChain), {"i"}); - - auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); - auto z = proc->RequestField("ntuple_aux.z"); - auto strct_a = proc->RequestField("ntuple_aux.struct.a"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(*i, proc->GetCurrentEntryNumber() % 5); - - EXPECT_EQ(static_cast(*i), *x); - EXPECT_EQ(*x * 3, *z); - EXPECT_EQ(*z, *strct_a); - } - EXPECT_EQ(10, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, JoinedChainMissingEntries) -{ - auto primaryChain = - RNTupleProcessor::CreateChain({{fNTupleNames[0], fFileNames[0]}, {fNTupleNames[0], fFileNames[0]}}); - - auto auxiliaryChain = - RNTupleProcessor::CreateChain({{fNTupleNames[3], fFileNames[3]}, {fNTupleNames[3], fFileNames[3]}}); - - auto proc = RNTupleProcessor::CreateJoin(std::move(primaryChain), std::move(auxiliaryChain), {"i"}); - - auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); - auto z = proc->RequestField("ntuple_aux.z"); - auto strct_a = proc->RequestField("ntuple_aux.struct.a"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(*i, proc->GetCurrentEntryNumber() % 5); - - EXPECT_EQ(static_cast(*i), *x); - - if ((idx % 5) % 2 == 1) { - EXPECT_FALSE(z.HasValue()); - EXPECT_FALSE(strct_a.HasValue()); - } else { - EXPECT_TRUE(z.HasValue()); - EXPECT_TRUE(strct_a.HasValue()); - EXPECT_EQ(*x * 4, *z); - EXPECT_EQ(*z, *strct_a); - } - } - EXPECT_EQ(10, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, JoinedJoinComposedPrimary) -{ - auto primaryProc = - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {}); 
- - auto auxProc = RNTupleProcessor::Create({fNTupleNames[2], fFileNames[2]}, "ntuple_aux2"); - - auto proc = RNTupleProcessor::CreateJoin(std::move(primaryProc), std::move(auxProc), {"i"}, "joined_ntuple"); - - auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); - auto z1 = proc->RequestField("ntuple_aux.z"); - auto strct_a1 = proc->RequestField("ntuple_aux.struct.a"); - auto z2 = proc->RequestField("ntuple_aux2.z"); - auto strct_a2 = proc->RequestField("ntuple_aux2.struct.a"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(*i, proc->GetCurrentEntryNumber() % 5); - - EXPECT_EQ(static_cast(*i), *x); - EXPECT_EQ(*x * 2, *z1); - EXPECT_EQ(*x * 2, *strct_a1); - EXPECT_EQ(*x * 3, *z2); - EXPECT_EQ(*x * 3, *strct_a2); - } - EXPECT_EQ(5, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, JoinedJoinComposedPrimaryMissingEntries) -{ - auto primaryProc = - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {}); - - auto auxProc = RNTupleProcessor::Create({fNTupleNames[3], fFileNames[3]}, "ntuple_aux2"); - - auto proc = RNTupleProcessor::CreateJoin(std::move(primaryProc), std::move(auxProc), {"i"}); - - auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); - auto z1 = proc->RequestField("ntuple_aux.z"); - auto strct_a1 = proc->RequestField("ntuple_aux.struct.a"); - auto z2 = proc->RequestField("ntuple_aux2.z"); - auto strct_a2 = proc->RequestField("ntuple_aux2.struct.a"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(*i, proc->GetCurrentEntryNumber() % 5); - - EXPECT_EQ(static_cast(*i), *x); - EXPECT_EQ(*x * 2, *z1); - EXPECT_EQ(*x * 2, *strct_a1); - - if (idx % 2 == 1) { - EXPECT_FALSE(z2.HasValue()); - EXPECT_FALSE(strct_a2.HasValue()); - } else { - EXPECT_TRUE(z2.HasValue()); - 
EXPECT_TRUE(strct_a2.HasValue()); - EXPECT_EQ(*x * 4, *z2); - EXPECT_EQ(*x * 4, *strct_a2); - } - } - EXPECT_EQ(5, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, JoinedJoinComposedAuxiliary) -{ - auto primaryProc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); - - auto auxProcIntermediate = RNTupleProcessor::Create({fNTupleNames[2], fFileNames[2]}, "ntuple_aux2"); - - auto auxProc = RNTupleProcessor::CreateJoin(RNTupleProcessor::Create({fNTupleNames[1], fFileNames[1]}), - std::move(auxProcIntermediate), {"i"}); - - auto proc = RNTupleProcessor::CreateJoin(std::move(primaryProc), std::move(auxProc), {}); - - auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); - auto z1 = proc->RequestField("ntuple_aux.z"); - auto strct_a1 = proc->RequestField("ntuple_aux.struct.a"); - auto z2 = proc->RequestField("ntuple_aux.ntuple_aux2.z"); - auto strct_a2 = proc->RequestField("ntuple_aux.ntuple_aux2.struct.a"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(*i, proc->GetCurrentEntryNumber() % 5); - - EXPECT_EQ(static_cast(*i), *x); - EXPECT_EQ(*x * 2, *z1); - EXPECT_EQ(*x * 2, *strct_a1); - EXPECT_EQ(*x * 3, *z2); - EXPECT_EQ(*x * 3, *strct_a2); - } - - EXPECT_EQ(5, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, JoinedJoinComposedAuxiliaryMissingEntries) -{ - auto primaryProc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); - - auto auxProcIntermediate = RNTupleProcessor::Create({fNTupleNames[3], fFileNames[3]}, "ntuple_aux2"); - - auto auxProc = RNTupleProcessor::CreateJoin(RNTupleProcessor::Create({fNTupleNames[1], fFileNames[1]}), - std::move(auxProcIntermediate), {"i"}); - - auto proc = RNTupleProcessor::CreateJoin(std::move(primaryProc), std::move(auxProc), {}); - - auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); - auto z1 = proc->RequestField("ntuple_aux.z"); - auto strct_a1 = 
proc->RequestField("ntuple_aux.struct.a"); - auto z2 = proc->RequestField("ntuple_aux.ntuple_aux2.z"); - auto strct_a2 = proc->RequestField("ntuple_aux.ntuple_aux2.struct.a"); - - for (auto idx : *proc) { - EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); - EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(*i, proc->GetCurrentEntryNumber() % 5); - - EXPECT_EQ(static_cast(*i), *x); - EXPECT_EQ(*x * 2, *z1); - EXPECT_EQ(*x * 2, *strct_a1); - - if (idx % 2 == 1) { - EXPECT_FALSE(z2.HasValue()); - EXPECT_FALSE(strct_a2.HasValue()); - } else { - EXPECT_TRUE(z2.HasValue()); - EXPECT_TRUE(strct_a2.HasValue()); - EXPECT_EQ(*x * 4, *z2); - EXPECT_EQ(*x * 4, *strct_a2); - } - } - - EXPECT_EQ(5, proc->GetNEntriesProcessed()); -} - -TEST_F(RNTupleProcessorTest, JoinedJoinComposedSameName) -{ - auto primaryProc = - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {}); - - auto auxProc = RNTupleProcessor::Create({fNTupleNames[2], fFileNames[2]}); - auto proc = RNTupleProcessor::CreateJoin(std::move(primaryProc), std::move(auxProc), {"i"}); - - try { - proc->RequestField("ntuple_aux.z"); - - FAIL() << "creating an auxiliary processor where its name causes conflicts should throw"; - } catch (const ROOT::RException &err) { - EXPECT_THAT(err.what(), testing::HasSubstr( - "ambiguous field name: \"ntuple_aux.z\" is present in the primary RNTupleProcessor " - "\"ntuple\", but may also refer to a field in the auxiliary RNTupleProcessor named " - "\"ntuple_aux\". 
To avoid this ambiguity, rename the auxiliary RNTupleProcessor.")); - } -} - -TEST_F(RNTupleProcessorTest, PrintStructureChainedJoin) -{ - std::vector> innerProcs; - innerProcs.push_back( - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {})); - innerProcs.push_back( - RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {})); - - auto proc = RNTupleProcessor::CreateChain(std::move(innerProcs)); - - std::ostringstream os; - proc->PrintStructure(os); - - const std::string exp = "+-----------------------------+ +-----------------------------+\n" - "| ntuple | | ntuple_aux |\n" - "| test_ntuple_processor1.root | | test_ntuple_processor2.root |\n" - "+-----------------------------+ +-----------------------------+\n" - "+-----------------------------+ +-----------------------------+\n" - "| ntuple | | ntuple_aux |\n" - "| test_ntuple_processor1.root | | test_ntuple_processor2.root |\n" - "+-----------------------------+ +-----------------------------+\n"; - EXPECT_EQ(exp, os.str()); -} - -TEST_F(RNTupleProcessorTest, PrintStructureJoinedChain) -{ - auto primaryChain = - RNTupleProcessor::CreateChain({{fNTupleNames[0], fFileNames[0]}, {fNTupleNames[0], fFileNames[0]}}); - auto auxiliaryChain = - RNTupleProcessor::CreateChain({{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[1], fFileNames[1]}}); - - auto proc = RNTupleProcessor::CreateJoin(std::move(primaryChain), std::move(auxiliaryChain), {}); - - std::ostringstream os; - proc->PrintStructure(os); - - const std::string exp = "+-----------------------------+ +-----------------------------+\n" - "| ntuple | | ntuple_aux |\n" - "| test_ntuple_processor1.root | | test_ntuple_processor2.root |\n" - "+-----------------------------+ +-----------------------------+\n" - "+-----------------------------+ +-----------------------------+\n" - "| ntuple | | ntuple_aux |\n" - "| test_ntuple_processor1.root | | test_ntuple_processor2.root |\n" 
- "+-----------------------------+ +-----------------------------+\n"; - EXPECT_EQ(exp, os.str()); -} - -TEST_F(RNTupleProcessorTest, PrintStructureJoinedChainAsymmetric) -{ - auto primaryChain = - RNTupleProcessor::CreateChain({{fNTupleNames[0], fFileNames[0]}, {fNTupleNames[0], fFileNames[0]}}); - auto auxiliaryChain = RNTupleProcessor::CreateChain({{fNTupleNames[1], fFileNames[1]}}); - - auto proc1 = RNTupleProcessor::CreateJoin(std::move(primaryChain), std::move(auxiliaryChain), {}); - - std::ostringstream os1; - proc1->PrintStructure(os1); - - const std::string exp1 = "+-----------------------------+ +-----------------------------+\n" - "| ntuple | | ntuple_aux |\n" - "| test_ntuple_processor1.root | | test_ntuple_processor2.root |\n" - "+-----------------------------+ +-----------------------------+\n" - "+-----------------------------+\n" - "| ntuple |\n" - "| test_ntuple_processor1.root |\n" - "+-----------------------------+\n"; - EXPECT_EQ(exp1, os1.str()); - - primaryChain = RNTupleProcessor::CreateChain({{fNTupleNames[0], fFileNames[0]}}); - auxiliaryChain = RNTupleProcessor::CreateChain({{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[1], fFileNames[1]}}); - - auto proc2 = RNTupleProcessor::CreateJoin(std::move(primaryChain), std::move(auxiliaryChain), {}); - - std::ostringstream os2; - proc2->PrintStructure(os2); - - const std::string exp2 = "+-----------------------------+ +-----------------------------+\n" - "| ntuple | | ntuple_aux |\n" - "| test_ntuple_processor1.root | | test_ntuple_processor2.root |\n" - "+-----------------------------+ +-----------------------------+\n" - " +-----------------------------+\n" - " | ntuple_aux |\n" - " | test_ntuple_processor2.root |\n" - " +-----------------------------+\n"; - EXPECT_EQ(exp2, os2.str()); -} diff --git a/tree/ntuple/test/ntuple_test.hxx b/tree/ntuple/test/ntuple_test.hxx index e4cd028af99f1..7218bb71a9fcc 100644 --- a/tree/ntuple/test/ntuple_test.hxx +++ b/tree/ntuple/test/ntuple_test.hxx @@ -6,6 
+6,7 @@ #include #include #include +#include #include #include #include @@ -75,6 +76,7 @@ using RMiniFileReader = ROOT::Internal::RMiniFileReader; using RNTupleAtomicCounter = ROOT::Experimental::Detail::RNTupleAtomicCounter; using RNTupleAtomicTimer = ROOT::Experimental::Detail::RNTupleAtomicTimer; using RNTupleCalcPerf = ROOT::Experimental::Detail::RNTupleCalcPerf; +using RNTupleComposer = ROOT::Experimental::RNTupleComposer; using RNTupleCompressor = ROOT::Internal::RNTupleCompressor; using RNTupleDecompressor = ROOT::Internal::RNTupleDecompressor; using RNTupleDescriptor = ROOT::RNTupleDescriptor; diff --git a/tutorials/io/ntuple/ntpl012_processor_chain.C b/tutorials/io/ntuple/ntpl012_processor_chain.C index 99e800493e894..8feb61b0b3887 100644 --- a/tutorials/io/ntuple/ntpl012_processor_chain.C +++ b/tutorials/io/ntuple/ntpl012_processor_chain.C @@ -1,7 +1,7 @@ /// \file /// \ingroup tutorial_ntuple /// \notebook -/// Demonstrate the RNTupleProcessor for vertical compositions (chains) of RNTuples +/// Demonstrate the RNTupleComposer and RNTupleProcessor for vertical compositions (chains) of RNTuples /// /// \macro_image /// \macro_code @@ -9,7 +9,7 @@ /// \date April 2024 /// \author The ROOT Team -// NOTE: The RNTupleProcessor and related classes are experimental at this point. +// NOTE: The RNTupleComposer, RNTupleProcessor and related classes are experimental at this point. // Functionality and interface are still subject to changes. #include @@ -21,6 +21,7 @@ #include // Import classes from the `Experimental` namespace for the time being. 
+using ROOT::Experimental::RNTupleComposer; using ROOT::Experimental::RNTupleOpenSpec; using ROOT::Experimental::RNTupleProcessor; @@ -60,26 +61,29 @@ void Write(std::string_view ntupleName, std::string_view fileName) void Read(const std::vector &ntuples) { - auto c = new TCanvas("c", "RNTupleProcessor Example", 200, 10, 700, 500); + auto c = new TCanvas("c", "RNTupleComposer chain Example", 200, 10, 700, 500); TH1F hPx("h", "This is the px distribution", 100, -4, 4); hPx.SetFillColor(48); - // The chain-based processor can be created by passing a list of RNTupleOpenSpecs, describing the name and location - // of each ntuple in the chain. - auto processor = RNTupleProcessor::CreateChain(ntuples); - int prevProcessorNumber{-1}; + // The chain-based composition can be created by passing a list of RNTupleOpenSpecs, describing the name and location + // of each RNTuple in the chain. + auto composer = RNTupleComposer::CreateChain(ntuples); + int prevChainIdx{-1}; - // Access to the processor's fields is done by first requesting them through RNTupleProcessor::RequestField(). The + // Access to the composition's fields is done by first requesting them through RNTupleComposer::RequestField(). The // returned value can be used to read the current entry's value for that particular field. - auto px = processor->RequestField>("vpx"); + auto px = composer->RequestField>("vpx"); + + // Create the processor responsible for reading entries from the composer created above. + RNTupleProcessor processor(*composer); // The iterator value is the index of the current entry being processed. - for (auto idx : *processor) { + for (auto idx : processor) { // The RNTupleProcessor provides some additional bookkeeping information, such as the current processor number. 
- if (static_cast(processor->GetCurrentProcessorNumber()) > prevProcessorNumber) { - prevProcessorNumber = processor->GetCurrentProcessorNumber(); - std::cout << "Processing `ntuple" << prevProcessorNumber + 1 << "` (" << idx + 1 - << " total entries processed so far)" << std::endl; + if (static_cast(composer->GetCurrentChainIndex()) > prevChainIdx) { + prevChainIdx = composer->GetCurrentChainIndex(); + std::cout << "Processing `ntuple" << prevChainIdx + 1 << "` (" << idx + 1 << " total entries processed so far)" + << std::endl; } // We use the value returned from requesting the field to read its data for the current entry. @@ -88,7 +92,7 @@ void Read(const std::vector &ntuples) } } - std::cout << "Processed a total of " << processor->GetNEntriesProcessed() << " entries" << std::endl; + std::cout << "Processed a total of " << processor.GetNEntriesProcessed() << " entries" << std::endl; hPx.DrawCopy(); } @@ -99,8 +103,8 @@ void ntpl012_processor_chain() Write("ntuple2", "ntuple2.root"); Write("ntuple3", "ntuple3.root"); - // The ntuples to generate and subsequently process. The model of the first ntuple will be used to construct the - // entry used by the processor. + // The ntuples to generate, compose and subsequently process. The schema of the first ntuple will be used to + // construct the entry used by the composer. 
std::vector ntuples = { {"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}, {"ntuple3", "ntuple3.root"}}; diff --git a/tutorials/io/ntuple/ntpl015_processor_join.C b/tutorials/io/ntuple/ntpl015_processor_join.C index 836057ef49931..b7f346f74e951 100644 --- a/tutorials/io/ntuple/ntpl015_processor_join.C +++ b/tutorials/io/ntuple/ntpl015_processor_join.C @@ -1,7 +1,7 @@ /// \file /// \ingroup tutorial_ntuple /// \notebook -/// Demonstrate the RNTupleProcessor for horizontal compositions (joins) of RNTuples +/// Demonstrate the RNTupleComposer and RNTupleProcessor for horizontal compositions (joins) of RNTuples /// /// \macro_image /// \macro_code @@ -9,7 +9,7 @@ /// \date November 2024 /// \author The ROOT Team -// NOTE: The RNTupleProcessor and related classes are experimental at this point. +// NOTE: The RNTupleComposer, RNTupleProcessor and related classes are experimental at this point. // Functionality and interface are still subject to changes. #include @@ -21,6 +21,7 @@ #include // Import classes from the `Experimental` namespace for the time being. +using ROOT::Experimental::RNTupleComposer; using ROOT::Experimental::RNTupleOpenSpec; using ROOT::Experimental::RNTupleProcessor; @@ -73,32 +74,35 @@ void WriteAux(std::string_view ntupleName, std::string_view ntupleFileName) void Read() { - auto c = new TCanvas("c", "RNTupleJoinProcessor Example", 200, 10, 700, 500); + auto c = new TCanvas("c", "RNTupleComposer join Example", 200, 10, 700, 500); TH1F hPy("h", "This is the px + py distribution", 100, -4, 4); hPy.SetFillColor(48); - // The first specified ntuple is the primary ntuple and will be used to drive the processor loop. The subsequent - // list of ntuples (in this case, only one) are auxiliary and will be joined with the entries from the primary - // ntuple. We specify field "i" as the join field. This field, which should be present in all ntuples specified is + // The first specified RNTuple is the primary ntuple. 
Its entries will be loaded sequentially by the + // RNTupleProcessor. The subsequent RNTuple is auxiliary and will be joined with the entries from the primary + // RNTuple. We specify field "i" as the join field. This field, which should be present in all ntuples specified is // used to identify which entries belong together. Multiple join fields can be specified, in which case the // combination of field values is used. It is possible to specify up to 4 join fields. Providing an empty list of - // join fields signals to the processor that all entries are aligned. - auto processor = - RNTupleProcessor::CreateJoin({kPrimaryNTupleName, kMainNTuplePath}, {kAuxNTupleName, kAuxNTuplePath}, {"i"}); + // join fields signals to the composer that all entries are aligned. + auto composer = + RNTupleComposer::CreateJoin({kPrimaryNTupleName, kMainNTuplePath}, {kAuxNTupleName, kAuxNTuplePath}, {"i"}); - // Access to the processor's fields is done by first requesting them through RNTupleProcessor::RequestField(). The + // Access to the composer's fields is done by first requesting them through RNTupleComposer::RequestField(). The // returned value can be used to read the current entry's value for that particular field. Fields from the primary // ntuple are requested by their original name. - auto px = processor->RequestField("vpx"); + auto px = composer->RequestField("vpx"); // Fields from auxiliary ntuples are requested by prepending the name of the auxiliary ntuple. - auto py = processor->RequestField(kAuxNTupleName + ".vpy"); + auto py = composer->RequestField(kAuxNTupleName + ".vpy"); + + // Create the processor responsible for reading entries from the composer created above. + RNTupleProcessor processor(*composer); // The iterator value is the index of the current entry being processed. In this example, we don't use it. 
- for (auto _ : *processor) { + for (auto _ : processor) { hPy.Fill(*px + *py); } - std::cout << "Processed a total of " << processor->GetNEntriesProcessed() << " entries" << std::endl; + std::cout << "Processed a total of " << processor.GetNEntriesProcessed() << " entries" << std::endl; hPy.DrawCopy(); }