diff --git a/tree/ntuple/inc/ROOT/RNTupleProcessor.hxx b/tree/ntuple/inc/ROOT/RNTupleProcessor.hxx index 7b578cace2f03..2fa2beda553eb 100644 --- a/tree/ntuple/inc/ROOT/RNTupleProcessor.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleProcessor.hxx @@ -250,7 +250,6 @@ class RNTupleProcessor { protected: std::string fProcessorName; - std::unique_ptr fProtoModel = nullptr; std::shared_ptr fEntry = nullptr; std::unordered_set fFieldIdxs; @@ -263,13 +262,12 @@ protected: std::size_t fCurrentProcessorNumber = 0; //< Number of the currently open inner processor ///////////////////////////////////////////////////////////////////////////// - /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting - /// an existing one. + /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one. virtual void Initialize(std::shared_ptr entry) = 0; ///////////////////////////////////////////////////////////////////////////// /// \brief Check if the processor already has been initialized. - bool IsInitialized() const { return fProtoModel && fEntry; } + bool IsInitialized() const { return fEntry != nullptr; } ///////////////////////////////////////////////////////////////////////////// /// \brief Connect fields to the page source of the processor's underlying RNTuple(s). @@ -289,17 +287,6 @@ protected: /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise. virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) = 0; - ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the proto model used by the processor. - /// - /// A processor's proto model contains all fields that can be accessed and is inferred from the descriptors of the - /// underlying RNTuples. It is used in RequestField() to check that the requested field is actually valid. - const ROOT::RNTupleModel &GetProtoModel() const - { - assert(fProtoModel); - return *fProtoModel; - } - ///////////////////////////////////////////////////////////////////////////// /// \brief Get the total number of entries in this processor virtual ROOT::NTupleSize_t GetNEntries() = 0; @@ -315,6 +302,7 @@ protected: /// /// /// \param[in] fieldName Name of the field to add. + /// \param[in] typeName Type of the field to add. /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be /// created. /// \param[in] provenance Provenance of the processor. @@ -322,8 +310,8 @@ protected: /// \return The index of the newly added field in the entry. /// /// In case the field was already present in the entry, the index of the existing field is returned. - virtual ROOT::RResult - AddFieldToEntry(std::string_view fieldName, void *valuePtr, + virtual Internal::RNTupleProcessorEntry::FieldIndex_t + AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr, const Internal::RNTupleProcessorProvenance &provenance) = 0; ///////////////////////////////////////////////////////////////////////////// @@ -383,6 +371,8 @@ public: /// \tparam T Type of the requested field. /// /// \param[in] fieldName Name of the requested field. + /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be + /// created. /// /// \return An RNTupleProcessorOptionalPtr, which provides access to the field's value. /// @@ -392,11 +382,14 @@ public: /// invalid data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through /// the interface of the returned `RNTupleProcessorOptionalPtr`, to ensure that only valid data can be read. template - RNTupleProcessorOptionalPtr RequestField(std::string_view fieldName, void *valuePtr = nullptr) + RNTupleProcessorOptionalPtr RequestField(const std::string &fieldName, void *valuePtr = nullptr) { Initialize(fEntry); - // TODO handle alternative (compatible field types) - auto fieldIdx = AddFieldToEntry(fieldName, valuePtr, Internal::RNTupleProcessorProvenance()).Unwrap(); + std::string typeName{}; + if constexpr (!std::is_void_v) { + typeName = ROOT::Internal::GetRenormalizedTypeName(typeid(T)); + } + auto fieldIdx = AddFieldToEntry(fieldName, typeName, valuePtr, Internal::RNTupleProcessorProvenance()); return RNTupleProcessorOptionalPtr(fEntry.get(), fieldIdx); } @@ -563,8 +556,18 @@ private: std::unique_ptr fPageSource; ///////////////////////////////////////////////////////////////////////////// - /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting - /// an existing one. + /// \brief Create a new field and connect it to the processor's page source. + /// + /// \param[in] qualifiedFieldName Name of the field to add, prefixed with its parent fields, if applicable. + /// \param[in] typeName Type of the field to add. + /// + /// \return The newly created field. + /// \throws ROOT::RException In case the requested field cannot be found on disk. + std::unique_ptr + CreateAndConnectField(const std::string &qualifiedFieldName, const std::string &typeName); + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one. /// /// At this point, the page source for the underlying RNTuple of the processor will be created and opened. void Initialize(std::shared_ptr entry = nullptr) final; @@ -602,8 +605,8 @@ private: /// \brief Add a field to the entry. /// /// \sa RNTupleProcessor::AddFieldToEntry() - ROOT::RResult AddFieldToEntry( - std::string_view fieldName, void *valuePtr = nullptr, + Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry( + const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr, const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final; ///////////////////////////////////////////////////////////////////////////// @@ -633,8 +636,9 @@ public: RNTupleSingleProcessor &operator=(RNTupleSingleProcessor &&) = delete; ~RNTupleSingleProcessor() override { - // The proto model needs to be deleted before fPageSource. - fProtoModel.release(); + // The entry's fields need to be deleted before fPageSource. + if (fEntry) + fEntry->Clear(); }; }; @@ -655,8 +659,7 @@ private: Internal::RNTupleProcessorProvenance fProvenance; ///////////////////////////////////////////////////////////////////////////// - /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting - /// an existing one. + /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one. void Initialize(std::shared_ptr entry = nullptr) final; ///////////////////////////////////////////////////////////////////////////// @@ -697,8 +700,8 @@ private: /// \brief Add a field to the entry. /// /// \sa RNTupleProcessor::AddFieldToEntry() - ROOT::RResult AddFieldToEntry( - std::string_view fieldName, void *valuePtr = nullptr, + Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry( + const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr, const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final; ///////////////////////////////////////////////////////////////////////////// @@ -753,8 +756,7 @@ private: std::unordered_set fAuxiliaryFieldIdxs; - /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting - /// an existing one. + /// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one. void Initialize(std::shared_ptr entry = nullptr) final; ///////////////////////////////////////////////////////////////////////////// @@ -775,17 +777,6 @@ private: /// \brief Get the total number of entries in this processor. ROOT::NTupleSize_t GetNEntries() final; - ///////////////////////////////////////////////////////////////////////////// - /// \brief Set the processor's proto model by combining the primary and auxiliary models. - /// - /// \param[in] primaryModel The proto model of the primary processor. - /// \param[in] auxModel The proto model of the auxiliary processors. - /// - /// To prevent field name clashes when one or more models have fields with duplicate names, fields from each - /// auxiliary model are stored as a anonymous record, and subsequently registered as subfields in the join model. - /// This way, they can be accessed from the processor's entry as `auxNTupleName.fieldName`. - void SetProtoModel(std::unique_ptr primaryModel, std::unique_ptr auxModel); - ///////////////////////////////////////////////////////////////////////////// /// \brief Set the validity for all fields in the auxiliary processor at once. void SetAuxiliaryFieldValidity(bool validity); @@ -809,8 +800,8 @@ private: /// \brief Add a field to the entry. /// /// \sa RNTupleProcessor::AddFieldToEntry() - ROOT::RResult AddFieldToEntry( - std::string_view fieldName, void *valuePtr = nullptr, + Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry( + const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr, const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final; ///////////////////////////////////////////////////////////////////////////// diff --git a/tree/ntuple/inc/ROOT/RNTupleProcessorEntry.hxx b/tree/ntuple/inc/ROOT/RNTupleProcessorEntry.hxx index 3a9132c9c0649..85e01964ae502 100644 --- a/tree/ntuple/inc/ROOT/RNTupleProcessorEntry.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleProcessorEntry.hxx @@ -97,20 +97,36 @@ public: private: struct RProcessorValue { + std::unique_ptr fField; + std::string fQualifiedFieldName; ROOT::RFieldBase::RValue fValue; bool fIsValid; RNTupleProcessorProvenance fProcessorProvenance; - RProcessorValue(ROOT::RFieldBase::RValue &&value, bool isValid, RNTupleProcessorProvenance provenance) - : fValue(std::move(value)), fIsValid(isValid), fProcessorProvenance(provenance) + RProcessorValue(std::unique_ptr field, std::string_view qualifiedFieldName, + ROOT::RFieldBase::RValue &&value, bool isValid, RNTupleProcessorProvenance provenance) + : fField(std::move(field)), + fQualifiedFieldName(qualifiedFieldName), + fValue(std::move(value)), + fIsValid(isValid), + fProcessorProvenance(provenance) { } }; std::vector fProcessorValues; - std::unordered_map fFieldName2Index; + // Maps from the field name to all type alternatives for that field that have been added to the entry. + std::unordered_map> fFieldName2Index; public: + ///////////////////////////////////////////////////////////////////////////// + /// \brief Clear all fields from the entry. + void Clear() + { + fProcessorValues.clear(); + fFieldName2Index.clear(); + } + ///////////////////////////////////////////////////////////////////////////// /// \brief Set the validity of a field, i.e. whether it is possible to read its value in the current entry. /// @@ -143,7 +159,7 @@ public: assert(fieldIdx < fProcessorValues.size()); for (const auto &[fieldName, index] : fFieldName2Index) { - if (index == fieldIdx) { + if (index.find(fieldIdx) != index.end()) { return fieldName; } } @@ -156,41 +172,59 @@ public: ///////////////////////////////////////////////////////////////////////////// /// \brief Find the field index of the provided field in the entry. /// - /// \param[in] fieldName The name of the field in the entry. + /// \param[in] canonicalFieldName The name of the field in the entry, including its processor name prefixes and + /// parent field names, if applicable. /// /// \return A `std::optional` containing the field index if it was found. - std::optional FindFieldIndex(std::string_view fieldName) const + std::optional FindFieldIndex(std::string_view canonicalFieldName, std::string_view typeName) const { - auto it = fFieldName2Index.find(std::string(fieldName)); + auto it = fFieldName2Index.find(std::string(canonicalFieldName)); if (it == fFieldName2Index.end()) { return std::nullopt; } - return it->second; + + const auto &fieldIdxs = it->second; + assert(!fieldIdxs.empty()); + + for (auto idx : fieldIdxs) { + if (fProcessorValues[idx].fField->GetTypeName() == typeName) { + return idx; + } + } + + return std::nullopt; } ///////////////////////////////////////////////////////////////////////////// /// \brief Add a new field to the entry. /// - /// \param[in] fieldName Name of the field to add. + /// \param[in] qualifiedFieldName Name of the field to add, including its parent field if applicable. /// \param[in] field Reference to the field to add, used to to create its corresponding RValue. /// \param[in] valuePtr Pointer to an object corresponding to the field's type to bind to its value. If this is a /// `nullptr`, a pointer will be created. /// \param[in] provenance Processor provenance of the field. /// /// \return The field index of the newly added field. - FieldIndex_t AddField(std::string_view fieldName, ROOT::RFieldBase &field, void *valuePtr, + FieldIndex_t AddField(const std::string &qualifiedFieldName, std::unique_ptr field, void *valuePtr, const RNTupleProcessorProvenance &provenance) { - if (FindFieldIndex(fieldName)) + auto fieldNameWithProcessorPrefix = qualifiedFieldName; + if (const auto &processorPrefix = provenance.Get(); !processorPrefix.empty()) + fieldNameWithProcessorPrefix = processorPrefix + "." + qualifiedFieldName; + + if (FindFieldIndex(fieldNameWithProcessorPrefix, field->GetTypeName())) throw ROOT::RException( - R__FAIL("field \"" + field.GetQualifiedFieldName() + "\" is already present in the entry")); + R__FAIL("field \"" + fieldNameWithProcessorPrefix + "\" is already present in the entry")); - auto value = field.CreateValue(); + auto fieldIdx = fProcessorValues.size(); + fFieldName2Index[fieldNameWithProcessorPrefix].insert(fieldIdx); + + assert(field); + auto value = field->CreateValue(); if (valuePtr) value.BindRawPtr(valuePtr); - auto fieldIdx = fProcessorValues.size(); - fFieldName2Index[std::string(fieldName)] = fieldIdx; - fProcessorValues.emplace_back(RProcessorValue(std::move(value), true, provenance)); + fProcessorValues.emplace_back( + RProcessorValue(std::move(field), qualifiedFieldName, std::move(value), true, provenance)); return fieldIdx; } @@ -200,14 +234,22 @@ public: /// /// \param[in] fieldIdx Index of the field to update. /// \param[in] field The new field to use in the entry. - void UpdateField(FieldIndex_t fieldIdx, ROOT::RFieldBase &field) + void UpdateField(FieldIndex_t fieldIdx, std::unique_ptr field) { assert(fieldIdx < fProcessorValues.size()); - auto currValuePtr = fProcessorValues[fieldIdx].fValue.GetPtr(); - auto value = field.CreateValue(); - value.Bind(currValuePtr); - fProcessorValues[fieldIdx].fValue = value; + auto &fieldInfo = fProcessorValues[fieldIdx]; + + if (field) { + auto newValue = field->CreateValue(); + auto currValuePtr = fieldInfo.fValue.GetPtr(); + newValue.Bind(currValuePtr); + fieldInfo.fField = std::move(field); + fieldInfo.fValue = std::move(newValue); + fieldInfo.fIsValid = true; + } else { + fieldInfo.fIsValid = false; + } } ///////////////////////////////////////////////////////////////////////////// @@ -275,14 +317,13 @@ public: } ///////////////////////////////////////////////////////////////////////////// - /// \brief Get the name of a field in the entry, including processor name prefixes in the case of auxiliary fields. + /// \brief Get the name of a field in the entry, including its parent fields. /// /// \param[in] fieldIdx The index of the field in the entry. - std::string GetFieldName(FieldIndex_t fieldIdx) const + std::string GetQualifiedFieldName(FieldIndex_t fieldIdx) const { assert(fieldIdx < fProcessorValues.size()); - return fProcessorValues[fieldIdx].fProcessorProvenance.Get() + "." + - fProcessorValues[fieldIdx].fValue.GetField().GetQualifiedFieldName(); + return fProcessorValues[fieldIdx].fQualifiedFieldName; } ///////////////////////////////////////////////////////////////////////////// diff --git a/tree/ntuple/src/RNTupleProcessor.cxx b/tree/ntuple/src/RNTupleProcessor.cxx index 03c9072dbf271..ed7ee7afed738 100644 --- a/tree/ntuple/src/RNTupleProcessor.cxx +++ b/tree/ntuple/src/RNTupleProcessor.cxx @@ -129,10 +129,11 @@ void ROOT::Experimental::RNTupleSingleProcessor::Initialize( fPageSource = fNTupleSpec.CreatePageSource(); fPageSource->Attach(); + + fNEntries = fPageSource->GetNEntries(); + ROOT::RNTupleDescriptor::RCreateModelOptions opts; opts.SetCreateBare(true); - fProtoModel = fPageSource->GetSharedDescriptorGuard()->CreateModel(opts); - fProtoModel->Unfreeze(); } bool ROOT::Experimental::RNTupleSingleProcessor::CanReadFieldFromDisk(std::string_view fieldName) @@ -145,28 +146,74 @@ bool ROOT::Experimental::RNTupleSingleProcessor::CanReadFieldFromDisk(std::strin return desc->FindFieldId(fieldName, fieldZeroId) != ROOT::kInvalidDescriptorId; } -ROOT::RResult -ROOT::Experimental::RNTupleSingleProcessor::AddFieldToEntry(std::string_view fieldName, void *valuePtr, +std::unique_ptr +ROOT::Experimental::RNTupleSingleProcessor::CreateAndConnectField(const std::string &qualifiedFieldName, + const std::string &typeName) +{ + assert(fPageSource); + + std::string onDiskFieldName = qualifiedFieldName; + + // Stip the "_join" prefix (for join fields) from the field name, if present. + if (onDiskFieldName.find("_join.") == 0) { + onDiskFieldName = onDiskFieldName.substr(6); + } + + const auto &desc = fPageSource->GetSharedDescriptorGuard().GetRef(); + ROOT::RFieldZero fieldZero; + fieldZero.SetOnDiskId(desc.GetFieldZeroId()); + + const auto onDiskFieldId = desc.FindFieldId(onDiskFieldName); + + if (onDiskFieldId == kInvalidDescriptorId) { + return nullptr; + } + + std::unique_ptr field; + if (typeName.empty()) { + const auto &fieldDesc = desc.GetFieldDescriptor(onDiskFieldId); + field = fieldDesc.CreateField(desc); + } else { + // Strip the parent field name prefix(es), if present. + std::string subfieldName = onDiskFieldName; + auto posDot = onDiskFieldName.find_last_of('.'); + if (posDot != std::string::npos) + subfieldName = onDiskFieldName.substr(posDot + 1); + + field = ROOT::RFieldBase::Create(subfieldName, typeName).Unwrap(); + } + + field->SetOnDiskId(onDiskFieldId); + fieldZero.Attach(std::move(field)); + ROOT::Internal::CallConnectPageSourceOnField(fieldZero, *fPageSource); + return std::move(fieldZero.ReleaseSubfields()[0]); +} + +ROOT::Experimental::Internal::RNTupleProcessorEntry::FieldIndex_t +ROOT::Experimental::RNTupleSingleProcessor::AddFieldToEntry(const std::string &fieldName, const std::string &typeName, + void *valuePtr, const Internal::RNTupleProcessorProvenance &provenance) { - auto fieldIdx = fEntry->FindFieldIndex(fieldName); + auto fieldIdx = fEntry->FindFieldIndex(fieldName, typeName); if (!fieldIdx) { - try { - std::string onDiskFieldName = std::string(fieldName); - if (provenance.IsPresentInFieldName(onDiskFieldName)) { - onDiskFieldName = onDiskFieldName.substr(provenance.Get().size() + 1); - } - auto &field = fProtoModel->GetMutableField(onDiskFieldName); - fieldIdx = fEntry->AddField(fieldName, field, valuePtr, provenance); - return *fieldIdx; - } catch (const ROOT::RException &) { - return R__FAIL("cannot register field with name \"" + std::string(fieldName) + - "\" because it is not present in the on-disk information of the RNTuple(s) this " - "processor is created from"); + // Strip the processor name prefix(es), if present. + std::string qualifiedFieldName = fieldName; + if (provenance.IsPresentInFieldName(qualifiedFieldName)) { + qualifiedFieldName = qualifiedFieldName.substr(provenance.Get().size() + 1); } - } else { - return *fieldIdx; + + auto field = CreateAndConnectField(qualifiedFieldName, typeName); + + if (!field) { + throw RException(R__FAIL("cannot register field with name \"" + qualifiedFieldName + + "\" because it is not present in the on-disk information of the RNTuple(s) this " + "processor is created from")); + } + + fieldIdx = fEntry->AddField(qualifiedFieldName, std::move(field), valuePtr, provenance); } + + return *fieldIdx; } ROOT::NTupleSize_t ROOT::Experimental::RNTupleSingleProcessor::LoadEntry(ROOT::NTupleSize_t entryNumber) @@ -189,44 +236,16 @@ void ROOT::Experimental::RNTupleSingleProcessor::Connect( { Initialize(); - // The processor has already been connected. - if (fNEntries != kInvalidNTupleIndex && !updateFields) - return; - fFieldIdxs = fieldIdxs; - fNEntries = fPageSource->GetNEntries(); - - auto desc = fPageSource->GetSharedDescriptorGuard(); - auto &fieldZero = ROOT::Internal::GetFieldZeroOfModel(*fProtoModel); - auto fieldZeroId = desc->GetFieldZeroId(); - fieldZero.SetOnDiskId(fieldZeroId); - ROOT::Internal::SetAllowFieldSubstitutions(fieldZero, true); - - for (const auto &fieldIdx : fieldIdxs) { - const auto &entryField = fEntry->GetField(fieldIdx); - - // TODO handle subfields - auto onDiskId = desc->FindFieldId(entryField.GetQualifiedFieldName(), fieldZeroId); - // The field we are trying to connect is not present in the ntuple - if (onDiskId == kInvalidDescriptorId) { - fEntry->SetFieldValidity(fieldIdx, false); - continue; - } - auto &modelField = fProtoModel->GetMutableField(entryField.GetQualifiedFieldName()); + if (updateFields) { + for (const auto &fieldIdx : fFieldIdxs) { + auto newField = + CreateAndConnectField(fEntry->GetQualifiedFieldName(fieldIdx), fEntry->GetField(fieldIdx).GetTypeName()); - if (entryField.GetState() == RFieldBase::EState::kConnectedToSource && &entryField != &modelField) { - fEntry->UpdateField(fieldIdx, modelField); + fEntry->UpdateField(fieldIdx, std::move(newField)); } - - if (modelField.GetState() == RFieldBase::EState::kUnconnected) { - modelField.SetOnDiskId(onDiskId); - ROOT::Internal::CallConnectPageSourceOnField(modelField, *fPageSource); - } - - fEntry->SetFieldValidity(fieldIdx, true); } - ROOT::Internal::SetAllowFieldSubstitutions(fieldZero, false); } void ROOT::Experimental::RNTupleSingleProcessor::AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, @@ -286,7 +305,6 @@ void ROOT::Experimental::RNTupleChainProcessor::Initialize( fEntry = entry; fInnerProcessors[0]->Initialize(fEntry); - fProtoModel = fInnerProcessors[0]->GetProtoModel().Clone(); } ROOT::NTupleSize_t ROOT::Experimental::RNTupleChainProcessor::GetNEntries() @@ -323,12 +341,12 @@ void ROOT::Experimental::RNTupleChainProcessor::ConnectInnerProcessor(std::size_ innerProc->Connect(fFieldIdxs, fProvenance, /*updateFields=*/true); } -ROOT::RResult -ROOT::Experimental::RNTupleChainProcessor::AddFieldToEntry(std::string_view fieldName, void *valuePtr, +ROOT::Experimental::Internal::RNTupleProcessorEntry::FieldIndex_t +ROOT::Experimental::RNTupleChainProcessor::AddFieldToEntry(const std::string &fieldName, const std::string &typeName, + void *valuePtr, const Internal::RNTupleProcessorProvenance &provenance) { - return R__FORWARD_RESULT( - fInnerProcessors[fCurrentProcessorNumber]->AddFieldToEntry(fieldName, valuePtr, provenance)); + return fInnerProcessors[fCurrentProcessorNumber]->AddFieldToEntry(fieldName, typeName, valuePtr, provenance); } ROOT::NTupleSize_t ROOT::Experimental::RNTupleChainProcessor::LoadEntry(ROOT::NTupleSize_t entryNumber) @@ -430,34 +448,22 @@ void ROOT::Experimental::RNTupleJoinProcessor::Initialize( fPrimaryProcessor->Initialize(fEntry); fAuxiliaryProcessor->Initialize(fEntry); - // If the primaryProcessor has a field with the name of the auxProcessor (either as a "proper" field or because the - // primary processor itself is a join where its auxProcessor bears the same name as the current auxProcessor), there - // will be name conflicts, so error out. - if (auto &primaryModel = fPrimaryProcessor->GetProtoModel(); - primaryModel.GetFieldNames().find(fAuxiliaryProcessor->GetProcessorName()) != - primaryModel.GetFieldNames().end()) { - throw RException(R__FAIL("a field or nested auxiliary processor named \"" + - fAuxiliaryProcessor->GetProcessorName() + - "\" is already present as a field in the primary processor; rename the auxiliary " - "processor to avoid conflicts")); - } - - SetProtoModel(fPrimaryProcessor->GetProtoModel().Clone(), fAuxiliaryProcessor->GetProtoModel().Clone()); - if (!fJoinFieldNames.empty()) { for (const auto &joinField : fJoinFieldNames) { + if (!fPrimaryProcessor->CanReadFieldFromDisk(joinField)) { + throw RException(R__FAIL("could not find join field \"" + joinField + "\" in primary processor \"" + + fPrimaryProcessor->GetProcessorName() + "\"")); + } if (!fAuxiliaryProcessor->CanReadFieldFromDisk(joinField)) { throw RException(R__FAIL("could not find join field \"" + joinField + "\" in auxiliary processor \"" + fAuxiliaryProcessor->GetProcessorName() + "\"")); } + // We prepend the name of the primary processor in this case to prevent reading from the wrong join field in // composed join operations. - auto fieldIdx = AddFieldToEntry(fProcessorName + "." + joinField, nullptr, + auto fieldIdx = AddFieldToEntry(fProcessorName + "._join." + joinField, "std::uint64_t", nullptr, Internal::RNTupleProcessorProvenance(fProcessorName)); - if (!fieldIdx) - throw RException(R__FAIL("could not find join field \"" + joinField + "\" in primary processor \"" + - fPrimaryProcessor->GetProcessorName() + "\"")); - fJoinFieldIdxs.insert(fieldIdx.Unwrap()); + fJoinFieldIdxs.insert(fieldIdx); } fJoinTable = Internal::RNTupleJoinTable::Create(fJoinFieldNames); @@ -472,7 +478,7 @@ void ROOT::Experimental::RNTupleJoinProcessor::Connect( auto auxProvenance = provenance.Evolve(fAuxiliaryProcessor->GetProcessorName()); for (const auto &fieldIdx : fieldIdxs) { - auto fieldProvenance = fEntry->GetFieldProvenance(fieldIdx); + const auto &fieldProvenance = fEntry->GetFieldProvenance(fieldIdx); if (fieldProvenance.Contains(auxProvenance)) fAuxiliaryFieldIdxs.insert(fieldIdx); else @@ -483,53 +489,34 @@ void ROOT::Experimental::RNTupleJoinProcessor::Connect( fAuxiliaryProcessor->Connect(fAuxiliaryFieldIdxs, auxProvenance, updateFields); } -void ROOT::Experimental::RNTupleJoinProcessor::SetProtoModel(std::unique_ptr primaryModel, - std::unique_ptr auxModel) -{ - fProtoModel = std::move(primaryModel); - fProtoModel->Unfreeze(); - - // Create an anonymous record field for the auxiliary processor, containing its top-level fields. These original - // top-level fields are registered as subfields in this processor's proto-model, such that they can be accessed as - // `auxNTupleName.fieldName`. - std::vector> auxFields; - auxFields.reserve(auxModel->GetFieldNames().size()); - - for (const auto &fieldName : auxModel->GetFieldNames()) { - auxFields.emplace_back(auxModel->GetConstField(fieldName).Clone(fieldName)); - } - - auto auxParentField = std::make_unique(fAuxiliaryProcessor->GetProcessorName(), - std::move(auxFields)); - const auto &subFields = auxParentField->GetConstSubfields(); - fProtoModel->AddField(std::move(auxParentField)); - - for (const auto &field : subFields) { - fProtoModel->RegisterSubfield(field->GetQualifiedFieldName()); - - if (field->GetTypeName() == "RAuxiliaryProcessorField") { - for (const auto &auxSubfield : field->GetConstSubfields()) { - fProtoModel->RegisterSubfield(auxSubfield->GetQualifiedFieldName()); - } - } - } -} - -ROOT::RResult -ROOT::Experimental::RNTupleJoinProcessor::AddFieldToEntry(std::string_view fieldName, void *valuePtr, +ROOT::Experimental::Internal::RNTupleProcessorEntry::FieldIndex_t +ROOT::Experimental::RNTupleJoinProcessor::AddFieldToEntry(const std::string &fieldName, const std::string &typeName, + void *valuePtr, const Internal::RNTupleProcessorProvenance &provenance) { auto auxProvenance = provenance.Evolve(fAuxiliaryProcessor->GetProcessorName()); if (auxProvenance.IsPresentInFieldName(fieldName)) { - auto fieldIdx = fAuxiliaryProcessor->AddFieldToEntry(fieldName, valuePtr, auxProvenance); + // If the primaryProcessor has a field with the name of the auxProcessor (either as a "proper" field or because + // the primary processor itself is a join where its auxProcessor bears the same name as the current auxProcessor), + // there will be name conflicts, so error out. + if (fPrimaryProcessor->CanReadFieldFromDisk(fieldName)) { + throw RException(R__FAIL("ambiguous field name: \"" + fieldName + + "\" is present in the primary RNTupleProcessor \"" + + fPrimaryProcessor->GetProcessorName() + + "\", but may also refer to a field in the auxiliary RNTupleProcessor named \"" + + fAuxiliaryProcessor->GetProcessorName() + + "\". To avoid this ambiguity, rename the auxiliary RNTupleProcessor.")); + } + + auto fieldIdx = fAuxiliaryProcessor->AddFieldToEntry(fieldName, typeName, valuePtr, auxProvenance); if (fieldIdx) - fAuxiliaryFieldIdxs.insert(fieldIdx.Unwrap()); - return R__FORWARD_RESULT(fieldIdx); + fAuxiliaryFieldIdxs.insert(fieldIdx); + return fieldIdx; } else { - auto fieldIdx = fPrimaryProcessor->AddFieldToEntry(fieldName, valuePtr, provenance); + auto fieldIdx = fPrimaryProcessor->AddFieldToEntry(fieldName, typeName, valuePtr, provenance); if (fieldIdx) - fFieldIdxs.insert(fieldIdx.Unwrap()); - return R__FORWARD_RESULT(fieldIdx); + fFieldIdxs.insert(fieldIdx); + return fieldIdx; } } diff --git a/tree/ntuple/test/ntuple_processor.cxx b/tree/ntuple/test/ntuple_processor.cxx index c4a4ef71e43d9..61660081ec8c3 100644 --- a/tree/ntuple/test/ntuple_processor.cxx +++ b/tree/ntuple/test/ntuple_processor.cxx @@ -94,12 +94,14 @@ class RNTupleProcessorTest : public testing::Test { auto fldI = model->MakeField("i"); auto fldX = model->MakeField("x"); auto fldY = model->MakeField>("y"); + auto fldStruct = model->MakeField("struct"); auto ntuple = RNTupleWriter::Recreate(std::move(model), fNTupleNames[0], fFileNames[0]); for (unsigned i = 0; i < 5; i++) { *fldI = i; *fldX = static_cast(i); *fldY = {static_cast(i), static_cast(i * 2)}; + fldStruct->a = i * 1.f; ntuple->Fill(); } } @@ -107,11 +109,13 @@ class RNTupleProcessorTest : public testing::Test { auto model = RNTupleModel::Create(); auto fldI = model->MakeField("i"); auto fldZ = model->MakeField("z"); + auto fldStruct = model->MakeField("struct"); auto ntuple = RNTupleWriter::Recreate(std::move(model), fNTupleNames[1], fFileNames[1]); for (unsigned i = 0; i < 5; ++i) { *fldI = i; *fldZ = i * 2.f; + fldStruct->a = i * 2.f; ntuple->Fill(); } } @@ -120,11 +124,14 @@ class RNTupleProcessorTest : public testing::Test { auto model = RNTupleModel::Create(); auto fldI = model->MakeField("i"); auto fldZ = model->MakeField("z"); + auto fldStruct = model->MakeField("struct"); auto ntuple = RNTupleWriter::Recreate(std::move(model), fNTupleNames[2], fFileNames[2]); for (int i = 4; i >= 0; --i) { *fldI = i; *fldZ = i * 3.f; + fldStruct->a = i * 3.f; + ntuple->Fill(); } } @@ -133,6 +140,7 @@ class RNTupleProcessorTest : public testing::Test { auto model = RNTupleModel::Create(); auto fldI = model->MakeField("i"); auto fldZ = model->MakeField("z"); + auto fldStruct = model->MakeField("struct"); auto ntuple = RNTupleWriter::Recreate(std::move(model), fNTupleNames[3], fFileNames[3]); for (unsigned i = 0; i < 5; ++i) { @@ -140,6 +148,7 @@ class RNTupleProcessorTest : public testing::Test { continue; *fldI = i; *fldZ = i * 4.f; + fldStruct->a = i * 4.f; ntuple->Fill(); } } @@ -222,6 +231,48 @@ TEST_F(RNTupleProcessorTest, RequestFieldWithVoidPtr) } } +TEST_F(RNTupleProcessorTest, AlternativeTypes) +{ + auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); + + auto xAsDouble = proc->RequestField("x"); + auto xAsFloat = proc->RequestField("x"); + + try { + proc->RequestField("x"); + } catch (const ROOT::RException &err) { + EXPECT_THAT(err.what(), testing::HasSubstr("in-memory field x of type std::string is incompatible with " + "on-disk field x: incompatible on-disk type name float")); + } + + auto yAsRVec = proc->RequestField>("y"); + + for (auto idx : *proc) { + EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); + + EXPECT_FLOAT_EQ(static_cast(idx), *xAsDouble); + EXPECT_FLOAT_EQ(idx, *xAsFloat); + + ROOT::RVec yExp{static_cast(idx), static_cast((idx) * 2)}; + for (std::size_t i = 0ul; i < yAsRVec->size(); ++i) { + EXPECT_FLOAT_EQ(yExp[i], (*yAsRVec)[i]); + } + } +} + +TEST_F(RNTupleProcessorTest, Subfields) +{ + auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); + + auto strct = proc->RequestField("struct"); + auto strct_a = proc->RequestField("struct.a"); + + for (auto idx : *proc) { + EXPECT_FLOAT_EQ(idx, idx); + EXPECT_FLOAT_EQ(strct->a, *strct_a); + } +} + TEST_F(RNTupleProcessorTest, PrintStructureSingle) { auto proc = RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]}); @@ -240,33 +291,53 @@ TEST_F(RNTupleProcessorTest, ChainedChain) { std::vector> innerProcs; innerProcs.push_back( - RNTupleProcessor::CreateChain({{fNTupleNames[0], fFileNames[0]}, {fNTupleNames[0], fFileNames[0]}})); - innerProcs.push_back(RNTupleProcessor::Create({fNTupleNames[0], fFileNames[0]})); + RNTupleProcessor::CreateChain({{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}})); + innerProcs.push_back( + RNTupleProcessor::CreateChain({{fNTupleNames[1], fFileNames[1]}, {fNTupleNames[2], fFileNames[2]}})); auto proc = RNTupleProcessor::CreateChain(std::move(innerProcs)); auto i = proc->RequestField("i"); - auto x = proc->RequestField("x"); + auto z = proc->RequestField("z"); + auto strct_a = proc->RequestField("struct.a"); for (auto idx : *proc) { EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(*i, proc->GetCurrentEntryNumber() % 5); - EXPECT_EQ(static_cast(*i), *x); + if ((idx >= 5 && idx < 10) || idx >= 15) { + EXPECT_EQ(*i, 4 - idx % 5); + EXPECT_EQ(*z, (4 - idx % 5) * 3.f); + } else { + EXPECT_EQ(*i, idx % 5); + EXPECT_EQ(*z, (idx % 5) * 2.f); + } + + EXPECT_EQ(*strct_a, *z); } - EXPECT_EQ(15, proc->GetNEntriesProcessed()); + EXPECT_EQ(20, proc->GetNEntriesProcessed()); - auto xPtr = std::make_shared(); - x.BindRawPtr(xPtr.get()); + auto zPtr = std::make_shared(); + z.BindRawPtr(zPtr.get()); + auto aPtr = std::make_shared(); + strct_a.BindRawPtr(aPtr.get()); for (auto idx : *proc) { - EXPECT_EQ(idx + 1 + 15, proc->GetNEntriesProcessed()); + EXPECT_EQ(idx + 1 + 20, proc->GetNEntriesProcessed()); EXPECT_EQ(idx, proc->GetCurrentEntryNumber()); - EXPECT_EQ(*i, proc->GetCurrentEntryNumber() % 5); - EXPECT_EQ(static_cast(*i), *x); - EXPECT_EQ(x.GetPtr().get(), xPtr.get()); + + if ((idx >= 5 && idx < 10) || idx >= 15) { + EXPECT_EQ(*i, 4 - idx % 5); + EXPECT_EQ(*z, (4 - idx % 5) * 3.f); + } else { + EXPECT_EQ(*i, idx % 5); + EXPECT_EQ(*z, (idx % 5) * 2.f); + } + + EXPECT_EQ(*strct_a, *z); + EXPECT_EQ(z.GetPtr().get(), zPtr.get()); + EXPECT_EQ(strct_a.GetPtr().get(), aPtr.get()); } - EXPECT_EQ(30, proc->GetNEntriesProcessed()); + EXPECT_EQ(40, proc->GetNEntriesProcessed()); } TEST_F(RNTupleProcessorTest, ChainedJoin) @@ -282,6 +353,7 @@ TEST_F(RNTupleProcessorTest, ChainedJoin) auto i = proc->RequestField("i"); auto x = proc->RequestField("x"); auto z = proc->RequestField("ntuple_aux.z"); + auto strct_a = proc->RequestField("ntuple_aux.struct.a"); for (auto idx : *proc) { EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); @@ -290,6 +362,7 @@ TEST_F(RNTupleProcessorTest, ChainedJoin) EXPECT_EQ(static_cast(*i), *x); EXPECT_EQ(*x * 2, *z); + EXPECT_EQ(*z, *strct_a); } EXPECT_EQ(10, proc->GetNEntriesProcessed()); } @@ -307,6 +380,7 @@ TEST_F(RNTupleProcessorTest, ChainedJoinUnaligned) auto i = proc->RequestField("i"); auto x = proc->RequestField("x"); auto z = proc->RequestField("ntuple_aux.z"); + auto strct_a = proc->RequestField("ntuple_aux.struct.a"); for (auto idx : *proc) { EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); @@ -315,6 +389,7 @@ TEST_F(RNTupleProcessorTest, ChainedJoinUnaligned) EXPECT_EQ(static_cast(*i), *x); EXPECT_EQ(*x * 3, *z); + EXPECT_EQ(*z, *strct_a); } EXPECT_EQ(10, proc->GetNEntriesProcessed()); } @@ -332,6 +407,7 @@ TEST_F(RNTupleProcessorTest, ChainedJoinMissingEntries) auto i = proc->RequestField("i"); auto x = proc->RequestField("x"); auto z = proc->RequestField("ntuple_aux.z"); + auto strct_a = proc->RequestField("ntuple_aux.struct.a"); for (auto idx : *proc) { EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); @@ -342,9 +418,12 @@ TEST_F(RNTupleProcessorTest, ChainedJoinMissingEntries) if ((idx % 5) % 2 == 1) { EXPECT_FALSE(z.HasValue()); + EXPECT_FALSE(strct_a.HasValue()); } else { EXPECT_TRUE(z.HasValue()); + EXPECT_TRUE(strct_a.HasValue()); EXPECT_EQ(*x * 4, *z); + EXPECT_EQ(*z, *strct_a); } } EXPECT_EQ(10, proc->GetNEntriesProcessed()); @@ -363,6 +442,7 @@ TEST_F(RNTupleProcessorTest, JoinedChain) auto i = proc->RequestField("i"); auto x = proc->RequestField("x"); auto z = proc->RequestField("ntuple_aux.z"); + auto strct_a = proc->RequestField("ntuple_aux.struct.a"); for (auto idx : *proc) { EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); @@ -371,6 +451,7 @@ TEST_F(RNTupleProcessorTest, JoinedChain) EXPECT_EQ(static_cast(*i), *x); EXPECT_EQ(*x * 2, *z); + EXPECT_EQ(*z, *strct_a); } EXPECT_EQ(10, proc->GetNEntriesProcessed()); } @@ -388,6 +469,7 @@ TEST_F(RNTupleProcessorTest, JoinedChainUnaligned) auto i = proc->RequestField("i"); auto x = proc->RequestField("x"); auto z = proc->RequestField("ntuple_aux.z"); + auto strct_a = proc->RequestField("ntuple_aux.struct.a"); for (auto idx : *proc) { EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); @@ -396,6 +478,7 @@ TEST_F(RNTupleProcessorTest, JoinedChainUnaligned) EXPECT_EQ(static_cast(*i), *x); EXPECT_EQ(*x * 3, *z); + EXPECT_EQ(*z, *strct_a); } EXPECT_EQ(10, proc->GetNEntriesProcessed()); } @@ -413,6 +496,7 @@ TEST_F(RNTupleProcessorTest, JoinedChainMissingEntries) auto i = proc->RequestField("i"); auto x = proc->RequestField("x"); auto z = proc->RequestField("ntuple_aux.z"); + auto strct_a = proc->RequestField("ntuple_aux.struct.a"); for (auto idx : *proc) { EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); @@ -423,9 +507,12 @@ TEST_F(RNTupleProcessorTest, JoinedChainMissingEntries) if ((idx % 5) % 2 == 1) { EXPECT_FALSE(z.HasValue()); + EXPECT_FALSE(strct_a.HasValue()); } else { EXPECT_TRUE(z.HasValue()); + EXPECT_TRUE(strct_a.HasValue()); EXPECT_EQ(*x * 4, *z); + EXPECT_EQ(*z, *strct_a); } } EXPECT_EQ(10, proc->GetNEntriesProcessed()); @@ -443,7 +530,9 @@ TEST_F(RNTupleProcessorTest, JoinedJoinComposedPrimary) auto i = proc->RequestField("i"); auto x = proc->RequestField("x"); auto z1 = proc->RequestField("ntuple_aux.z"); + auto strct_a1 = proc->RequestField("ntuple_aux.struct.a"); auto z2 = proc->RequestField("ntuple_aux2.z"); + auto strct_a2 = proc->RequestField("ntuple_aux2.struct.a"); for (auto idx : *proc) { EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); @@ -452,7 +541,9 @@ TEST_F(RNTupleProcessorTest, JoinedJoinComposedPrimary) EXPECT_EQ(static_cast(*i), *x); EXPECT_EQ(*x * 2, *z1); + EXPECT_EQ(*x * 2, *strct_a1); EXPECT_EQ(*x * 3, *z2); + EXPECT_EQ(*x * 3, *strct_a2); } EXPECT_EQ(5, proc->GetNEntriesProcessed()); } @@ -469,7 +560,9 @@ TEST_F(RNTupleProcessorTest, JoinedJoinComposedPrimaryMissingEntries) auto i = proc->RequestField("i"); auto x = proc->RequestField("x"); auto z1 = proc->RequestField("ntuple_aux.z"); + auto strct_a1 = proc->RequestField("ntuple_aux.struct.a"); auto z2 = proc->RequestField("ntuple_aux2.z"); + auto strct_a2 = proc->RequestField("ntuple_aux2.struct.a"); for (auto idx : *proc) { EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); @@ -478,12 +571,16 @@ TEST_F(RNTupleProcessorTest, JoinedJoinComposedPrimaryMissingEntries) EXPECT_EQ(static_cast(*i), *x); EXPECT_EQ(*x * 2, *z1); + EXPECT_EQ(*x * 2, *strct_a1); if (idx % 2 == 1) { EXPECT_FALSE(z2.HasValue()); + EXPECT_FALSE(strct_a2.HasValue()); } else { EXPECT_TRUE(z2.HasValue()); + EXPECT_TRUE(strct_a2.HasValue()); EXPECT_EQ(*x * 4, *z2); + EXPECT_EQ(*x * 4, *strct_a2); } } EXPECT_EQ(5, proc->GetNEntriesProcessed()); @@ -503,7 +600,9 @@ TEST_F(RNTupleProcessorTest, JoinedJoinComposedAuxiliary) auto i = proc->RequestField("i"); auto x = proc->RequestField("x"); auto z1 = proc->RequestField("ntuple_aux.z"); + auto strct_a1 = proc->RequestField("ntuple_aux.struct.a"); auto z2 = proc->RequestField("ntuple_aux.ntuple_aux2.z"); + auto strct_a2 = proc->RequestField("ntuple_aux.ntuple_aux2.struct.a"); for (auto idx : *proc) { EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); @@ -512,7 +611,9 @@ TEST_F(RNTupleProcessorTest, JoinedJoinComposedAuxiliary) EXPECT_EQ(static_cast(*i), *x); EXPECT_EQ(*x * 2, *z1); + EXPECT_EQ(*x * 2, *strct_a1); EXPECT_EQ(*x * 3, *z2); + EXPECT_EQ(*x * 3, *strct_a2); } EXPECT_EQ(5, proc->GetNEntriesProcessed()); @@ -532,7 +633,9 @@ TEST_F(RNTupleProcessorTest, JoinedJoinComposedAuxiliaryMissingEntries) auto i = proc->RequestField("i"); auto x = proc->RequestField("x"); auto z1 = proc->RequestField("ntuple_aux.z"); + auto strct_a1 = proc->RequestField("ntuple_aux.struct.a"); auto z2 = proc->RequestField("ntuple_aux.ntuple_aux2.z"); + auto strct_a2 = proc->RequestField("ntuple_aux.ntuple_aux2.struct.a"); for (auto idx : *proc) { EXPECT_EQ(idx + 1, proc->GetNEntriesProcessed()); @@ -541,12 +644,16 @@ TEST_F(RNTupleProcessorTest, JoinedJoinComposedAuxiliaryMissingEntries) EXPECT_EQ(static_cast(*i), *x); EXPECT_EQ(*x * 2, *z1); + EXPECT_EQ(*x * 2, *strct_a1); if (idx % 2 == 1) { EXPECT_FALSE(z2.HasValue()); + EXPECT_FALSE(strct_a2.HasValue()); } else { EXPECT_TRUE(z2.HasValue()); + EXPECT_TRUE(strct_a2.HasValue()); EXPECT_EQ(*x * 4, *z2); + EXPECT_EQ(*x * 4, *strct_a2); } } @@ -558,14 +665,18 @@ TEST_F(RNTupleProcessorTest, JoinedJoinComposedSameName) auto primaryProc = RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {}); + auto auxProc = RNTupleProcessor::Create({fNTupleNames[2], fFileNames[2]}); + auto proc = RNTupleProcessor::CreateJoin(std::move(primaryProc), std::move(auxProc), {"i"}); + try { - auto auxProc = RNTupleProcessor::Create({fNTupleNames[2], fFileNames[2]}); - auto proc = RNTupleProcessor::CreateJoin(std::move(primaryProc), std::move(auxProc), {"i"}); + proc->RequestField("ntuple_aux.z"); + + FAIL() << "creating an auxiliary processor where its name causes conflicts should throw"; } catch (const ROOT::RException &err) { - EXPECT_THAT( - err.what(), - testing::HasSubstr("a field or nested auxiliary processor named \"ntuple_aux\" is already present as a field " - "in the primary processor; rename the auxiliary processor to avoid conflicts")); + EXPECT_THAT(err.what(), testing::HasSubstr( + "ambiguous field name: \"ntuple_aux.z\" is present in the primary RNTupleProcessor " + "\"ntuple\", but may also refer to a field in the auxiliary RNTupleProcessor named " + "\"ntuple_aux\". To avoid this ambiguity, rename the auxiliary RNTupleProcessor.")); } } diff --git a/tree/ntuple/test/ntuple_processor_join.cxx b/tree/ntuple/test/ntuple_processor_join.cxx index 442d90bb5a397..5bd235cbae4b1 100644 --- a/tree/ntuple/test/ntuple_processor_join.cxx +++ b/tree/ntuple/test/ntuple_processor_join.cxx @@ -123,6 +123,47 @@ TEST_F(RNTupleJoinProcessorTest, IdenticalFieldNames) EXPECT_EQ(10, proc->GetNEntriesProcessed()); } +TEST(RNTupleJoinProcessor, NameConflict) +{ + FileRaii fileGuard("ntuple_processor_join_name_conflict.root"); + { + auto model = RNTupleModel::Create(); + auto fldStruct = model->MakeField("struct"); + + auto file = std::unique_ptr(TFile::Open(fileGuard.GetPath().c_str(), "RECREATE")); + auto ntuple = RNTupleWriter::Append(std::move(model), "ntuple", *file); + + for (unsigned i = 0; i < 5; ++i) { + fldStruct->a = i; + ntuple->Fill(); + } + } + { + auto model = RNTupleModel::Create(); + auto fldB = model->MakeField("b"); + + auto file = std::unique_ptr(TFile::Open(fileGuard.GetPath().c_str(), "UPDATE")); + auto ntuple = RNTupleWriter::Append(std::move(model), "struct", *file); + + for (unsigned i = 0; i < 5; ++i) { + *fldB = i * 2; + ntuple->Fill(); + } + } + + auto proc = RNTupleProcessor::CreateJoin({"ntuple", fileGuard.GetPath()}, {"struct", fileGuard.GetPath()}, {}); + + try { + proc->RequestField("struct.a"); + } catch (const ROOT::RException &err) { + EXPECT_THAT( + err.what(), + testing::HasSubstr("ambiguous field name: \"struct.a\" is present in the primary RNTupleProcessor \"ntuple\", " + "but may also refer to a field in the auxiliary RNTupleProcessor named \"struct\". To " + "avoid this ambiguity, rename the auxiliary RNTupleProcessor.")); + } +} + TEST_F(RNTupleJoinProcessorTest, UnalignedSingleJoinField) { auto proc = RNTupleProcessor::CreateJoin({fNTupleNames[0], fFileNames[0]}, {fNTupleNames[1], fFileNames[1]}, {"i"});