Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 36 additions & 45 deletions tree/ntuple/inc/ROOT/RNTupleProcessor.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,6 @@ class RNTupleProcessor {

protected:
std::string fProcessorName;
std::unique_ptr<ROOT::RNTupleModel> fProtoModel = nullptr;
std::shared_ptr<Internal::RNTupleProcessorEntry> fEntry = nullptr;
std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fFieldIdxs;

Expand All @@ -263,13 +262,12 @@ protected:
std::size_t fCurrentProcessorNumber = 0; //< Number of the currently open inner processor

/////////////////////////////////////////////////////////////////////////////
/// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
/// an existing one.
/// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
virtual void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry) = 0;

/////////////////////////////////////////////////////////////////////////////
/// \brief Check if the processor already has been initialized.
bool IsInitialized() const { return fProtoModel && fEntry; }
bool IsInitialized() const { return fEntry != nullptr; }

/////////////////////////////////////////////////////////////////////////////
/// \brief Connect fields to the page source of the processor's underlying RNTuple(s).
Expand All @@ -289,17 +287,6 @@ protected:
/// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise.
virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) = 0;

/////////////////////////////////////////////////////////////////////////////
/// \brief Get the proto model used by the processor.
///
/// A processor's proto model contains all fields that can be accessed and is inferred from the descriptors of the
/// underlying RNTuples. It is used in RequestField() to check that the requested field is actually valid.
const ROOT::RNTupleModel &GetProtoModel() const
{
assert(fProtoModel);
return *fProtoModel;
}

/////////////////////////////////////////////////////////////////////////////
/// \brief Get the total number of entries in this processor
virtual ROOT::NTupleSize_t GetNEntries() = 0;
Expand All @@ -315,15 +302,16 @@ protected:
///
///
/// \param[in] fieldName Name of the field to add.
/// \param[in] typeName Type of the field to add.
/// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be
/// created.
/// \param[in] provenance Provenance of the processor.
///
/// \return The index of the newly added field in the entry.
///
/// In case the field was already present in the entry, the index of the existing field is returned.
virtual ROOT::RResult<Internal::RNTupleProcessorEntry::FieldIndex_t>
AddFieldToEntry(std::string_view fieldName, void *valuePtr,
virtual Internal::RNTupleProcessorEntry::FieldIndex_t
AddFieldToEntry(const std::string &fieldName, const std::string &typeName, void *valuePtr,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please split all the std::string_view -> const std::string& changes into a separate commit?

const Internal::RNTupleProcessorProvenance &provenance) = 0;

/////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -383,6 +371,8 @@ public:
/// \tparam T Type of the requested field.
///
/// \param[in] fieldName Name of the requested field.
/// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be
/// created.
///
/// \return An RNTupleProcessorOptionalPtr, which provides access to the field's value.
///
Expand All @@ -392,11 +382,14 @@ public:
/// invalid data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through
/// the interface of the returned `RNTupleProcessorOptionalPtr`, to ensure that only valid data can be read.
template <typename T>
RNTupleProcessorOptionalPtr<T> RequestField(std::string_view fieldName, void *valuePtr = nullptr)
RNTupleProcessorOptionalPtr<T> RequestField(const std::string &fieldName, void *valuePtr = nullptr)
{
Initialize(fEntry);
// TODO handle alternative (compatible field types)
auto fieldIdx = AddFieldToEntry(fieldName, valuePtr, Internal::RNTupleProcessorProvenance()).Unwrap();
std::string typeName{};
if constexpr (!std::is_void_v<T>) {
typeName = ROOT::Internal::GetRenormalizedTypeName(typeid(T));
}
auto fieldIdx = AddFieldToEntry(fieldName, typeName, valuePtr, Internal::RNTupleProcessorProvenance());
return RNTupleProcessorOptionalPtr<T>(fEntry.get(), fieldIdx);
}

Expand Down Expand Up @@ -563,8 +556,18 @@ private:
std::unique_ptr<ROOT::Internal::RPageSource> fPageSource;

/////////////////////////////////////////////////////////////////////////////
/// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
/// an existing one.
/// \brief Create a new field and connect it to the processor's page source.
///
/// \param[in] qualifiedFieldName Name of the field to add, prefixed with its parent fields, if applicable.
/// \param[in] typeName Type of the field to add.
///
/// \return The newly created field.
/// \throws ROOT::RException In case the requested field cannot be found on disk.
std::unique_ptr<ROOT::RFieldBase>
CreateAndConnectField(const std::string &qualifiedFieldName, const std::string &typeName);

/////////////////////////////////////////////////////////////////////////////
/// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
///
/// At this point, the page source for the underlying RNTuple of the processor will be created and opened.
void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
Expand Down Expand Up @@ -602,8 +605,8 @@ private:
/// \brief Add a field to the entry.
///
/// \sa RNTupleProcessor::AddFieldToEntry()
ROOT::RResult<Internal::RNTupleProcessorEntry::FieldIndex_t> AddFieldToEntry(
std::string_view fieldName, void *valuePtr = nullptr,
Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry(
const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr,
const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final;

/////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -633,8 +636,9 @@ public:
RNTupleSingleProcessor &operator=(RNTupleSingleProcessor &&) = delete;
~RNTupleSingleProcessor() override
{
// The proto model needs to be deleted before fPageSource.
fProtoModel.release();
// The entry's fields need to be deleted before fPageSource.
if (fEntry)
fEntry->Clear();
};
};

Expand All @@ -655,8 +659,7 @@ private:
Internal::RNTupleProcessorProvenance fProvenance;

/////////////////////////////////////////////////////////////////////////////
/// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
/// an existing one.
/// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;

/////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -697,8 +700,8 @@ private:
/// \brief Add a field to the entry.
///
/// \sa RNTupleProcessor::AddFieldToEntry()
ROOT::RResult<Internal::RNTupleProcessorEntry::FieldIndex_t> AddFieldToEntry(
std::string_view fieldName, void *valuePtr = nullptr,
Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry(
const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr,
const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final;

/////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -753,8 +756,7 @@ private:

std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fAuxiliaryFieldIdxs;

/// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
/// an existing one.
/// \brief Initialize the processor by creating an (initially empty) `fEntry`, or setting an existing one.
void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;

/////////////////////////////////////////////////////////////////////////////
Expand All @@ -775,17 +777,6 @@ private:
/// \brief Get the total number of entries in this processor.
ROOT::NTupleSize_t GetNEntries() final;

/////////////////////////////////////////////////////////////////////////////
/// \brief Set the processor's proto model by combining the primary and auxiliary models.
///
/// \param[in] primaryModel The proto model of the primary processor.
/// \param[in] auxModel The proto model of the auxiliary processors.
///
/// To prevent field name clashes when one or more models have fields with duplicate names, fields from each
/// auxiliary model are stored as a anonymous record, and subsequently registered as subfields in the join model.
/// This way, they can be accessed from the processor's entry as `auxNTupleName.fieldName`.
void SetProtoModel(std::unique_ptr<ROOT::RNTupleModel> primaryModel, std::unique_ptr<ROOT::RNTupleModel> auxModel);

/////////////////////////////////////////////////////////////////////////////
/// \brief Set the validity for all fields in the auxiliary processor at once.
void SetAuxiliaryFieldValidity(bool validity);
Expand All @@ -809,8 +800,8 @@ private:
/// \brief Add a field to the entry.
///
/// \sa RNTupleProcessor::AddFieldToEntry()
ROOT::RResult<Internal::RNTupleProcessorEntry::FieldIndex_t> AddFieldToEntry(
std::string_view fieldName, void *valuePtr = nullptr,
Internal::RNTupleProcessorEntry::FieldIndex_t AddFieldToEntry(
const std::string &fieldName, const std::string &typeName, void *valuePtr = nullptr,
const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final;

/////////////////////////////////////////////////////////////////////////////
Expand Down
91 changes: 66 additions & 25 deletions tree/ntuple/inc/ROOT/RNTupleProcessorEntry.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -97,20 +97,36 @@ public:

private:
struct RProcessorValue {
std::unique_ptr<ROOT::RFieldBase> fField;
std::string fQualifiedFieldName;
ROOT::RFieldBase::RValue fValue;
bool fIsValid;
RNTupleProcessorProvenance fProcessorProvenance;

RProcessorValue(ROOT::RFieldBase::RValue &&value, bool isValid, RNTupleProcessorProvenance provenance)
: fValue(std::move(value)), fIsValid(isValid), fProcessorProvenance(provenance)
RProcessorValue(std::unique_ptr<ROOT::RFieldBase> field, std::string_view qualifiedFieldName,
ROOT::RFieldBase::RValue &&value, bool isValid, RNTupleProcessorProvenance provenance)
: fField(std::move(field)),
fQualifiedFieldName(qualifiedFieldName),
fValue(std::move(value)),
fIsValid(isValid),
fProcessorProvenance(provenance)
{
}
};

std::vector<RProcessorValue> fProcessorValues;
std::unordered_map<std::string, FieldIndex_t> fFieldName2Index;
// Maps from the field name to all type alternatives for that field that have been added to the entry.
std::unordered_map<std::string, std::unordered_set<FieldIndex_t>> fFieldName2Index;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure about using an unordered_set here: unless we expect hundreds of alternative types for a single field, a vector is probably gonna be much faster (especially if the most common case is a single element in it)


public:
/////////////////////////////////////////////////////////////////////////////
/// \brief Clear all fields from the entry.
void Clear()
{
fProcessorValues.clear();
fFieldName2Index.clear();
}

/////////////////////////////////////////////////////////////////////////////
/// \brief Set the validity of a field, i.e. whether it is possible to read its value in the current entry.
///
Expand Down Expand Up @@ -143,7 +159,7 @@ public:
assert(fieldIdx < fProcessorValues.size());

for (const auto &[fieldName, index] : fFieldName2Index) {
if (index == fieldIdx) {
if (index.find(fieldIdx) != index.end()) {
return fieldName;
}
}
Expand All @@ -156,41 +172,59 @@ public:
/////////////////////////////////////////////////////////////////////////////
/// \brief Find the field index of the provided field in the entry.
///
/// \param[in] fieldName The name of the field in the entry.
/// \param[in] canonicalFieldName The name of the field in the entry, including its processor name prefixes and
/// parent field names, if applicable.
///
/// \return A `std::optional` containing the field index if it was found.
std::optional<FieldIndex_t> FindFieldIndex(std::string_view fieldName) const
std::optional<FieldIndex_t> FindFieldIndex(std::string_view canonicalFieldName, std::string_view typeName) const
{
auto it = fFieldName2Index.find(std::string(fieldName));
auto it = fFieldName2Index.find(std::string(canonicalFieldName));
if (it == fFieldName2Index.end()) {
return std::nullopt;
}
return it->second;

const auto &fieldIdxs = it->second;
assert(!fieldIdxs.empty());

for (auto idx : fieldIdxs) {
if (fProcessorValues[idx].fField->GetTypeName() == typeName) {
return idx;
}
}

return std::nullopt;
}

/////////////////////////////////////////////////////////////////////////////
/// \brief Add a new field to the entry.
///
/// \param[in] fieldName Name of the field to add.
/// \param[in] qualifiedFieldName Name of the field to add, including its parent field if applicable.
/// \param[in] field Reference to the field to add, used to to create its corresponding RValue.
/// \param[in] valuePtr Pointer to an object corresponding to the field's type to bind to its value. If this is a
/// `nullptr`, a pointer will be created.
/// \param[in] provenance Processor provenance of the field.
///
/// \return The field index of the newly added field.
FieldIndex_t AddField(std::string_view fieldName, ROOT::RFieldBase &field, void *valuePtr,
FieldIndex_t AddField(const std::string &qualifiedFieldName, std::unique_ptr<ROOT::RFieldBase> field, void *valuePtr,
const RNTupleProcessorProvenance &provenance)
{
if (FindFieldIndex(fieldName))
auto fieldNameWithProcessorPrefix = qualifiedFieldName;
if (const auto &processorPrefix = provenance.Get(); !processorPrefix.empty())
fieldNameWithProcessorPrefix = processorPrefix + "." + qualifiedFieldName;

if (FindFieldIndex(fieldNameWithProcessorPrefix, field->GetTypeName()))
throw ROOT::RException(
R__FAIL("field \"" + field.GetQualifiedFieldName() + "\" is already present in the entry"));
R__FAIL("field \"" + fieldNameWithProcessorPrefix + "\" is already present in the entry"));

auto value = field.CreateValue();
auto fieldIdx = fProcessorValues.size();
fFieldName2Index[fieldNameWithProcessorPrefix].insert(fieldIdx);

assert(field);
auto value = field->CreateValue();
if (valuePtr)
value.BindRawPtr(valuePtr);
auto fieldIdx = fProcessorValues.size();
fFieldName2Index[std::string(fieldName)] = fieldIdx;
fProcessorValues.emplace_back(RProcessorValue(std::move(value), true, provenance));
fProcessorValues.emplace_back(
RProcessorValue(std::move(field), qualifiedFieldName, std::move(value), true, provenance));

return fieldIdx;
}
Expand All @@ -200,14 +234,22 @@ public:
///
/// \param[in] fieldIdx Index of the field to update.
/// \param[in] field The new field to use in the entry.
void UpdateField(FieldIndex_t fieldIdx, ROOT::RFieldBase &field)
void UpdateField(FieldIndex_t fieldIdx, std::unique_ptr<ROOT::RFieldBase> field)
{
assert(fieldIdx < fProcessorValues.size());

auto currValuePtr = fProcessorValues[fieldIdx].fValue.GetPtr<void>();
auto value = field.CreateValue();
value.Bind(currValuePtr);
fProcessorValues[fieldIdx].fValue = value;
auto &fieldInfo = fProcessorValues[fieldIdx];

if (field) {
auto newValue = field->CreateValue();
auto currValuePtr = fieldInfo.fValue.GetPtr<void>();
newValue.Bind(currValuePtr);
fieldInfo.fField = std::move(field);
fieldInfo.fValue = std::move(newValue);
fieldInfo.fIsValid = true;
} else {
fieldInfo.fIsValid = false;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this also reset fField and fValue?

}
}

/////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -275,14 +317,13 @@ public:
}

/////////////////////////////////////////////////////////////////////////////
/// \brief Get the name of a field in the entry, including processor name prefixes in the case of auxiliary fields.
/// \brief Get the name of a field in the entry, including its parent fields.
///
/// \param[in] fieldIdx The index of the field in the entry.
std::string GetFieldName(FieldIndex_t fieldIdx) const
std::string GetQualifiedFieldName(FieldIndex_t fieldIdx) const
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this point this can probably return a const std::string &

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure, by adding more field / values later on, fProcessorValues may be forced to reallocate...

{
assert(fieldIdx < fProcessorValues.size());
return fProcessorValues[fieldIdx].fProcessorProvenance.Get() + "." +
fProcessorValues[fieldIdx].fValue.GetField().GetQualifiedFieldName();
return fProcessorValues[fieldIdx].fQualifiedFieldName;
}

/////////////////////////////////////////////////////////////////////////////
Expand Down
Loading
Loading