Skip to content
Merged
1 change: 1 addition & 0 deletions src/nwb2bids/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"Severity",
# Public submodules
"bids_models",
"sanitization",
"testing",
]

Expand Down
25 changes: 23 additions & 2 deletions src/nwb2bids/_command_line_interface/_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .._core._convert_nwb_dataset import convert_nwb_dataset
from .._inspection._inspection_result import InspectionResult, Severity
from .._tools._pluralize import _pluralize
from ..sanitization import SanitizationConfig
from ..testing import generate_ephys_tutorial


Expand Down Expand Up @@ -62,6 +63,14 @@ def _nwb2bids_cli():
type=rich_click.Path(exists=True, dir_okay=False, readable=True),
default=None,
)
@rich_click.option(
"--sanitization",
help="Specifies types of sanitization to apply when creating the BIDS dataset.",
required=False,
type=rich_click.Choice(["sub-labels", "ses-labels"], case_sensitive=True),
multiple=True,
default=None,
)
@rich_click.option("--silent", "-s", is_flag=True, help="Suppress all console output.", default=False)
@rich_click.option(
"--run-id",
Expand All @@ -78,6 +87,7 @@ def _nwb2bids_cli():
def _run_convert_nwb_dataset(
nwb_paths: tuple[str, ...],
bids_directory: str | None = None,
sanitization: tuple[typing.Literal["sub-labels", "ses-labels"]] = (),
additional_metadata_file_path: str | None = None,
file_mode: typing.Literal["copy", "move", "symlink", "auto"] = "auto",
cache_directory: str | None = None,
Expand All @@ -94,12 +104,15 @@ def _run_convert_nwb_dataset(
message = "Please provide at least one NWB file or directory to convert."
raise ValueError(message)
handled_nwb_paths = [pathlib.Path(nwb_path) for nwb_path in nwb_paths]
# Convert CLI args to snake_case
sanitization_config = SanitizationConfig(**{value.replace("-", "_"): True for value in sanitization})
Comment thread
CodyCBakerPhD marked this conversation as resolved.

run_config_kwargs = {
"bids_directory": bids_directory,
"additional_metadata_file_path": additional_metadata_file_path,
"file_mode": file_mode,
"cache_directory": cache_directory,
"sanitization_config": sanitization_config,
"run_id": run_id,
}

Expand Down Expand Up @@ -149,8 +162,15 @@ def _run_convert_nwb_dataset(
rich_click.echo(message=console_notification)
return

sanitization_text = ""
if any(sanitization_config.model_dump().values()):
sanitization_text = (
"\n\nNote: Sanitization was applied to file and directory names during conversion. "
"Please review the converted BIDS dataset to ensure all names are appropriate.\n\n"
)

if criticals:
text = f"\nBIDS dataset was successfully created, but may not be valid!{notif_text}"
text = f"\nBIDS dataset was successfully created, but may not be valid!{sanitization_text}{notif_text}"
console_notification = rich_click.style(text=text, fg="yellow")
rich_click.echo(message=console_notification)
return
Expand All @@ -161,7 +181,8 @@ def _run_convert_nwb_dataset(

text += (
f'{number_of_notifications} {_pluralize(n=number_of_notifications, phrase="suggestion")} for improvement '
f'{_pluralize(n=number_of_notifications, phrase="was", plural="were")} found during conversion.{notif_text}'
f'{_pluralize(n=number_of_notifications, phrase="was", plural="were")} found during conversion.'
f"{sanitization_text}{notif_text}"
)
console_notification = rich_click.style(text=text, fg="green")
rich_click.echo(message=console_notification)
Expand Down
56 changes: 38 additions & 18 deletions src/nwb2bids/_converters/_dataset_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,13 @@ def extract_metadata(self) -> None:

def convert_to_bids_dataset(self) -> None:
"""Convert the directory of NWB files to a BIDS dataset."""
self.run_config.bids_directory.mkdir(exist_ok=True)

try:
self.write_dataset_description()
self.write_participants_metadata()
self.write_sessions_metadata()

for session_converter in self.session_converters:
session_converter.convert_to_bids_session()

self.write_participants_metadata()
self.write_sessions_metadata()
self.write_dataset_description()
except Exception: # noqa
message = InspectionResult(
title="Failed to convert to BIDS dataset",
Expand Down Expand Up @@ -277,12 +275,28 @@ def write_participants_metadata(self) -> None:
aggregated_data_frame = full_participants_data_frame.copy()

# Deduplicate all rows of the frame
deduplicated_data_frame = aggregated_data_frame.drop_duplicates(ignore_index=True)
deduplicated_data_frame = aggregated_data_frame.drop_duplicates(ignore_index=True).copy()

# Apply sanitization
sanitizations = [converter.session_metadata.sanitization for converter in self.session_converters]
sanitized_participant_ids = {
sanitization.original_participant_id: sanitization.sanitized_participant_id
for sanitization in sanitizations
}

with pandas.option_context("mode.chained_assignment", None):
deduplicated_data_frame["participant_id"] = (
deduplicated_data_frame["participant_id"]
.apply(lambda participant_id: sanitized_participant_ids[participant_id])
.astype("string")
)

# BIDS requires sub- prefix in table values
participants_data_frame = deduplicated_data_frame.copy()
participants_data_frame["participant_id"] = participants_data_frame["participant_id"].apply(
lambda participant_id: f"sub-{participant_id}"
participants_data_frame = deduplicated_data_frame.copy(deep=True)
participants_data_frame["participant_id"] = (
participants_data_frame["participant_id"]
.apply(lambda participant_id: f"sub-{participant_id}")
.astype("string")
)
is_field_in_table = {field: True for field in participants_data_frame.keys()}

Expand Down Expand Up @@ -321,29 +335,35 @@ def write_sessions_metadata(self) -> None:
"""
participant_id_to_sessions = collections.defaultdict(list)
for session_converter in self.session_converters:
participant_id_to_sessions[session_converter.session_metadata.participant.participant_id].append(
session_converter
participant_id_to_sessions[session_converter.session_metadata.sanitization.sanitized_participant_id].append(
session_converter.session_metadata
)

# TODO: expand beyond just session_id field (mainly via additional metadata)
sessions_schema = BidsSessionMetadata.model_json_schema()
sessions_json = {"session_id": sessions_schema["properties"]["session_id"]["description"]}

for participant_id, sessions_metadata in participant_id_to_sessions.items():
subject_directory = self.run_config.bids_directory / f"sub-{participant_id}"
sanitized_participant_id = participant_id
sanitized_session_ids = [
session_metadata.sanitization.sanitized_session_id for session_metadata in sessions_metadata
]

subject_directory = self.run_config.bids_directory / f"sub-{sanitized_participant_id}"
subject_directory.mkdir(exist_ok=True)

# BIDS requires ses- prefix in table values
session_ids = [session_converter.session_id for session_converter in sessions_metadata]
sessions_data_frame = pandas.DataFrame({"session_id": [f"ses-{session_id}" for session_id in session_ids]})
sessions_data_frame = pandas.DataFrame(
{"session_id": [f"ses-{session_id}" for session_id in sanitized_session_ids]}
)

session_tsv_file_path = subject_directory / f"sub-{participant_id}_sessions.tsv"
session_tsv_file_path = subject_directory / f"sub-{sanitized_participant_id}_sessions.tsv"
sessions_data_frame.to_csv(path_or_buf=session_tsv_file_path, mode="w", index=False, sep="\t")

session_json_file_path = subject_directory / f"sub-{participant_id}_sessions.json"
session_json_file_path = subject_directory / f"sub-{sanitized_participant_id}_sessions.json"
with session_json_file_path.open(mode="w") as file_stream:
json.dump(obj=sessions_json, fp=file_stream, indent=4)

for session_id in session_ids:
for session_id in sanitized_session_ids:
session_directory = subject_directory / f"ses-{session_id}"
session_directory.mkdir(exist_ok=True)
15 changes: 15 additions & 0 deletions src/nwb2bids/_converters/_run_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .._core._file_mode import _determine_file_mode
from .._core._home import _get_nwb2bids_home_directory
from .._core._validate_existing_bids import _validate_bids_directory
from ..sanitization import SanitizationConfig


def _generate_run_id() -> str:
Expand Down Expand Up @@ -45,6 +46,9 @@ class RunConfig(pydantic.BaseModel):
cache_directory : directory path
The directory where run specific files (e.g., notifications, sanitization reports) will be stored.
Defaults to `~/.nwb2bids`.
sanitization_config : nwb2bids.sanitization.SanitizationConfig
Specifies the types of sanitization to apply when creating the BIDS dataset.
See `nwb2bids.sanitization.SanitizationConfig` for details on the specific options.
run_id : str
On each unique run of nwb2bids, a run ID is generated.
Set this option to override this to any identifying string.
Expand All @@ -64,6 +68,10 @@ class RunConfig(pydantic.BaseModel):
cache_directory: typing.Annotated[
pydantic.DirectoryPath, pydantic.Field(default_factory=_get_nwb2bids_home_directory)
]
sanitization_config: typing.Annotated[
SanitizationConfig,
pydantic.Field(default_factory=lambda _: SanitizationConfig()),
]
run_id: typing.Annotated[str, pydantic.Field(default_factory=_generate_run_id)]
_nwb2bids_directory: pathlib.Path = pydantic.PrivateAttr()

Expand All @@ -75,6 +83,13 @@ class RunConfig(pydantic.BaseModel):
def model_post_init(self, context: typing.Any, /) -> None:
    """
    Derive the private `_nwb2bids_directory` path from `bids_directory`.

    The `context` argument is accepted to match the hook signature but is not used here.
    """
    hidden_directory_name = ".nwb2bids"
    self._nwb2bids_directory = self.bids_directory / hidden_directory_name

@pydantic.computed_field
@property
def sanitization_file_path(self) -> pathlib.Path:
    """The file path leading to a record of sanitizations made."""
    # The file name is keyed on the run ID and lives inside the private `.nwb2bids` directory.
    file_name = f"{self.run_id}_sanitization.txt"
    return self._nwb2bids_directory / file_name

@pydantic.computed_field
@property
def notifications_file_path(self) -> pathlib.Path:
Expand Down
31 changes: 19 additions & 12 deletions src/nwb2bids/_converters/_session_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,16 @@ def from_nwb_paths(
return session_converters

def extract_metadata(self) -> None:
if self.session_metadata is None:
self.session_metadata = BidsSessionMetadata.from_nwbfile_paths(nwbfile_paths=self.nwbfile_paths)
self.messages += self.session_metadata.messages
if self.session_metadata is not None:
return

self.run_config.bids_directory.mkdir(exist_ok=True)
self.run_config._nwb2bids_directory.mkdir(exist_ok=True)

self.session_metadata = BidsSessionMetadata.from_nwbfile_paths(
nwbfile_paths=self.nwbfile_paths, run_config=self.run_config
)
self.messages += self.session_metadata.messages

def convert_to_bids_session(self) -> None:
"""
Expand All @@ -111,8 +118,8 @@ def convert_to_bids_session(self) -> None:
if self.session_metadata is None:
self.extract_metadata()

participant_id = self.session_metadata.participant.participant_id
session_id = self.session_id
participant_id = self.session_metadata.sanitization.sanitized_participant_id
session_id = self.session_metadata.sanitization.sanitized_session_id
file_prefix = f"sub-{participant_id}_ses-{session_id}"

self.write_ecephys_files()
Expand Down Expand Up @@ -140,7 +147,7 @@ def convert_to_bids_session(self) -> None:

def write_ecephys_files(self) -> None:
"""
Write the `_probes`, `_channels`, and `_electrodes` metadata files, both `.tsv` and `.json`, for this session .
Write the `_probes`, `_channels`, and `_electrodes` metadata files, both `.tsv` and `.json`, for this session.
"""
if len(self.nwbfile_paths) > 1:
message = "Conversion of multiple NWB files per session is not yet supported."
Expand All @@ -153,8 +160,8 @@ def write_ecephys_files(self) -> None:
):
return

participant_id = self.session_metadata.participant.participant_id
session_id = self.session_id
participant_id = self.session_metadata.sanitization.sanitized_participant_id
session_id = self.session_metadata.sanitization.sanitized_session_id
file_prefix = f"sub-{participant_id}_ses-{session_id}"

ecephys_directory = self._establish_ecephys_subdirectory()
Expand Down Expand Up @@ -188,8 +195,8 @@ def write_events_files(self) -> None:
message = "Conversion of multiple NWB files per session is not yet supported."
raise NotImplementedError(message)

participant_id = self.session_metadata.participant.participant_id
session_id = self.session_id
participant_id = self.session_metadata.sanitization.sanitized_participant_id
session_id = self.session_metadata.sanitization.sanitized_session_id
file_prefix = f"sub-{participant_id}_ses-{session_id}"

ecephys_directory = self._establish_ecephys_subdirectory()
Expand All @@ -201,8 +208,8 @@ def write_events_files(self) -> None:
self.session_metadata.events.to_json(file_path=session_events_metadata_file_path)

def _establish_ecephys_subdirectory(self) -> pathlib.Path:
participant_id = self.session_metadata.participant.participant_id
session_id = self.session_id
participant_id = self.session_metadata.sanitization.sanitized_participant_id
session_id = self.session_metadata.sanitization.sanitized_session_id

subject_directory = self.run_config.bids_directory / f"sub-{participant_id}"
subject_directory.mkdir(exist_ok=True)
Expand Down
21 changes: 20 additions & 1 deletion src/nwb2bids/bids_models/_bids_session_metadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pathlib
import re
import typing

import h5py
import pydantic
Expand All @@ -13,8 +14,10 @@
from ._model_globals import _VALID_ID_REGEX
from ._participant import Participant
from ._probes import ProbeTable
from .._converters._run_config import RunConfig
from .._inspection._inspection_result import Category, DataStandard, InspectionResult, Severity
from .._tools import cache_read_nwb
from ..sanitization import Sanitization


class BidsSessionMetadata(BaseMetadataContainerModel):
Expand All @@ -30,6 +33,19 @@ class BidsSessionMetadata(BaseMetadataContainerModel):
probe_table: ProbeTable | None = None
electrode_table: ElectrodeTable | None = None
channel_table: ChannelTable | None = None
run_config: RunConfig = pydantic.Field(default_factory=RunConfig)
sanitization: Sanitization | None = None

def model_post_init(self, context: typing.Any, /) -> None:
    """
    Fill in the `sanitization` field when the caller did not provide one.

    An explicitly supplied `sanitization` value is left untouched. Otherwise a `Sanitization` is built from
    the run configuration together with this session's original session ID and participant ID.
    The `context` argument is not used.
    """
    if self.sanitization is None:
        run_config = self.run_config
        self.sanitization = Sanitization(
            sanitization_config=run_config.sanitization_config,
            sanitization_file_path=run_config.sanitization_file_path,
            original_session_id=self.session_id,
            original_participant_id=self.participant.participant_id,
        )

@pydantic.computed_field
@property
Expand Down Expand Up @@ -93,7 +109,9 @@ def _check_fields(self, file_paths: list[pathlib.Path] | list[pydantic.HttpUrl])
@classmethod
@pydantic.validate_call
def from_nwbfile_paths(
cls, nwbfile_paths: list[pydantic.FilePath] | list[pydantic.HttpUrl] = pydantic.Field(min_length=1)
cls,
nwbfile_paths: list[pydantic.FilePath] | list[pydantic.HttpUrl] = pydantic.Field(min_length=1),
run_config: RunConfig = pydantic.Field(default_factory=RunConfig),
) -> typing_extensions.Self:
# Differentiate local path from URL
if isinstance(next(iter(nwbfile_paths)), pathlib.Path):
Expand All @@ -116,6 +134,7 @@ def from_nwbfile_paths(
dictionary = {
"session_id": session_id,
"participant": participant,
"run_config": run_config,
}
if events is not None:
dictionary["events"] = events
Expand Down
7 changes: 5 additions & 2 deletions src/nwb2bids/bids_models/_participant.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


class Participant(BaseMetadataModel):
participant_id: str = pydantic.Field(
participant_id: str | None = pydantic.Field(
description="A unique identifier for this participant.",
default=None,
)
Expand Down Expand Up @@ -203,7 +203,10 @@ def from_nwbfiles(cls, nwbfiles: list[pydantic.InstanceOf[pynwb.NWBFile]]) -> ty
severity=Severity.CRITICAL,
)
)
participant = cls(messages=messages)
participant = cls(
messages=messages,
participant_id="0", # Similar to the missing session ID; let placeholder default to "0"
)
return participant

participant = cls(
Expand Down
16 changes: 16 additions & 0 deletions src/nwb2bids/sanitization/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
Collection of sanitization procedures to apply per field.

'Sanitization' in this sense means taking the NWB values for various fields, such as labels, and transforming them
to conform to BIDS.

Enabling more types of sanitization increases the validity of the resulting BIDS dataset.
"""

from ._configuration import SanitizationConfig
from ._sanitization import Sanitization

__all__ = [
"Sanitization",
"SanitizationConfig",
]
Loading