Skip to content

Commit 8f75a7d

Browse files
authored
Fix and extend anonymize_info (mne-tools#13647)
1 parent cd9ae4d commit 8f75a7d

4 files changed

Lines changed: 78 additions & 16 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add finer-grained control to :func:`mne.io.anonymize_info` and the related methods :meth:`mne.io.Raw.anonymize`, :meth:`mne.Epochs.anonymize`, and :meth:`mne.Evoked.anonymize` by allowing ``keep_his`` to accept one or more strings, so that individual fields can now be excluded from anonymization; ``keep_his=True`` remains unchanged (it still retains ``"his_id"``, ``"sex"``, and ``"hand"``), and the default ``keep_his=False`` also remains unchanged (it still anonymizes all fields), by `Clemens Brunner`_.

mne/_fiff/meas_info.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3668,6 +3668,18 @@ def anonymize_info(info, daysback=None, keep_his=False, verbose=None):
36683668
"""
36693669
_validate_type(info, "info", "self")
36703670

3671+
valid_fields = {"his_id", "sex", "hand"}
3672+
if isinstance(keep_his, bool): # True means keep all fields, False means keep none
3673+
keep_fields = valid_fields if keep_his else set()
3674+
elif isinstance(keep_his, str):
3675+
_check_option("keep_his", keep_his, valid_fields)
3676+
keep_fields = {keep_his}
3677+
else:
3678+
_validate_type(keep_his, (list, tuple, set), "keep_his")
3679+
keep_fields = set(keep_his)
3680+
for field in keep_fields:
3681+
_check_option("keep_his", field, valid_fields)
3682+
36713683
default_anon_dos = datetime.datetime(
36723684
2000, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
36733685
)
@@ -3718,17 +3730,19 @@ def anonymize_info(info, daysback=None, keep_his=False, verbose=None):
37183730
if subject_info is not None:
37193731
if subject_info.get("id") is not None:
37203732
subject_info["id"] = default_subject_id
3721-
if keep_his:
3733+
if keep_fields:
37223734
logger.info(
3723-
"Not fully anonymizing info - keeping his_id, sex, and hand info"
3735+
f"Not fully anonymizing info - keeping {', '.join(sorted(keep_fields))}"
3736+
" of subject_info"
37243737
)
3725-
else:
3738+
if "his_id" not in keep_fields:
37263739
if subject_info.get("his_id") is not None:
37273740
subject_info["his_id"] = str(default_subject_id)
3741+
if "sex" not in keep_fields:
37283742
if subject_info.get("sex") is not None:
37293743
subject_info["sex"] = default_sex
3730-
if subject_info.get("hand") is not None:
3731-
del subject_info["hand"] # there's no "unknown" setting
3744+
if "hand" not in keep_fields:
3745+
subject_info.pop("hand", None) # there's no "unknown" setting
37323746

37333747
for key in ("last_name", "first_name", "middle_name"):
37343748
if subject_info.get(key) is not None:

mne/_fiff/tests/test_meas_info.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,7 @@ def _test_anonymize_info(base_info, tmp_path):
743743
base_info["subject_info"].update(
744744
birthday=date(1987, 4, 8),
745745
his_id="foobar",
746-
sex=0,
746+
sex=1,
747747
)
748748

749749
# generate expected info...
@@ -812,7 +812,7 @@ def _adjust_back(e_i, dt):
812812
exp_info_2 = exp_info.copy()
813813
with exp_info_2._unlock():
814814
exp_info_2["subject_info"]["his_id"] = "foobar"
815-
exp_info_2["subject_info"]["sex"] = 0
815+
exp_info_2["subject_info"]["sex"] = 1
816816
exp_info_2["subject_info"]["hand"] = 1
817817

818818
# exp 3 tests a supplied daysback
@@ -842,12 +842,54 @@ def _check_equiv(got, want, err_msg):
842842
new_info = anonymize_info(base_info.copy(), keep_his=True)
843843
_check_equiv(new_info, exp_info_2, err_msg="anon keep_his mismatch")
844844

845+
# keep only his_id
846+
new_info = anonymize_info(base_info.copy(), keep_his="his_id")
847+
assert new_info["subject_info"]["his_id"] == "foobar"
848+
assert new_info["subject_info"]["sex"] == 0
849+
assert "hand" not in new_info["subject_info"]
850+
851+
# keep only sex
852+
new_info = anonymize_info(base_info.copy(), keep_his="sex")
853+
assert new_info["subject_info"]["his_id"] == "0"
854+
assert new_info["subject_info"]["sex"] == 1
855+
assert "hand" not in new_info["subject_info"]
856+
857+
# keep only hand
858+
new_info = anonymize_info(base_info.copy(), keep_his="hand")
859+
assert new_info["subject_info"]["his_id"] == "0"
860+
assert new_info["subject_info"]["sex"] == 0
861+
assert new_info["subject_info"]["hand"] == 1
862+
863+
# keep his_id and sex
864+
new_info = anonymize_info(base_info.copy(), keep_his=["his_id", "sex"])
865+
assert new_info["subject_info"]["his_id"] == "foobar"
866+
assert new_info["subject_info"]["sex"] == 1
867+
assert "hand" not in new_info["subject_info"]
868+
869+
# keep only hand (passed as a list)
870+
new_info = anonymize_info(base_info.copy(), keep_his=["hand"])
871+
assert new_info["subject_info"]["his_id"] == "0"
872+
assert new_info["subject_info"]["sex"] == 0
873+
assert new_info["subject_info"]["hand"] == 1
874+
875+
# keep his_id and hand
876+
new_info = anonymize_info(base_info.copy(), keep_his=("his_id", "hand"))
877+
assert new_info["subject_info"]["his_id"] == "foobar"
878+
assert new_info["subject_info"]["sex"] == 0
879+
assert new_info["subject_info"]["hand"] == 1
880+
881+
# invalid keep_his values
882+
with pytest.raises(ValueError, match="Invalid value"):
883+
anonymize_info(base_info.copy(), keep_his="invalid_field")
884+
885+
with pytest.raises(ValueError, match="Invalid value"):
886+
anonymize_info(base_info.copy(), keep_his=["his_id", "invalid"])
887+
845888
new_info = anonymize_info(base_info.copy(), daysback=delta_t_2.days)
846889
_check_equiv(new_info, exp_info_3, err_msg="anon daysback mismatch")
847890

848891
with pytest.raises(RuntimeError, match="anonymize_info generated"):
849892
anonymize_info(base_info.copy(), daysback=delta_t_3.days)
850-
# assert_object_equal(new_info, exp_info_4)
851893

852894
# test with meas_date = None
853895
with base_info._unlock():

mne/utils/docs.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,9 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75):
249249
- meas_date, file_id, meas_id
250250
A default value, or as specified by ``daysback``.
251251
- subject_info
252-
Default values, except for 'birthday' which is adjusted
253-
to maintain the subject age.
252+
Default values, except for 'birthday', which is adjusted to maintain the subject
253+
age. If ``keep_his`` is not ``False``, then some or all of the fields 'his_id',
254+
'sex', and 'hand' are not anonymized, depending on the value of ``keep_his``.
254255
- experimenter, proj_name, description
255256
Default strings.
256257
- utc_offset
@@ -2276,12 +2277,16 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75):
22762277
# K
22772278

22782279
docdict["keep_his_anonymize_info"] = """
2279-
keep_his : bool
2280-
If ``True``, ``his_id`` of ``subject_info`` will **not** be overwritten.
2281-
Defaults to ``False``.
2282-
2283-
.. warning:: This could mean that ``info`` is not fully
2284-
anonymized. Use with caution.
2280+
keep_his : bool | "his_id" | "sex" | "hand" | sequence of {"his_id", "sex", "hand"}
2281+
If ``True``, ``his_id``, ``sex``, and ``hand`` of ``subject_info`` will **not** be
2282+
overwritten. If ``False``, these fields will be anonymized. If ``"his_id"``,
2283+
``"sex"``, or ``"hand"`` (or any combination thereof in a sequence), only those
2284+
fields will **not** be anonymized. Defaults to ``False``.
2285+
2286+
.. warning:: Setting ``keep_his`` to anything other than ``False`` may result in
2287+
``info`` not being fully anonymized. Use with caution.
2288+
.. versionchanged:: 1.12
2289+
Added support for a single ``str`` or a sequence of ``str``.
22852290
"""
22862291

22872292
docdict["kit_badcoils"] = """

0 commit comments

Comments
 (0)