diff --git a/.github/workflows/run_dev_tests.yml b/.github/workflows/run_dev_tests.yml index b1a19b3..76f4f09 100644 --- a/.github/workflows/run_dev_tests.yml +++ b/.github/workflows/run_dev_tests.yml @@ -9,7 +9,7 @@ jobs: ci: strategy: matrix: - python-version: [ '3.8', '3.9', '3.10' ] + python-version: [ '3.10', '3.11', '3.12' ] uses: AllenNeuralDynamics/.github/.github/workflows/test-ci.yml@main with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/run_main_tests.yml b/.github/workflows/run_main_tests.yml index 9eaf522..855ec21 100644 --- a/.github/workflows/run_main_tests.yml +++ b/.github/workflows/run_main_tests.yml @@ -10,7 +10,7 @@ jobs: ci: strategy: matrix: - python-version: [ '3.8', '3.9', '3.10' ] + python-version: [ '3.10', '3.11', '3.12' ] uses: AllenNeuralDynamics/.github/.github/workflows/test-ci.yml@main with: python-version: ${{ matrix.python-version }} diff --git a/pyproject.toml b/pyproject.toml index c26d208..45a9768 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ rds = [ "SQLAlchemy==1.4.49" ] helpers = [ - "aind-data-schema>=1.1.0,<2.0", + "aind-data-schema>=2.0", "pandas", ] full = [ diff --git a/src/aind_data_access_api/document_db.py b/src/aind_data_access_api/document_db.py index 7bd8fb0..81f04af 100644 --- a/src/aind_data_access_api/document_db.py +++ b/src/aind_data_access_api/document_db.py @@ -585,7 +585,7 @@ def __init__( host: str, database: str = "metadata_index", collection: str = "data_assets", - version: str = "v1", + version: str = "v2", boto: Optional[BotoSession] = None, session: Optional[Session] = None, ): diff --git a/src/aind_data_access_api/helpers/data_schema.py b/src/aind_data_access_api/helpers/data_schema.py index a5fc244..46df51e 100644 --- a/src/aind_data_access_api/helpers/data_schema.py +++ b/src/aind_data_access_api/helpers/data_schema.py @@ -137,15 +137,12 @@ def get_quality_control_status_df( for name, qc in zip(names, qcs): qc_metrics_flat = {} qc_metrics_flat["name"] = name - for eval in qc.evaluations: - for metric in eval.metrics: - # Find the most recent status before the given datetime - for status in reversed(metric.status_history): - if status.timestamp <= date: - qc_metrics_flat[f"{eval.name}_{metric.name}"] = ( - status.status - ) - break + for metric in qc.metrics: + # Find the most recent status before the given datetime + for status in reversed(metric.status_history): + if status.timestamp <= date: + qc_metrics_flat[metric.name] = status.status + break data.append(qc_metrics_flat) @@ -175,9 +172,8 @@ def get_quality_control_value_df( for name, qc in zip(names, qcs): qc_metrics_flat = {} qc_metrics_flat["name"] = name - for eval in qc.evaluations: - for metric in eval.metrics: - qc_metrics_flat[f"{eval.name}_{metric.name}"] = metric.value + for metric in qc.metrics: + qc_metrics_flat[metric.name] = metric.value data.append(qc_metrics_flat) diff --git a/tests/helpers/test_data_schema.py b/tests/helpers/test_data_schema.py index 90edf83..090ee19 100644 --- a/tests/helpers/test_data_schema.py +++ b/tests/helpers/test_data_schema.py @@ -9,10 +9,9 @@ import pandas as pd from aind_data_schema.core.quality_control import ( - QCEvaluation, + QualityControl, QCMetric, QCStatus, - QualityControl, Stage, Status, ) @@ -167,22 +166,18 @@ def test_get_qc_value_df( ) metric0 = QCMetric( name="Metric0", + modality=Modality.ECEPHYS, + stage=Stage.RAW, value=0, status_history=[ status, ], ) - eval = QCEvaluation( - name="Evaluation0", - modality=Modality.ECEPHYS, - stage=Stage.RAW, - metrics=[metric0], - ) - mock_get_quality_control_by_names.return_value = [ QualityControl( - evaluations=[eval], + metrics=[metric0], + default_grouping=["test_grouping"], ) ] @@ -191,7 +186,7 @@ def test_get_qc_value_df( test_df = pd.DataFrame( { "name": ["fake_name"], - "Evaluation0_Metric0": [0], + "Metric0": [0], } ) @@ -214,22 +209,18 @@ def test_get_qc_status_df( ) metric0 = QCMetric( name="Metric0", + modality=Modality.ECEPHYS, + stage=Stage.RAW, value=0, status_history=[ status, ], ) - eval = QCEvaluation( - name="Evaluation0", - modality=Modality.ECEPHYS, - stage=Stage.RAW, - metrics=[metric0], - ) - mock_get_quality_control_by_names.return_value = [ QualityControl( - evaluations=[eval], + metrics=[metric0], + default_grouping=["test_grouping"], ) ] @@ -238,7 +229,7 @@ def test_get_qc_status_df( test_df = pd.DataFrame( { "name": ["fake_name"], - "Evaluation0_Metric0": [Status.PASS], + "Metric0": [Status.PASS], } ) @@ -263,8 +254,8 @@ def test_get_quality_control_by_names_valid(self): ) self.assertEqual(len(result), 2) - self.assertEqual(result[0].evaluations[0].name, "Drift map") - self.assertEqual(result[1].evaluations[0].name, "Drift map") + self.assertEqual(result[0].metrics[0].name, "Probe A drift") + self.assertEqual(result[1].metrics[0].name, "Probe A drift") mock_client.fetch_records_by_filter_list.assert_called_once_with( filter_key="name", filter_values=["name1", "name2"], @@ -313,6 +304,135 @@ def test_get_quality_control_by_names_no_records(self): projection={"quality_control": 1}, ) + def test_get_qc_value_df_with_example_data(self): + """Test get_quality_control_value_df with actual example QC data.""" + mock_client = MagicMock() + mock_client.fetch_records_by_filter_list.return_value = [ + {"quality_control": self.example_quality_control.copy()}, + {"quality_control": self.example_quality_control.copy()}, + ] + + result_df = get_quality_control_value_df( + client=mock_client, names=["session1", "session2"] + ) + + # Check that we got the right shape + self.assertEqual(len(result_df), 2) + self.assertEqual(list(result_df["name"]), ["session1", "session2"]) + + # Check specific values from the example QC data + expected_columns = [ + "name", + "Probe A drift", + "Probe B drift", + "Probe C drift", + "Expected frame count", + "Video 1 frame count", + "Video 2 num frames", + "ProbeA", + "ProbeB", + "ProbeC", + ] + self.assertEqual( + sorted(result_df.columns.tolist()), sorted(expected_columns) + ) + + # Check specific values + self.assertEqual(result_df["Probe C drift"].iloc[0], "Low") + self.assertEqual(result_df["Expected frame count"].iloc[0], 662) + self.assertEqual(result_df["Video 1 frame count"].iloc[0], 662) + self.assertEqual(result_df["ProbeA"].iloc[0], True) + self.assertEqual(result_df["ProbeB"].iloc[0], True) + self.assertEqual(result_df["ProbeC"].iloc[0], True) + + # Check that Probe A and B drift have complex value structures + probe_a_value = result_df["Probe A drift"].iloc[0] + self.assertIsInstance(probe_a_value, dict) + self.assertEqual(probe_a_value["value"], "") + self.assertEqual(probe_a_value["type"], "dropdown") + + probe_b_value = result_df["Probe B drift"].iloc[0] + self.assertIsInstance(probe_b_value, dict) + self.assertEqual(probe_b_value["value"], "") + self.assertEqual(probe_b_value["type"], "checkbox") + + def test_get_qc_status_df_with_example_data(self): + """Test get_quality_control_status_df with actual example QC data.""" + mock_client = MagicMock() + mock_client.fetch_records_by_filter_list.return_value = [ + {"quality_control": self.example_quality_control.copy()}, + {"quality_control": self.example_quality_control.copy()}, + ] + + # Use a date after the timestamps in the example data + test_date = datetime( + 2022, 11, 23, tzinfo=datetime.now().astimezone().tzinfo + ) + + result_df = get_quality_control_status_df( + client=mock_client, names=["session1", "session2"], date=test_date + ) + + # Check that we got the right shape + self.assertEqual(len(result_df), 2) + self.assertEqual(list(result_df["name"]), ["session1", "session2"]) + + # Check specific status values from the example QC data + expected_columns = [ + "name", + "Probe A drift", + "Probe B drift", + "Probe C drift", + "Expected frame count", + "Video 1 frame count", + "Video 2 num frames", + "ProbeA", + "ProbeB", + "ProbeC", + ] + self.assertEqual( + sorted(result_df.columns.tolist()), sorted(expected_columns) + ) + + # Check specific status values + self.assertEqual(result_df["Probe C drift"].iloc[0], Status.PASS) + self.assertEqual( + result_df["Expected frame count"].iloc[0], Status.PASS + ) + self.assertEqual(result_df["Video 1 frame count"].iloc[0], Status.PASS) + self.assertEqual(result_df["Video 2 num frames"].iloc[0], Status.PASS) + self.assertEqual(result_df["ProbeA"].iloc[0], Status.PASS) + self.assertEqual(result_df["ProbeB"].iloc[0], Status.PASS) + self.assertEqual(result_df["ProbeC"].iloc[0], Status.PASS) + + # Check that Probe A and B drift have pending status + self.assertEqual(result_df["Probe A drift"].iloc[0], Status.PENDING) + self.assertEqual(result_df["Probe B drift"].iloc[0], Status.PENDING) + + def test_get_qc_status_df_with_date_filtering(self): + """Test get_quality_control_status_df correctly filters by date.""" + mock_client = MagicMock() + mock_client.fetch_records_by_filter_list.return_value = [ + {"quality_control": self.example_quality_control.copy()} + ] + + # Use a date before the timestamps in the example data + early_date = datetime( + 2022, 11, 21, tzinfo=datetime.now().astimezone().tzinfo + ) + + result_df = get_quality_control_status_df( + client=mock_client, names=["session1"], date=early_date + ) + + # Since the date is before all status timestamps, no statuses found + # The function should only include the name column + self.assertEqual(len(result_df), 1) + self.assertEqual(list(result_df["name"]), ["session1"]) + + metric_columns = [col for col in result_df.columns if col != "name"] + self.assertEqual(len(metric_columns), 0) + if __name__ == "__main__": unittest.main() diff --git a/tests/resources/helpers/quality_control.json b/tests/resources/helpers/quality_control.json index 83bf6aa..0a95cbd 100644 --- a/tests/resources/helpers/quality_control.json +++ b/tests/resources/helpers/quality_control.json @@ -1,176 +1,279 @@ { - "describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/quality_control.py", - "schema_version": "1.1.1", - "evaluations": [ - { - "modality": { - "name": "Extracellular electrophysiology", - "abbreviation": "ecephys" - }, - "stage": "Raw data", - "name": "Drift map", - "description": "Qualitative check that drift map shows minimal movement", - "metrics": [ - { - "name": "Probe A drift", - "value": { - "value": "", - "options": [ - "Low", - "Medium", - "High" - ], - "status": [ - "Pass", - "Fail", - "Fail" - ], - "type": "dropdown" - }, - "description": null, - "reference": "ecephys-drift-map", - "status_history": [ - { - "evaluator": "", - "status": "Pending", - "timestamp": "2022-11-22T00:00:00Z" - } - ] - }, - { - "name": "Probe B drift", - "value": { - "value": "", - "options": [ - "Drift visible in entire session", - "Drift visible in part of session", - "Sudden movement event" - ], - "status": [ - "Fail", - "Pass", - "Fail" - ], - "type": "checkbox" - }, - "description": null, - "reference": "ecephys-drift-map", - "status_history": [ - { - "evaluator": "", - "status": "Pending", - "timestamp": "2022-11-22T00:00:00Z" - } - ] - }, - { - "name": "Probe C drift", - "value": "Low", - "description": null, - "reference": "ecephys-drift-map", - "status_history": [ - { - "evaluator": "Automated", - "status": "Pass", - "timestamp": "2022-11-22T00:00:00Z" - } - ] - } - ], - "notes": "", - "allow_failed_metrics": false - }, - { - "modality": { - "name": "Behavior videos", - "abbreviation": "behavior-videos" - }, - "stage": "Raw data", - "name": "Video frame count check", - "description": null, - "metrics": [ - { - "name": "video_1_num_frames", - "value": 662, - "description": null, - "reference": null, - "status_history": [ - { - "evaluator": "Automated", - "status": "Pass", - "timestamp": "2022-11-22T00:00:00Z" - } - ] - }, - { - "name": "video_2_num_frames", - "value": 662, - "description": null, - "reference": null, - "status_history": [ - { - "evaluator": "Automated", - "status": "Pass", - "timestamp": "2022-11-22T00:00:00Z" - } - ] - } - ], - "notes": "Pass when video_1_num_frames==video_2_num_frames", - "allow_failed_metrics": false - }, - { - "modality": { - "name": "Extracellular electrophysiology", - "abbreviation": "ecephys" - }, - "stage": "Raw data", - "name": "Probes present", - "description": null, - "metrics": [ - { - "name": "ProbeA_success", - "value": true, - "description": null, - "reference": null, - "status_history": [ - { - "evaluator": "Automated", - "status": "Pass", - "timestamp": "2022-11-22T00:00:00Z" - } - ] - }, - { - "name": "ProbeB_success", - "value": true, - "description": null, - "reference": null, - "status_history": [ - { - "evaluator": "Automated", - "status": "Pass", - "timestamp": "2022-11-22T00:00:00Z" - } - ] - }, - { - "name": "ProbeC_success", - "value": true, - "description": null, - "reference": null, - "status_history": [ - { - "evaluator": "Automated", - "status": "Pass", - "timestamp": "2022-11-22T00:00:00Z" - } - ] - } - ], - "notes": null, - "allow_failed_metrics": false - } - ], - "notes": null - } \ No newline at end of file + "object_type": "Quality control", + "describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/quality_control.py", + "schema_version": "2.0.6", + "metrics": [ + { + "object_type": "QC metric", + "name": "Probe A drift", + "modality": { + "name": "Extracellular electrophysiology", + "abbreviation": "ecephys" + }, + "stage": "Raw data", + "value": { + "value": "", + "options": [ + "Low", + "Medium", + "High" + ], + "status": [ + "Pass", + "Fail", + "Fail" + ], + "type": "dropdown" + }, + "status_history": [ + { + "object_type": "QC status", + "evaluator": "", + "status": "Pending", + "timestamp": "2022-11-22T00:00:00Z" + } + ], + "description": "Pass when drift map shows minimal movement", + "reference": "ecephys-drift-map", + "tags": [ + "Drift map", + "Probe A" + ], + "evaluated_assets": null + }, + { + "object_type": "QC metric", + "name": "Probe B drift", + "modality": { + "name": "Extracellular electrophysiology", + "abbreviation": "ecephys" + }, + "stage": "Raw data", + "value": { + "value": "", + "options": [ + "No Drift", + "Drift visible in part of acquisition", + "Drift visible in entire acquisition", + "Sudden movement event" + ], + "status": [ + "Pass", + "Pass", + "Fail", + "Fail" + ], + "type": "checkbox" + }, + "status_history": [ + { + "object_type": "QC status", + "evaluator": "", + "status": "Pending", + "timestamp": "2022-11-22T00:00:00Z" + } + ], + "description": "Pass when drift map shows minimal movement", + "reference": "ecephys-drift-map", + "tags": [ + "Drift map", + "Probe B" + ], + "evaluated_assets": null + }, + { + "object_type": "QC metric", + "name": "Probe C drift", + "modality": { + "name": "Extracellular electrophysiology", + "abbreviation": "ecephys" + }, + "stage": "Raw data", + "value": "Low", + "status_history": [ + { + "object_type": "QC status", + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ], + "description": "Pass when drift map shows minimal movement", + "reference": "ecephys-drift-map", + "tags": [ + "Drift map", + "Probe C" + ], + "evaluated_assets": null + }, + { + "object_type": "QC metric", + "name": "Expected frame count", + "modality": { + "name": "Behavior videos", + "abbreviation": "behavior-videos" + }, + "stage": "Raw data", + "value": 662, + "status_history": [ + { + "object_type": "QC status", + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ], + "description": "Expected frame count from experiment length, always pass", + "reference": null, + "tags": [ + "Frame count checks" + ], + "evaluated_assets": null + }, + { + "object_type": "QC metric", + "name": "Video 1 frame count", + "modality": { + "name": "Behavior videos", + "abbreviation": "behavior-videos" + }, + "stage": "Raw data", + "value": 662, + "status_history": [ + { + "object_type": "QC status", + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ], + "description": "Pass when frame count matches expected", + "reference": null, + "tags": [ + "Frame count checks", + "Video 1" + ], + "evaluated_assets": null + }, + { + "object_type": "QC metric", + "name": "Video 2 num frames", + "modality": { + "name": "Behavior videos", + "abbreviation": "behavior-videos" + }, + "stage": "Raw data", + "value": 662, + "status_history": [ + { + "object_type": "QC status", + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ], + "description": "Pass when frame count matches expected", + "reference": null, + "tags": [ + "Frame count checks", + "Video 2" + ], + "evaluated_assets": null + }, + { + "object_type": "QC metric", + "name": "ProbeA", + "modality": { + "name": "Extracellular electrophysiology", + "abbreviation": "ecephys" + }, + "stage": "Raw data", + "value": true, + "status_history": [ + { + "object_type": "QC status", + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ], + "description": "Pass when probe is present in the recording", + "reference": null, + "tags": [ + "Probes present" + ], + "evaluated_assets": null + }, + { + "object_type": "QC metric", + "name": "ProbeB", + "modality": { + "name": "Extracellular electrophysiology", + "abbreviation": "ecephys" + }, + "stage": "Raw data", + "value": true, + "status_history": [ + { + "object_type": "QC status", + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ], + "description": "Pass when probe is present in the recording", + "reference": null, + "tags": [ + "Probes present" + ], + "evaluated_assets": null + }, + { + "object_type": "QC metric", + "name": "ProbeC", + "modality": { + "name": "Extracellular electrophysiology", + "abbreviation": "ecephys" + }, + "stage": "Raw data", + "value": true, + "status_history": [ + { + "object_type": "QC status", + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ], + "description": "Pass when probe is present in the recording", + "reference": null, + "tags": [ + "Probes present" + ], + "evaluated_assets": null + } + ], + "key_experimenters": null, + "notes": null, + "default_grouping": [ + "Drift map", + "Frame count checks", + "Probes present" + ], + "allow_tag_failures": [ + "Video 2" + ], + "status": { + "Probe A": "Pending", + "Probes present": "Pass", + "Video 1": "Pass", + "Probe C": "Pass", + "Video 2": "Pass", + "Probe B": "Pending", + "Frame count checks": "Pass", + "Drift map": "Pending", + "behavior-videos": "Pass", + "ecephys": "Pending", + "Raw data": "Pending" + } +} \ No newline at end of file diff --git a/tests/resources/helpers/quality_control_invalid.json b/tests/resources/helpers/quality_control_invalid.json index 54dfb52..ead71a5 100644 --- a/tests/resources/helpers/quality_control_invalid.json +++ b/tests/resources/helpers/quality_control_invalid.json @@ -1,5 +1,5 @@ { "describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/quality_control.py", - "schema_version": "1.1.1", + "schema_version": "2.0.6", "notes": null } \ No newline at end of file