From 564fcaa1f4064e8a80973b78e866238779a7081b Mon Sep 17 00:00:00 2001 From: Arun <“arun.karnani.k@gmail.com”> Date: Mon, 23 Feb 2026 17:06:31 +0000 Subject: [PATCH 1/3] Adding unit_testing examples from presentation - Updated the main README.md to specify the inclusion of the new content - Added a second README.md inside the Unit_testing_python_2026 with some explanation on how to run pytest. --- README.md | 5 ++ .../example_code/README.md | 9 ++++ .../example_code/bin/__init__.py | 0 .../example_code/bin/clinvar_utils.py | 44 +++++++++++++++ .../example_code/bin/script.py | 41 ++++++++++++++ .../example_code/bin/script_bed.py | 53 +++++++++++++++++++ .../example_code/tests/__init__.py | 0 .../example_code/tests/test_clinvar_utils.py | 44 +++++++++++++++ .../example_code/tests/test_script.py | 44 +++++++++++++++ .../example_code/tests/test_script_bed.py | 53 +++++++++++++++++++ 10 files changed, 293 insertions(+) create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/README.md create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/bin/__init__.py create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/bin/script.py create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/bin/script_bed.py create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/tests/__init__.py create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py diff --git a/README.md b/README.md index 48aacf9..8ad5075 100644 --- a/README.md +++ b/README.md @@ -22,4 +22,9 @@ This series of notebooks and scripts is for an introduction to basics of coding. - hap.py output plotting - TSO500 CNV counting +### 2026 additions + +- Unit Testing: Principles, Python and TDD + + ### Written by East Genomics GLH \ No newline at end of file diff --git a/learning-sessions/Unit_testing_python_2026/example_code/README.md b/learning-sessions/Unit_testing_python_2026/example_code/README.md new file mode 100644 index 0000000..60c58d9 --- /dev/null +++ b/learning-sessions/Unit_testing_python_2026/example_code/README.md @@ -0,0 +1,9 @@ +## Codeschool presentation - Good with Unit testing: Principles, Python and TDD + +Here you will find the scripts that I have used to present some examples for unit testing. Feel free to use this to see how unit testing works, or feel free to use these scripts as a template to your unit testing. + +To run pytest, simply run `pytest` in terminal, inside the `unit_test_example_code` folder. To run pytest-cov, run the following line instead from your terminal: + +```bash +pytest --cov=bin --cov-report=html +``` \ No newline at end of file diff --git a/learning-sessions/Unit_testing_python_2026/example_code/bin/__init__.py b/learning-sessions/Unit_testing_python_2026/example_code/bin/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py b/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py new file mode 100644 index 0000000..5415a66 --- /dev/null +++ b/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py @@ -0,0 +1,44 @@ +"""Utility functions for fetching clinical significance from ClinVar.""" +import requests + + +def get_clinvar_significance(variation_id): + """ + Fetches clinical significance from ClinVar for a given Variation ID. + + Parameters + ---------- + variation_id : str + The ClinVar Variation ID (e.g., '12345'). + + Returns + ------- + str + The clinical significance of the variant. + + Raises + ------ + requests.exceptions.RequestException + If the API call fails. + ValueError + If the Variation ID is not found or data is malformed. + """ + # ClinVar API endpoint for variant data + url = ("https://api.ncbi.nlm.nih.gov/variation/v0/beta/" + f"clinical-significance/variation/{variation_id}") + + # Send GET request + response = requests.get(url, timeout=10) + + # Raise exception for bad status codes + response.raise_for_status() + + data = response.json() + + # Parse the nested JSON structure to get the significance + try: + significance = data['clinical_significance']['description'] + return significance + except KeyError as exc: + raise ValueError( + f"Could not find significance data for ID {variation_id}") from exc diff --git a/learning-sessions/Unit_testing_python_2026/example_code/bin/script.py b/learning-sessions/Unit_testing_python_2026/example_code/bin/script.py new file mode 100644 index 0000000..54b6782 --- /dev/null +++ b/learning-sessions/Unit_testing_python_2026/example_code/bin/script.py @@ -0,0 +1,41 @@ +""" +This module contains a function to calculate the GC content of a DNA sequence. +""" + + +def calculate_gc_content(sequence): + """ + Calculates the GC content percentage of a DNA sequence. + + Input: + sequence (str): A string representing the DNA sequence (e.g., "ATGC"). + + Output: + float: The GC content percentage, rounded to two decimal places. + + Raises: + TypeError: If the input is not a string. + ValueError: If the sequence contains characters other than A, T, G, C. + """ + + if not isinstance(sequence, str): + raise TypeError("Sequence must be a string.") + + if not sequence: + return 0.0 + + # Normalize to uppercase to handle mixed cases + seq = sequence.upper() + + # Validate that sequence contains only DNA bases + valid_bases = set('ATGC') + if not all(base in valid_bases for base in seq): + raise ValueError("Sequence contains invalid characters. " + "Only A, T, G, C are allowed.") + + # Count Gs and Cs + g_count = seq.count('G') + c_count = seq.count('C') + + gc_percentage = ((g_count + c_count) / len(seq)) * 100 + return round(gc_percentage, 2) diff --git a/learning-sessions/Unit_testing_python_2026/example_code/bin/script_bed.py b/learning-sessions/Unit_testing_python_2026/example_code/bin/script_bed.py new file mode 100644 index 0000000..864a4f7 --- /dev/null +++ b/learning-sessions/Unit_testing_python_2026/example_code/bin/script_bed.py @@ -0,0 +1,53 @@ +"""Utility function for calculating the length of a bed file.""" +import os +import pandas as pd + + +def calculate_total_bed_length(filepath): + """ + Calculates the total genomic length covered by a BED file. + + This function reads the first three columns of a BED file (Chrom, Start, + End), validates that the coordinates are numeric and logical, and + returns the sum of the lengths of all regions. + + Parameters + ---------- + filepath : str + The path to the BED file to be processed. + + Returns + ------- + int + The total number of base pairs across all regions defined in the file. + + Raises + ------ + FileNotFoundError + If the provided filepath does not exist on the system. + TypeError + If the 'start' or 'end' columns contain non-numeric data. + ValueError + If any record has a start coordinate greater than the end coordinate. + """ + + if not os.path.exists(filepath): + raise FileNotFoundError(f"The file '{filepath}' does not exist.") + + cols = ['chrom', 'start', 'end'] + df = pd.read_csv(filepath, sep='\t', names=cols, usecols=[0, 1, 2]) + + if df.empty: + return 0 + + # Explicit validation instead of try/except + if not pd.api.types.is_numeric_dtype(df['start']) or \ + not pd.api.types.is_numeric_dtype(df['end']): + raise TypeError("BED coordinates must be numeric.") + + df['length'] = df['end'] - df['start'] + + if (df['length'] < 0).any(): + raise ValueError("Found BED record where start > end.") + + return int(df['length'].sum()) diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/__init__.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py new file mode 100644 index 0000000..51af840 --- /dev/null +++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py @@ -0,0 +1,44 @@ +"""Unit tests for the calculate_gc_content function in bin/clinvar_utils.py.""" +import pytest +from bin.clinvar_utils import get_clinvar_significance + + +@pytest.fixture(name="clinvar_mock") +def mock_clinvar_factory(mocker): + """A reusable fixture to mock the ClinVar API response.""" + # Patch the request.get method in the clinvar_utils module + mock = mocker.patch('bin.clinvar_utils.requests.get') + # Set a default return value + mock_resp = mocker.Mock() + mock_resp.json.return_value = { + 'clinical_significance': + {'description': 'Likely Benign'} + } + mock.return_value = mock_resp + return mock + + +class TestClinVarUtils: + """Tests for the get_clinvar_significance function.""" + + def test_significance_logic(self, _clinvar_mock): + """ + Test that the function correctly extracts clinical + significance from mocked API response. + """ + # This test uses the fixture directly + result = get_clinvar_significance('67890') + assert result == 'Likely Benign' + + def test_key_error(self, clinvar_mock): + """ + Test that a ValueError is raised when the expected + keys are missing in the API response. + """ + # Modify the mock to return a JSON without the expected keys + clinvar_mock.return_value.json.return_value = {} + with pytest.raises( + ValueError, + match="Could not find significance data for ID 67890" + ): + get_clinvar_significance('67890') diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py new file mode 100644 index 0000000..d949db7 --- /dev/null +++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py @@ -0,0 +1,44 @@ +"""Unit tests for the calculate_gc_content function in bin/script.py.""" +import pytest +from bin.script import calculate_gc_content + + +class TestGCContent: + """Class to test valid input scenarios for GC content calculation.""" + + def test_basic_sequence(self): + """Test standard DNA sequence.""" + assert calculate_gc_content("GCGC") == 100.0 + + def test_mixed_bases(self): + """Test a mixture of all bases.""" + assert calculate_gc_content("ATGCGT") == 50.0 + + def test_case_insensitivity(self): + """Test that it handles lowercase and mixed-case letters.""" + assert calculate_gc_content("atgcgt") == 50.0 + assert calculate_gc_content("AtGcGt") == 50.0 + + +class TestGCContentEdgeCases: + """Class to test empty, invalid, or unusual input scenarios.""" + + def test_empty_string(self): + """Test empty string input.""" + assert calculate_gc_content("") == 0.0 + + def test_invalid_input_type(self): + """Test that a TypeError is raised for non-string inputs.""" + with pytest.raises(TypeError, match="Sequence must be a string"): + calculate_gc_content(12345) + + def test_rounding(self): + """Test that the result is rounded correctly.""" + # GC content of "GAT" is 1/3 = 33.3333...% + assert calculate_gc_content("GAT") == 33.33 + + def test_invalid_characters(self): + """Test that a ValueError is raised for sequences with invalid characters.""" + with pytest.raises(ValueError, match="Sequence contains invalid characters. " + "Only A, T, G, C are allowed."): + calculate_gc_content("ATGCX") \ No newline at end of file diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py new file mode 100644 index 0000000..43ededc --- /dev/null +++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py @@ -0,0 +1,53 @@ +"""Unit tests for the calculate_gc_content function in bin/script_bed.py.""" +import pytest +from bin.script_bed import calculate_total_bed_length + + +@pytest.fixture(name="valid_bed") +def valid_bed_factory(tmp_path): + """Creates a valid 3-line BED file.""" + f = tmp_path / "valid.bed" + f.write_text("chr1\t100\t200\nchr2\t0\t50") + return f + + +@pytest.fixture(name="malformed_bed") +def malformed_bed_factory(tmp_path): + """Creates a BED file with text in the coordinate columns.""" + f = tmp_path / "text_coords.bed" + f.write_text("chr1\tstring_data\t200") + return f + + +class TestBedDocumentationContract: + """Verifies that the function adheres to its docstring specifications.""" + + def test_returns_correct_int(self, valid_bed): + """Verifies the 'Returns' section of the docstring.""" + result = calculate_total_bed_length(valid_bed) + assert isinstance(result, int) + assert result == 150 + + def test_raises_file_not_found(self): + """Verifies the 'Raises FileNotFoundError' section.""" + with pytest.raises(FileNotFoundError): + calculate_total_bed_length("imaginary_file.bed") + + def test_raises_type_error(self, malformed_bed): + """Verifies the 'Raises TypeError' section.""" + with pytest.raises(TypeError, match="must be numeric"): + calculate_total_bed_length(malformed_bed) + + def test_raises_value_error(self, tmp_path): + """Verifies the 'Raises ValueError' section.""" + f = tmp_path / "invalid_coords.bed" + f.write_text("chr1\t200\t100") # start > end + with pytest.raises(ValueError, match="start > end"): + calculate_total_bed_length(f) + + def test_empty_file_returns_zero(self, tmp_path): + """Verifies that an empty BED file returns 0.""" + f = tmp_path / "empty.bed" + f.write_text("") + result = calculate_total_bed_length(f) + assert result == 0 From 41a6b0bc789438bdb41321bd6e5e692e19ea731c Mon Sep 17 00:00:00 2001 From: Arun <“arun.karnani.k@gmail.com”> Date: Thu, 26 Feb 2026 09:53:28 +0000 Subject: [PATCH 2/3] Address PEP8 and coderabbit issues. --- .../Unit_testing_python_2026/example_code/README.md | 4 +++- .../example_code/bin/clinvar_utils.py | 3 ++- .../example_code/tests/test_clinvar_utils.py | 6 ++++-- .../example_code/tests/test_script.py | 7 ++++--- .../example_code/tests/test_script_bed.py | 4 ++-- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/learning-sessions/Unit_testing_python_2026/example_code/README.md b/learning-sessions/Unit_testing_python_2026/example_code/README.md index 60c58d9..04e2c11 100644 --- a/learning-sessions/Unit_testing_python_2026/example_code/README.md +++ b/learning-sessions/Unit_testing_python_2026/example_code/README.md @@ -2,7 +2,9 @@ Here you will find the scripts that I have used to present some examples for unit testing. Feel free to use this to see how unit testing works, or feel free to use these scripts as a template to your unit testing. -To run pytest, simply run `pytest` in terminal, inside the `unit_test_example_code` folder. To run pytest-cov, run the following line instead from your terminal: +To run pytest, simply run `pytest` in terminal, inside the `example_code` folder. To run pytest-cov, run the following line instead from your terminal: + +**Prerequisites:** Install dependencies with `pip install pytest pytest-cov pytest-mock` before running the tests. ```bash pytest --cov=bin --cov-report=html diff --git a/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py b/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py index 5415a66..8764143 100644 --- a/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py +++ b/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py @@ -38,7 +38,8 @@ def get_clinvar_significance(variation_id): # Parse the nested JSON structure to get the significance try: significance = data['clinical_significance']['description'] - return significance except KeyError as exc: raise ValueError( f"Could not find significance data for ID {variation_id}") from exc + else: + return significance diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py index 51af840..6b77109 100644 --- a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py +++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py @@ -1,4 +1,4 @@ -"""Unit tests for the calculate_gc_content function in bin/clinvar_utils.py.""" +"""Unit tests for get_clinvar_significance function in bin/clinvar_utils.py.""" import pytest from bin.clinvar_utils import get_clinvar_significance @@ -21,7 +21,9 @@ def mock_clinvar_factory(mocker): class TestClinVarUtils: """Tests for the get_clinvar_significance function.""" - def test_significance_logic(self, _clinvar_mock): + def test_significance_logic( + self, clinvar_mock # pylint: disable=unused-argument + ): """ Test that the function correctly extracts clinical significance from mocked API response. diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py index d949db7..0c54b11 100644 --- a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py +++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py @@ -38,7 +38,8 @@ def test_rounding(self): assert calculate_gc_content("GAT") == 33.33 def test_invalid_characters(self): - """Test that a ValueError is raised for sequences with invalid characters.""" - with pytest.raises(ValueError, match="Sequence contains invalid characters. " + """Test if ValueError is raised for sequences with invalid chars.""" + with pytest.raises(ValueError, + match="Sequence contains invalid characters. " "Only A, T, G, C are allowed."): - calculate_gc_content("ATGCX") \ No newline at end of file + calculate_gc_content("ATGCX") diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py index 43ededc..15076d1 100644 --- a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py +++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py @@ -1,11 +1,11 @@ -"""Unit tests for the calculate_gc_content function in bin/script_bed.py.""" +"""Unit tests for calculate_total_bed_length function in bin/script_bed.py.""" import pytest from bin.script_bed import calculate_total_bed_length @pytest.fixture(name="valid_bed") def valid_bed_factory(tmp_path): - """Creates a valid 3-line BED file.""" + """Creates a valid 2-line BED file.""" f = tmp_path / "valid.bed" f.write_text("chr1\t100\t200\nchr2\t0\t50") return f From 1af58e4dd05da63b6857bb48cdc2a3c8e3890124 Mon Sep 17 00:00:00 2001 From: Arun <“arun.karnani.k@gmail.com”> Date: Thu, 26 Feb 2026 10:01:35 +0000 Subject: [PATCH 3/3] One final coderabbit comment to address --- .../Unit_testing_python_2026/example_code/tests/test_script.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py index 0c54b11..1ca0914 100644 --- a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py +++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py @@ -40,6 +40,5 @@ def test_rounding(self): def test_invalid_characters(self): """Test if ValueError is raised for sequences with invalid chars.""" with pytest.raises(ValueError, - match="Sequence contains invalid characters. " - "Only A, T, G, C are allowed."): + match="Sequence contains invalid characters"): calculate_gc_content("ATGCX")