From 564fcaa1f4064e8a80973b78e866238779a7081b Mon Sep 17 00:00:00 2001
From: Arun <arun.karnani.k@gmail.com>
Date: Mon, 23 Feb 2026 17:06:31 +0000
Subject: [PATCH 1/3] Add unit testing examples from presentation

- Update the main README.md to mention the new content.
- Add a README.md inside Unit_testing_python_2026/example_code explaining
  how to run pytest.

---
 README.md                                     |  5 ++
 .../example_code/README.md                    |  9 ++++
 .../example_code/bin/__init__.py              |  0
 .../example_code/bin/clinvar_utils.py         | 44 +++++++++++++++
 .../example_code/bin/script.py                | 41 ++++++++++++++
 .../example_code/bin/script_bed.py            | 53 +++++++++++++++++++
 .../example_code/tests/__init__.py            |  0
 .../example_code/tests/test_clinvar_utils.py  | 44 +++++++++++++++
 .../example_code/tests/test_script.py         | 44 +++++++++++++++
 .../example_code/tests/test_script_bed.py     | 53 +++++++++++++++++++
 10 files changed, 293 insertions(+)
 create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/README.md
 create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/bin/__init__.py
 create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py
 create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/bin/script.py
 create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/bin/script_bed.py
 create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/tests/__init__.py
 create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py
 create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py
 create mode 100644 learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py

diff --git a/README.md b/README.md
index 48aacf9..8ad5075 100644
--- a/README.md
+++ b/README.md
@@ -22,4 +22,9 @@ This series of notebooks and scripts is for an introduction to basics of coding.
   - hap.py output plotting
   - TSO500 CNV counting
 
+### 2026 additions
+
+- Unit Testing: Principles, Python and TDD
+
+
 ### Written by East Genomics GLH
\ No newline at end of file
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/README.md b/learning-sessions/Unit_testing_python_2026/example_code/README.md
new file mode 100644
index 0000000..60c58d9
--- /dev/null
+++ b/learning-sessions/Unit_testing_python_2026/example_code/README.md
@@ -0,0 +1,9 @@
+## Codeschool presentation - Good with Unit testing: Principles, Python and TDD
+
+Here you will find the scripts that I have used to present some examples for unit testing. Feel free to use this to see how unit testing works, or feel free to use these scripts as a template to your unit testing.
+
+To run pytest, simply run `pytest` in terminal, inside the `unit_test_example_code` folder. To run pytest-cov, run the following line instead from your terminal:
+
+```bash
+pytest --cov=bin --cov-report=html
+```
\ No newline at end of file
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/bin/__init__.py b/learning-sessions/Unit_testing_python_2026/example_code/bin/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py b/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py
new file mode 100644
index 0000000..5415a66
--- /dev/null
+++ b/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py
@@ -0,0 +1,44 @@
+"""Utility functions for fetching clinical significance from ClinVar."""
+import requests
+
+
+def get_clinvar_significance(variation_id):
+    """
+    Fetches clinical significance from ClinVar for a given Variation ID.
+
+    Parameters
+    ----------
+    variation_id : str
+        The ClinVar Variation ID (e.g., '12345').
+
+    Returns
+    -------
+    str
+        The clinical significance of the variant.
+
+    Raises
+    ------
+    requests.exceptions.RequestException
+        If the API call fails.
+    ValueError
+        If the Variation ID is not found or data is malformed.
+    """
+    # ClinVar API endpoint for variant data
+    url = ("https://api.ncbi.nlm.nih.gov/variation/v0/beta/"
+           f"clinical-significance/variation/{variation_id}")
+
+    # Send GET request
+    response = requests.get(url, timeout=10)
+
+    # Raise exception for bad status codes
+    response.raise_for_status()
+
+    data = response.json()
+
+    # Parse the nested JSON structure to get the significance
+    try:
+        significance = data['clinical_significance']['description']
+        return significance
+    except KeyError as exc:
+        raise ValueError(
+            f"Could not find significance data for ID {variation_id}") from exc
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/bin/script.py b/learning-sessions/Unit_testing_python_2026/example_code/bin/script.py
new file mode 100644
index 0000000..54b6782
--- /dev/null
+++ b/learning-sessions/Unit_testing_python_2026/example_code/bin/script.py
@@ -0,0 +1,41 @@
+"""
+This module contains a function to calculate the GC content of a DNA sequence.
+"""
+
+
+def calculate_gc_content(sequence):
+    """
+    Calculates the GC content percentage of a DNA sequence.
+
+    Input:
+        sequence (str): A string representing the DNA sequence (e.g., "ATGC").
+
+    Output:
+        float: GC content percentage rounded to two decimals (0.0 if empty).
+
+    Raises:
+        TypeError: If the input is not a string.
+        ValueError: If the sequence contains characters other than A, T, G, C.
+    """
+
+    if not isinstance(sequence, str):
+        raise TypeError("Sequence must be a string.")
+
+    if not sequence:
+        return 0.0
+
+    # Normalize to uppercase to handle mixed cases
+    seq = sequence.upper()
+
+    # Validate that sequence contains only DNA bases
+    valid_bases = set('ATGC')
+    if not all(base in valid_bases for base in seq):
+        raise ValueError("Sequence contains invalid characters. "
+                         "Only A, T, G, C are allowed.")
+
+    # Count Gs and Cs
+    g_count = seq.count('G')
+    c_count = seq.count('C')
+
+    gc_percentage = ((g_count + c_count) / len(seq)) * 100
+    return round(gc_percentage, 2)
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/bin/script_bed.py b/learning-sessions/Unit_testing_python_2026/example_code/bin/script_bed.py
new file mode 100644
index 0000000..864a4f7
--- /dev/null
+++ b/learning-sessions/Unit_testing_python_2026/example_code/bin/script_bed.py
@@ -0,0 +1,53 @@
+"""Utility function for calculating the length of a bed file."""
+import os
+import pandas as pd
+
+
+def calculate_total_bed_length(filepath):
+    """
+    Calculates the total genomic length covered by a BED file.
+
+    This function reads the first three columns of a BED file (Chrom, Start,
+    End), validates that the coordinates are numeric and logical, and
+    returns the sum of the lengths of all regions.
+
+    Parameters
+    ----------
+    filepath : str
+        The path to the BED file to be processed.
+
+    Returns
+    -------
+    int
+        The total number of base pairs across all regions defined in the file.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the provided filepath does not exist on the system.
+    TypeError
+        If the 'start' or 'end' columns contain non-numeric data.
+    ValueError
+        If any record has a start coordinate greater than the end coordinate.
+    """
+
+    if not os.path.exists(filepath):
+        raise FileNotFoundError(f"The file '{filepath}' does not exist.")
+
+    cols = ['chrom', 'start', 'end']
+    df = pd.read_csv(filepath, sep='\t', names=cols, usecols=[0, 1, 2])
+
+    if df.empty:
+        return 0
+
+    # Explicit validation instead of try/except
+    if not pd.api.types.is_numeric_dtype(df['start']) or \
+       not pd.api.types.is_numeric_dtype(df['end']):
+        raise TypeError("BED coordinates must be numeric.")
+
+    df['length'] = df['end'] - df['start']
+
+    if (df['length'] < 0).any():
+        raise ValueError("Found BED record where start > end.")
+
+    return int(df['length'].sum())
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/__init__.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py
new file mode 100644
index 0000000..51af840
--- /dev/null
+++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py
@@ -0,0 +1,44 @@
+"""Unit tests for the calculate_gc_content function in bin/clinvar_utils.py."""
+import pytest
+from bin.clinvar_utils import get_clinvar_significance
+
+
+@pytest.fixture(name="clinvar_mock")
+def mock_clinvar_factory(mocker):
+    """A reusable fixture to mock the ClinVar API response."""
+    # Patch the request.get method in the clinvar_utils module
+    mock = mocker.patch('bin.clinvar_utils.requests.get')
+    # Set a default return value
+    mock_resp = mocker.Mock()
+    mock_resp.json.return_value = {
+        'clinical_significance':
+            {'description': 'Likely Benign'}
+        }
+    mock.return_value = mock_resp
+    return mock
+
+
+class TestClinVarUtils:
+    """Tests for the get_clinvar_significance function."""
+
+    def test_significance_logic(self, _clinvar_mock):
+        """
+        Test that the function correctly extracts clinical
+        significance from mocked API response.
+        """
+        # Requesting the fixture patches requests.get; its value is unused
+        result = get_clinvar_significance('67890')
+        assert result == 'Likely Benign'
+
+    def test_key_error(self, clinvar_mock):
+        """
+        Test that a ValueError is raised when the expected
+        keys are missing in the API response.
+        """
+        # Modify the mock to return a JSON without the expected keys
+        clinvar_mock.return_value.json.return_value = {}
+        with pytest.raises(
+            ValueError,
+            match="Could not find significance data for ID 67890"
+        ):
+            get_clinvar_significance('67890')
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py
new file mode 100644
index 0000000..d949db7
--- /dev/null
+++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py
@@ -0,0 +1,44 @@
+"""Unit tests for the calculate_gc_content function in bin/script.py."""
+import pytest
+from bin.script import calculate_gc_content
+
+
+class TestGCContent:
+    """Class to test valid input scenarios for GC content calculation."""
+
+    def test_basic_sequence(self):
+        """Test standard DNA sequence."""
+        assert calculate_gc_content("GCGC") == 100.0
+
+    def test_mixed_bases(self):
+        """Test a mixture of all bases."""
+        assert calculate_gc_content("ATGCGT") == 50.0
+
+    def test_case_insensitivity(self):
+        """Test that it handles lowercase and mixed-case letters."""
+        assert calculate_gc_content("atgcgt") == 50.0
+        assert calculate_gc_content("AtGcGt") == 50.0
+
+
+class TestGCContentEdgeCases:
+    """Class to test empty, invalid, or unusual input scenarios."""
+
+    def test_empty_string(self):
+        """Test empty string input."""
+        assert calculate_gc_content("") == 0.0
+
+    def test_invalid_input_type(self):
+        """Test that a TypeError is raised for non-string inputs."""
+        with pytest.raises(TypeError, match="Sequence must be a string"):
+            calculate_gc_content(12345)
+
+    def test_rounding(self):
+        """Test that the result is rounded correctly."""
+        # GC content of "GAT" is 1/3 = 33.3333...%
+        assert calculate_gc_content("GAT") == 33.33
+
+    def test_invalid_characters(self):
+        """Test that a ValueError is raised for sequences with invalid characters."""
+        with pytest.raises(ValueError, match="Sequence contains invalid characters. "
+                           "Only A, T, G, C are allowed."):
+            calculate_gc_content("ATGCX")
\ No newline at end of file
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py
new file mode 100644
index 0000000..43ededc
--- /dev/null
+++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py
@@ -0,0 +1,53 @@
+"""Unit tests for the calculate_gc_content function in bin/script_bed.py."""
+import pytest
+from bin.script_bed import calculate_total_bed_length
+
+
+@pytest.fixture(name="valid_bed")
+def valid_bed_factory(tmp_path):
+    """Creates a valid 3-line BED file."""
+    f = tmp_path / "valid.bed"
+    f.write_text("chr1\t100\t200\nchr2\t0\t50")
+    return f
+
+
+@pytest.fixture(name="malformed_bed")
+def malformed_bed_factory(tmp_path):
+    """Creates a BED file with text in the coordinate columns."""
+    f = tmp_path / "text_coords.bed"
+    f.write_text("chr1\tstring_data\t200")
+    return f
+
+
+class TestBedDocumentationContract:
+    """Verifies that the function adheres to its docstring specifications."""
+
+    def test_returns_correct_int(self, valid_bed):
+        """Verifies the 'Returns' section of the docstring."""
+        result = calculate_total_bed_length(valid_bed)
+        assert isinstance(result, int)
+        assert result == 150
+
+    def test_raises_file_not_found(self):
+        """Verifies the 'Raises FileNotFoundError' section."""
+        with pytest.raises(FileNotFoundError):
+            calculate_total_bed_length("imaginary_file.bed")
+
+    def test_raises_type_error(self, malformed_bed):
+        """Verifies the 'Raises TypeError' section."""
+        with pytest.raises(TypeError, match="must be numeric"):
+            calculate_total_bed_length(malformed_bed)
+
+    def test_raises_value_error(self, tmp_path):
+        """Verifies the 'Raises ValueError' section."""
+        f = tmp_path / "invalid_coords.bed"
+        f.write_text("chr1\t200\t100")  # start > end
+        with pytest.raises(ValueError, match="start > end"):
+            calculate_total_bed_length(f)
+
+    def test_empty_file_returns_zero(self, tmp_path):
+        """Verifies that an empty BED file returns 0."""
+        f = tmp_path / "empty.bed"
+        f.write_text("")
+        result = calculate_total_bed_length(f)
+        assert result == 0

From 41a6b0bc789438bdb41321bd6e5e692e19ea731c Mon Sep 17 00:00:00 2001
From: Arun <arun.karnani.k@gmail.com>
Date: Thu, 26 Feb 2026 09:53:28 +0000
Subject: [PATCH 2/3] Address PEP8 and coderabbit issues.

---
 .../Unit_testing_python_2026/example_code/README.md        | 4 +++-
 .../example_code/bin/clinvar_utils.py                      | 3 ++-
 .../example_code/tests/test_clinvar_utils.py               | 6 ++++--
 .../example_code/tests/test_script.py                      | 7 ++++---
 .../example_code/tests/test_script_bed.py                  | 4 ++--
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/learning-sessions/Unit_testing_python_2026/example_code/README.md b/learning-sessions/Unit_testing_python_2026/example_code/README.md
index 60c58d9..04e2c11 100644
--- a/learning-sessions/Unit_testing_python_2026/example_code/README.md
+++ b/learning-sessions/Unit_testing_python_2026/example_code/README.md
@@ -2,7 +2,9 @@
 
 Here you will find the scripts that I have used to present some examples for unit testing. Feel free to use this to see how unit testing works, or feel free to use these scripts as a template to your unit testing.
 
-To run pytest, simply run `pytest` in terminal, inside the `unit_test_example_code` folder. To run pytest-cov, run the following line instead from your terminal:
+To run the tests, run `pytest` in a terminal from inside the `example_code` folder. To also produce a coverage report with pytest-cov, run the command in the code block below instead:
+
+**Prerequisites:** Install dependencies with `pip install pytest pytest-cov pytest-mock requests pandas` before running the tests.
 
 ```bash
 pytest --cov=bin --cov-report=html
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py b/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py
index 5415a66..8764143 100644
--- a/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py
+++ b/learning-sessions/Unit_testing_python_2026/example_code/bin/clinvar_utils.py
@@ -38,7 +38,8 @@ def get_clinvar_significance(variation_id):
     # Parse the nested JSON structure to get the significance
     try:
         significance = data['clinical_significance']['description']
-        return significance
     except KeyError as exc:
         raise ValueError(
             f"Could not find significance data for ID {variation_id}") from exc
+    else:
+        return significance
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py
index 51af840..6b77109 100644
--- a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py
+++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_clinvar_utils.py
@@ -1,4 +1,4 @@
-"""Unit tests for the calculate_gc_content function in bin/clinvar_utils.py."""
+"""Unit tests for get_clinvar_significance function in bin/clinvar_utils.py."""
 import pytest
 from bin.clinvar_utils import get_clinvar_significance
 
@@ -21,7 +21,9 @@ def mock_clinvar_factory(mocker):
 class TestClinVarUtils:
     """Tests for the get_clinvar_significance function."""
 
-    def test_significance_logic(self, _clinvar_mock):
+    def test_significance_logic(
+        self, clinvar_mock  # pylint: disable=unused-argument
+    ):
         """
         Test that the function correctly extracts clinical
         significance from mocked API response.
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py
index d949db7..0c54b11 100644
--- a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py
+++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py
@@ -38,7 +38,8 @@ def test_rounding(self):
         assert calculate_gc_content("GAT") == 33.33
 
     def test_invalid_characters(self):
-        """Test that a ValueError is raised for sequences with invalid characters."""
-        with pytest.raises(ValueError, match="Sequence contains invalid characters. "
+        """Test if ValueError is raised for sequences with invalid chars."""
+        with pytest.raises(ValueError,
+                           match="Sequence contains invalid characters. "
                            "Only A, T, G, C are allowed."):
-            calculate_gc_content("ATGCX")
\ No newline at end of file
+            calculate_gc_content("ATGCX")
diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py
index 43ededc..15076d1 100644
--- a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py
+++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script_bed.py
@@ -1,11 +1,11 @@
-"""Unit tests for the calculate_gc_content function in bin/script_bed.py."""
+"""Unit tests for calculate_total_bed_length function in bin/script_bed.py."""
 import pytest
 from bin.script_bed import calculate_total_bed_length
 
 
 @pytest.fixture(name="valid_bed")
 def valid_bed_factory(tmp_path):
-    """Creates a valid 3-line BED file."""
+    """Creates a valid 2-line BED file."""
     f = tmp_path / "valid.bed"
     f.write_text("chr1\t100\t200\nchr2\t0\t50")
     return f

From 1af58e4dd05da63b6857bb48cdc2a3c8e3890124 Mon Sep 17 00:00:00 2001
From: Arun <arun.karnani.k@gmail.com>
Date: Thu, 26 Feb 2026 10:01:35 +0000
Subject: [PATCH 3/3] One final coderabbit comment to address

---
 .../Unit_testing_python_2026/example_code/tests/test_script.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py
index 0c54b11..1ca0914 100644
--- a/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py
+++ b/learning-sessions/Unit_testing_python_2026/example_code/tests/test_script.py
@@ -40,6 +40,5 @@ def test_rounding(self):
     def test_invalid_characters(self):
         """Test if ValueError is raised for sequences with invalid chars."""
         with pytest.raises(ValueError,
-                           match="Sequence contains invalid characters. "
-                           "Only A, T, G, C are allowed."):
+                           match="Sequence contains invalid characters"):
             calculate_gc_content("ATGCX")