HighlanderLab · pull · Dec 4, 2025 · Dec 4, 2025 · Dec 4, 2025
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -36,7 +36,7 @@ jobs:
               run: python3 -m build
 
             - name: Install AlphaImpute2
-              run: pip install dist/alphaimpute2-0.0.2-py3-none-any.whl
+              run: pip install dist/alphaimpute2-0.0.3-py3-none-any.whl
 
             - name: Install pytest
               run: |

diff --git a/.gitignore b/.gitignore
@@ -9,4 +9,5 @@ __pycache__/
 build/
 example/outputs
 .env
-tests/functional_tests/outputs
+tests/*_tests/outputs
+tests/accuracy_tests/accu_report.txt
diff --git a/README.md b/README.md
@@ -15,10 +15,10 @@ AlphaImpute2 is available on [PyPI](https://pypi.org/project/AlphaImpute2/):
 
     pip install AlphaImpute2
 
-Availability
-============
+Example
+=======
 
-The AlphaImpute2.zip file contains a python3 wheel file, a manual, and an example dataset.
+An example dataset and corresponding script are available in the ``example/`` folder. 
 
 Conditions of use
 =================

diff --git a/conftest.py b/conftest.py
@@ -1,12 +1,198 @@
+import pytest
+import operator
 import os
 import shutil
+import numpy as np
+
+
+@pytest.fixture(scope="session")
+def get_params():
+    param_file = os.path.join("tests", "accuracy_tests", "simulation_parameters.txt")
+    with open(param_file, "r") as file:
+        sim_params = [line.strip().split() for line in file]
+
+    params = {}
+    for param_name, param_value in sim_params:
+        params[param_name] = float(param_value)
+
+    return params
 
 
 def pytest_configure(config):
     """
-    Prepare path and report file for functional tests
+    Prepare path and report file for accuracy tests and functional tests
     """
+    accu_output_path = os.path.join("tests", "accuracy_tests", "outputs")
+    if os.path.exists(accu_output_path):
+        shutil.rmtree(accu_output_path)
+    os.mkdir(accu_output_path)
+
     func_output_path = os.path.join("tests", "functional_tests", "outputs")
     if os.path.exists(func_output_path):
         shutil.rmtree(func_output_path)
     os.mkdir(func_output_path)
+
+    report_path = os.path.join("tests", "accuracy_tests", "accu_report.txt")
+    f = open(report_path, "w")
+    f.close()
+
+
+@pytest.hookimpl(hookwrapper=True)
+def pytest_runtest_makereport():
+    out = yield
+    report = out.get_result()
+    if (
+        report.nodeid[:37] == "tests/accuracy_tests/run_accu_test.py"
+        and report.when == "call"
+    ):
+        stdout = report.sections
+        if "combined" in report.nodeid or "ped_only" in report.nodeid:
+            num_file = 3
+        else:
+            num_file = 2
+        accu = stdout[-1][-1].split("\n")[-(2 + 3 * num_file) :]
+        name = accu[0].split()[-1]
+        with open("tests/accuracy_tests/accu_report.txt", "a") as file:
+            for i in range(num_file):
+                assessed_file = accu[i * 3 + 1].split()[-1]
+                file.write(name + " " + assessed_file + " " + accu[i * 3 + 2] + "\n")
+                file.write(name + " " + assessed_file + " " + accu[i * 3 + 3] + "\n")
+
+
+@pytest.hookimpl()
+def pytest_terminal_summary(terminalreporter):
+    param_file = os.path.join("tests", "accuracy_tests", "simulation_parameters.txt")
+    with open(param_file, "r") as file:
+        sim_params = [line.strip().split() for line in file]
+
+    params = {}
+    for param_name, param_value in sim_params:
+        params[param_name] = float(param_value)
+
+    nGen = int(params["nGen"])
+
+    file_types = [
+        "genotypes",
+        "haplotypes",
+        "segregation",
+    ]
+    columns = (
+        "Test Name",
+        "File Name",
+        "Accu Type",
+        "Population Accu",
+        "Gen1 Accu",
+        "Gen2 Accu",
+        "Gen3 Accu",
+        "Gen4 Accu",
+        "Gen5 Accu",
+    )
+    dt = {"names": columns, "formats": ("U76", "U28", "U25") + ("f4",) * (nGen + 1)}
+    accu = np.loadtxt("tests/accuracy_tests/accu_report.txt", encoding=None, dtype=dt)
+
+    mkr_accu = list(
+        filter(
+            lambda x: x["Accu Type"] == "Marker_accuracies",
+            accu,
+        )
+    )
+    ind_accu = list(
+        filter(
+            lambda x: x["Accu Type"] == "Individual_accuracies",
+            accu,
+        )
+    )
+
+    mkr_accu_file = {}
+    ind_accu_file = {}
+    for file_type in file_types:
+        mkr_accu_file[file_type] = list(
+            filter(
+                lambda x: x["File Name"] == file_type,
+                mkr_accu,
+            )
+        )
+        ind_accu_file[file_type] = list(
+            filter(
+                lambda x: x["File Name"] == file_type,
+                ind_accu,
+            )
+        )
+
+    test_names = list(
+        map(
+            lambda x: x["Test Name"],
+            filter(
+                lambda x: x["File Name"] == file_types[0],
+                mkr_accu,
+            ),
+        )
+    )
+    test_nums = {}
+    count = 0
+    for test_name in test_names:
+        count += 1
+        test_nums[test_name] = count
+
+    sorted_mkr_accu_file = {}
+    sorted_ind_accu_file = {}
+    for file_type in file_types:
+        sorted_mkr_accu_file[file_type] = sorted(
+            mkr_accu_file[file_type],
+            key=operator.itemgetter(*columns[3:]),
+            reverse=True,
+        )
+        sorted_ind_accu_file[file_type] = sorted(
+            ind_accu_file[file_type],
+            key=operator.itemgetter(*columns[3:]),
+            reverse=True,
+        )
+
+    format_first_row = "{:<20} " * (nGen + 2) + "{:<35} "
+    format_row = "{:<20.0f} " + "{:<20.3f} " * (nGen + 1) + "{:<35} "
+    out_columns = (
+        "Test Num",
+        "Population Accu",
+        "Gen1 Accu",
+        "Gen2 Accu",
+        "Gen3 Accu",
+        "Gen4 Accu",
+        "Gen5 Accu",
+        "Test Name",
+    )
+
+    def write_report(accu_type, sorted=False):
+        if not sorted:
+            terminalreporter.write_sep("=", accu_type + " Accuracy")
+            if accu_type == "Marker":
+                reports = mkr_accu_file
+            else:
+                reports = ind_accu_file
+        else:
+            terminalreporter.write_sep("=", accu_type + " Accuracy (Order by accuracy)")
+            if accu_type == "Marker":
+                reports = sorted_mkr_accu_file
+            else:
+                reports = sorted_ind_accu_file
+
+        for file_type in file_types:
+            terminalreporter.write_sep("-", file_type)
+            terminalreporter.write_line(format_first_row.format(*out_columns))
+            for test in reports[file_type]:
+                terminalreporter.write_line(
+                    format_row.format(
+                        test_nums[test["Test Name"]],
+                        test["Population Accu"],
+                        test["Gen1 Accu"],
+                        test["Gen2 Accu"],
+                        test["Gen3 Accu"],
+                        test["Gen4 Accu"],
+                        test["Gen5 Accu"],
+                        test["Test Name"],
+                    )
+                )
+
+    write_report("Marker", sorted=False)
+    write_report("Individual", sorted=False)
+    write_report("Marker", sorted=True)
+    write_report("Individual", sorted=True)
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -22,10 +22,17 @@ AlphaImpute2 is program to perform imputation in a range of animal and plant spe
 
 Please report any issues to `John.Hickey@roslin.ed.ac.uk <John.Hickey@roslin.ed.ac.uk>`_ or `awhalen@roslin.ed.ac.uk <awhalen@roslin.ed.ac.uk>`_.
 
-Availability
+Installation
 ------------
 
-AlphaImpute2 is available from the `AlphaGenes <http://www.alphagenes.roslin.ed.ac.uk/software-packages/AlphaImpute2/>`_ website. The download files contains a python wheel file along with this documentation and an example. 
+AlphaImpute2 is available from the `PyPI <https://pypi.org/project/AlphaImpute2/>`_ website.
+
+You can install the latest release with
+
+.. code-block:: bash
+  pip install AlphaImpute2
+
+The source code is available at `GitHub <https://github.com/AlphaGenes/AlphaImpute2>`_.
 
 Conditions of use
 -----------------
@@ -68,29 +75,20 @@ Input Arguments
 ::
 
     Input arguments:
-      -bfile [BFILE ...]    A file in plink (binary) format. Only stable on Linux).
       -genotypes [GENOTYPES ...]
                             A file in AlphaGenes format.
-      -reference [REFERENCE ...]
-                            A haplotype reference panel in AlphaGenes format.
-      -seqfile [SEQFILE ...]
-                            A sequence data file.
       -pedigree [PEDIGREE ...]
                             A pedigree file in AlphaGenes format.
-      -phasefile [PHASEFILE ...]
-                            A phase file in AlphaGenes format.
       -startsnp STARTSNP    The first marker to consider. The first marker in the file is marker '1'. Default: 1.
       -stopsnp STOPSNP      The last marker to consider. Default: all markers considered.
       -seed SEED            A random seed to use for debugging.
 
 AlphaImpute2 requires a genotype file and an optional pedigree file to run the analysis.
 
-AlphaImpute2 supports binary plink files, ``-bfile``, genotype files in the AlphaGenesFormat, ``-genotypes``. A pedigree file may be supplied using the ``-pedigree`` option. 
+A pedigree file may be supplied using the ``-pedigree`` option. 
 
 Use the ``-startsnp`` and ``-stopsnp`` comands to run the analysis only on a subset of markers.
 
-Binary plink files require the package ``alphaplinkpython``. This can be installed via ``pip`` but is only stable for Linux.
-
 Imputation arguments: 
 ------------------------
 ::
@@ -221,28 +219,6 @@ Example: ::
   id3 id1 id2
   id4 id1 id2
 
-Phase file
------------
-
-The phase file gives the phased haplotypes (either 0 or 1) for each individual in two lines. For individuals where we can determine the haplotype of origin, the first line will provide information on the paternal haplotype, and the second line will provide information on the maternal haplotype.
-
-Example: ::
-
-  id1 0 1 9 0 # Maternal haplotype
-  id1 0 1 9 0 # Paternal haplotype
-  id2 1 1 1 0
-  id2 0 0 0 1
-  id3 1 0 1 0
-  id3 1 0 1 0 
-  id4 0 1 0 0
-  id4 0 1 1 0
-
-
-Binary plink file
------------------
-
-AlphaImpute2 supports the use of binary plink files using the package ``AlphaPlinkPython``. AlphaImpute2 will use the pedigree supplied by the ``.fam`` file if a pedigree file is not supplied. Otherwise the pedigree file will be used and the ``.fam`` file will be ignored. 
-
 
 Output file formats
 ~~~~~~~~~~~~~~~~~~~

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "AlphaImpute2"
-version = "0.0.2"
+version = "0.0.3"
 authors = [
   { name="Andrew Whalen", email="awhalen@roslin.ed.ac.uk" },
 ]

diff --git a/src/alphaimpute2/Imputation/Heuristic_Peeling.py b/src/alphaimpute2/Imputation/Heuristic_Peeling.py
@@ -206,21 +206,36 @@ def call_genotypes(ind, final_cutoff, error_rate):
     else:
         nLoci = len(ind.genotypes)
 
-        penetrance = np.full((4, nLoci), 1, dtype=np.float32)
-        ind.peeling_view.setValueFromGenotypes(penetrance, error_rate)
+        genotypeProbabilities = np.full((4, nLoci), 1, dtype=np.float32)
+        ind.peeling_view.setValueFromGenotypes(genotypeProbabilities, error_rate)
 
         if ind.sire is not None and ind.dam is not None:
             # Re-calculate parent genotypes with all sources of information.
             ind.sire.peeling_view.setGenotypesAll(ind.sire.peeling_view.currentCutoff)
             ind.dam.peeling_view.setGenotypesAll(ind.dam.peeling_view.currentCutoff)
 
             # Calculate anterior from the parents.
+
+            genotypeProbabilities = ind.peeling_view.genotypeProbabilities
             anterior = getAnterior(
                 ind.peeling_view, ind.sire.peeling_view, ind.dam.peeling_view
             )
-            penetrance *= anterior  # Add the penetrance with the anterior. Normalization will happen within the function.
+            genotypeProbabilities *= anterior  # Add the penetrance with the anterior. Normalization will happen within the function.
+
+        ind.peeling_view.setGenotypesFromGenotypeProbabilities(
+            genotypeProbabilities, final_cutoff
+        )
 
-        ind.peeling_view.setGenotypesFromGenotypeProbabilities(penetrance, final_cutoff)
+    if final_cutoff < 0.5:
+        nLoci = len(ind.genotypes)
+        for i in range(nLoci):
+            if ind.genotypes[i] == 1:
+                if ind.haplotypes[0][i] + ind.haplotypes[1][i] != 1:
+                    # correct the genotype-haplotype mismatch
+                    phase_probs = ind.peeling_view.genotypeProbabilities[1:3, i]
+                    phase = np.argmax(phase_probs)
+                    ind.haplotypes[0][i] = phase
+                    ind.haplotypes[1][i] = 1 - phase
 
 
 @time_func("Core peeling cycles")