Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
run: python3 -m build

- name: Install AlphaImpute2
run: pip install dist/alphaimpute2-0.0.2-py3-none-any.whl
run: pip install dist/alphaimpute2-0.0.3-py3-none-any.whl

- name: Install pytest
run: |
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ __pycache__/
build/
example/outputs
.env
tests/functional_tests/outputs
tests/*_tests/outputs
tests/accuracy_tests/accu_report.txt
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ AlphaImpute2 is available on [PyPI](https://pypi.org/project/AlphaImpute2/):

pip install AlphaImpute2

Availability
============
Example
=======

The AlphaImpute2.zip file contains a python3 wheel file, a manual, and an example dataset.
An example dataset and corresponding script are available in the ``example/`` folder.

Conditions of use
=================
Expand Down
188 changes: 187 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,198 @@
import pytest
import operator
import os
import shutil
import numpy as np


@pytest.fixture(scope="session")
def get_params():
param_file = os.path.join("tests", "accuracy_tests", "simulation_parameters.txt")
with open(param_file, "r") as file:
sim_params = [line.strip().split() for line in file]

params = {}
for param_name, param_value in sim_params:
params[param_name] = float(param_value)

return params


def pytest_configure(config):
"""
Prepare path and report file for functional tests
Prepare path and report file for accuracy tests and functional tests
"""
accu_output_path = os.path.join("tests", "accuracy_tests", "outputs")
if os.path.exists(accu_output_path):
shutil.rmtree(accu_output_path)
os.mkdir(accu_output_path)

func_output_path = os.path.join("tests", "functional_tests", "outputs")
if os.path.exists(func_output_path):
shutil.rmtree(func_output_path)
os.mkdir(func_output_path)

report_path = os.path.join("tests", "accuracy_tests", "accu_report.txt")
f = open(report_path, "w")
f.close()


@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport():
out = yield
report = out.get_result()
if (
report.nodeid[:37] == "tests/accuracy_tests/run_accu_test.py"
and report.when == "call"
):
stdout = report.sections
if "combined" in report.nodeid or "ped_only" in report.nodeid:
num_file = 3
else:
num_file = 2
accu = stdout[-1][-1].split("\n")[-(2 + 3 * num_file) :]
name = accu[0].split()[-1]
with open("tests/accuracy_tests/accu_report.txt", "a") as file:
for i in range(num_file):
assessed_file = accu[i * 3 + 1].split()[-1]
file.write(name + " " + assessed_file + " " + accu[i * 3 + 2] + "\n")
file.write(name + " " + assessed_file + " " + accu[i * 3 + 3] + "\n")


@pytest.hookimpl()
def pytest_terminal_summary(terminalreporter):
param_file = os.path.join("tests", "accuracy_tests", "simulation_parameters.txt")
with open(param_file, "r") as file:
sim_params = [line.strip().split() for line in file]

params = {}
for param_name, param_value in sim_params:
params[param_name] = float(param_value)

nGen = int(params["nGen"])

file_types = [
"genotypes",
"haplotypes",
"segregation",
]
columns = (
"Test Name",
"File Name",
"Accu Type",
"Population Accu",
"Gen1 Accu",
"Gen2 Accu",
"Gen3 Accu",
"Gen4 Accu",
"Gen5 Accu",
)
dt = {"names": columns, "formats": ("U76", "U28", "U25") + ("f4",) * (nGen + 1)}
accu = np.loadtxt("tests/accuracy_tests/accu_report.txt", encoding=None, dtype=dt)

mkr_accu = list(
filter(
lambda x: x["Accu Type"] == "Marker_accuracies",
accu,
)
)
ind_accu = list(
filter(
lambda x: x["Accu Type"] == "Individual_accuracies",
accu,
)
)

mkr_accu_file = {}
ind_accu_file = {}
for file_type in file_types:
mkr_accu_file[file_type] = list(
filter(
lambda x: x["File Name"] == file_type,
mkr_accu,
)
)
ind_accu_file[file_type] = list(
filter(
lambda x: x["File Name"] == file_type,
ind_accu,
)
)

test_names = list(
map(
lambda x: x["Test Name"],
filter(
lambda x: x["File Name"] == file_types[0],
mkr_accu,
),
)
)
test_nums = {}
count = 0
for test_name in test_names:
count += 1
test_nums[test_name] = count

sorted_mkr_accu_file = {}
sorted_ind_accu_file = {}
for file_type in file_types:
sorted_mkr_accu_file[file_type] = sorted(
mkr_accu_file[file_type],
key=operator.itemgetter(*columns[3:]),
reverse=True,
)
sorted_ind_accu_file[file_type] = sorted(
ind_accu_file[file_type],
key=operator.itemgetter(*columns[3:]),
reverse=True,
)

format_first_row = "{:<20} " * (nGen + 2) + "{:<35} "
format_row = "{:<20.0f} " + "{:<20.3f} " * (nGen + 1) + "{:<35} "
out_columns = (
"Test Num",
"Population Accu",
"Gen1 Accu",
"Gen2 Accu",
"Gen3 Accu",
"Gen4 Accu",
"Gen5 Accu",
"Test Name",
)

def write_report(accu_type, sorted=False):
if not sorted:
terminalreporter.write_sep("=", accu_type + " Accuracy")
if accu_type == "Marker":
reports = mkr_accu_file
else:
reports = ind_accu_file
else:
terminalreporter.write_sep("=", accu_type + " Accuracy (Order by accuracy)")
if accu_type == "Marker":
reports = sorted_mkr_accu_file
else:
reports = sorted_ind_accu_file

for file_type in file_types:
terminalreporter.write_sep("-", file_type)
terminalreporter.write_line(format_first_row.format(*out_columns))
for test in reports[file_type]:
terminalreporter.write_line(
format_row.format(
test_nums[test["Test Name"]],
test["Population Accu"],
test["Gen1 Accu"],
test["Gen2 Accu"],
test["Gen3 Accu"],
test["Gen4 Accu"],
test["Gen5 Accu"],
test["Test Name"],
)
)

write_report("Marker", sorted=False)
write_report("Individual", sorted=False)
write_report("Marker", sorted=True)
write_report("Individual", sorted=True)
44 changes: 10 additions & 34 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,17 @@ AlphaImpute2 is program to perform imputation in a range of animal and plant spe

Please report any issues to `John.Hickey@roslin.ed.ac.uk <John.Hickey@roslin.ed.ac.uk>`_ or `awhalen@roslin.ed.ac.uk <awhalen@roslin.ed.ac.uk>`_.

Availability
Installation
------------

AlphaImpute2 is available from the `AlphaGenes <http://www.alphagenes.roslin.ed.ac.uk/software-packages/AlphaImpute2/>`_ website. The download files contains a python wheel file along with this documentation and an example.
AlphaImpute2 is available from the `PyPI <https://pypi.org/project/AlphaImpute2/>`_ website.

You can install the latest release with

.. code-block:: bash
pip install AlphaImpute2

The source code is available at `GitHub <https://github.com/AlphaGenes/AlphaImpute2>`_.

Conditions of use
-----------------
Expand Down Expand Up @@ -68,29 +75,20 @@ Input Arguments
::

Input arguments:
-bfile [BFILE ...] A file in plink (binary) format. Only stable on Linux).
-genotypes [GENOTYPES ...]
A file in AlphaGenes format.
-reference [REFERENCE ...]
A haplotype reference panel in AlphaGenes format.
-seqfile [SEQFILE ...]
A sequence data file.
-pedigree [PEDIGREE ...]
A pedigree file in AlphaGenes format.
-phasefile [PHASEFILE ...]
A phase file in AlphaGenes format.
-startsnp STARTSNP The first marker to consider. The first marker in the file is marker '1'. Default: 1.
-stopsnp STOPSNP The last marker to consider. Default: all markers considered.
-seed SEED A random seed to use for debugging.

AlphaImpute2 requires a genotype file and an optional pedigree file to run the analysis.

AlphaImpute2 supports binary plink files, ``-bfile``, genotype files in the AlphaGenesFormat, ``-genotypes``. A pedigree file may be supplied using the ``-pedigree`` option.
A pedigree file may be supplied using the ``-pedigree`` option.

Use the ``-startsnp`` and ``-stopsnp`` comands to run the analysis only on a subset of markers.

Binary plink files require the package ``alphaplinkpython``. This can be installed via ``pip`` but is only stable for Linux.

Imputation arguments:
------------------------
::
Expand Down Expand Up @@ -221,28 +219,6 @@ Example: ::
id3 id1 id2
id4 id1 id2

Phase file
-----------

The phase file gives the phased haplotypes (either 0 or 1) for each individual in two lines. For individuals where we can determine the haplotype of origin, the first line will provide information on the paternal haplotype, and the second line will provide information on the maternal haplotype.

Example: ::

id1 0 1 9 0 # Maternal haplotype
id1 0 1 9 0 # Paternal haplotype
id2 1 1 1 0
id2 0 0 0 1
id3 1 0 1 0
id3 1 0 1 0
id4 0 1 0 0
id4 0 1 1 0


Binary plink file
-----------------

AlphaImpute2 supports the use of binary plink files using the package ``AlphaPlinkPython``. AlphaImpute2 will use the pedigree supplied by the ``.fam`` file if a pedigree file is not supplied. Otherwise the pedigree file will be used and the ``.fam`` file will be ignored.


Output file formats
~~~~~~~~~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "AlphaImpute2"
version = "0.0.2"
version = "0.0.3"
authors = [
{ name="Andrew Whalen", email="awhalen@roslin.ed.ac.uk" },
]
Expand Down
23 changes: 19 additions & 4 deletions src/alphaimpute2/Imputation/Heuristic_Peeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,21 +206,36 @@ def call_genotypes(ind, final_cutoff, error_rate):
else:
nLoci = len(ind.genotypes)

penetrance = np.full((4, nLoci), 1, dtype=np.float32)
ind.peeling_view.setValueFromGenotypes(penetrance, error_rate)
genotypeProbabilities = np.full((4, nLoci), 1, dtype=np.float32)
ind.peeling_view.setValueFromGenotypes(genotypeProbabilities, error_rate)

if ind.sire is not None and ind.dam is not None:
# Re-calculate parent genotypes with all sources of information.
ind.sire.peeling_view.setGenotypesAll(ind.sire.peeling_view.currentCutoff)
ind.dam.peeling_view.setGenotypesAll(ind.dam.peeling_view.currentCutoff)

# Calculate anterior from the parents.

genotypeProbabilities = ind.peeling_view.genotypeProbabilities
anterior = getAnterior(
ind.peeling_view, ind.sire.peeling_view, ind.dam.peeling_view
)
penetrance *= anterior # Add the penetrance with the anterior. Normalization will happen within the function.
genotypeProbabilities *= anterior # Add the penetrance with the anterior. Normalization will happen within the function.

ind.peeling_view.setGenotypesFromGenotypeProbabilities(
genotypeProbabilities, final_cutoff
)

ind.peeling_view.setGenotypesFromGenotypeProbabilities(penetrance, final_cutoff)
if final_cutoff < 0.5:
nLoci = len(ind.genotypes)
for i in range(nLoci):
if ind.genotypes[i] == 1:
if ind.haplotypes[0][i] + ind.haplotypes[1][i] != 1:
# correct the genotype-haplotype mismatch
phase_probs = ind.peeling_view.genotypeProbabilities[1:3, i]
phase = np.argmax(phase_probs)
ind.haplotypes[0][i] = phase
ind.haplotypes[1][i] = 1 - phase


@time_func("Core peeling cycles")
Expand Down
Loading