Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions deep_ancestry/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from flan import *
2 changes: 0 additions & 2 deletions flan/preprocess/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,12 @@ class QCArgs:
class QC:
def __init__(self, qc_config: Dict) -> None:
self.qc_config = qc_config

def fit_transform(self, cache: FileCache) -> None:
    """Invoke ``run_plink`` on the cached pfile with the stored QC options.

    The cache's pfile path is passed both as the ``--pfile`` input and as
    the ``--out`` value. Entries of ``self.qc_config`` are merged last, so
    a config key equal to one of the fixed keys replaces its value.

    Args:
        cache (FileCache): Supplies ``pfile_path()``.
    """
    # Positional plink arguments are built first, matching the original
    # evaluation order (pfile_path() is queried once per use).
    plink_flags = ['--pfile', str(cache.pfile_path()), 'vzs', '--make-pgen']

    # Fixed options first, then the user-supplied QC options on top.
    plink_options = {
        '--out': str(cache.pfile_path()),
        '--set-missing-var-ids': '@:#',
    }
    plink_options.update(self.qc_config)

    run_plink(args_list=plink_flags, args_dict=plink_options)


def transform(self, source_path: str, dest_path: str) -> None:
run_plink(args_list=['--make-pgen', '--pfile', str(source_path)],
args_dict={**{'--out': str(dest_path),
Expand Down
8 changes: 6 additions & 2 deletions flan/preprocess/sample_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ def _split_ids(self,
y: Optional labels; if passed, StratifiedKFold is used instead of KFold
random_state (int): Fixed random_state for train_test_split sklearn function
"""
# Fall back to a default of 5 folds when num_folds is not set (currently disabled):
# num_folds = getattr(self.args, "num_folds", 5)
# self.args.num_folds = num_folds

ids = pandas.read_table(cache.ids_path()).rename(columns={'#IID': 'IID'}).filter(['FID', 'IID'])
Comment thread
sourcery-ai[bot] marked this conversation as resolved.
indices = numpy.arange(ids.shape[0])
if self.args.num_folds == 1:
Expand Down Expand Up @@ -75,7 +79,7 @@ def _split_genotypes(self, cache: FileCache) -> None:
'--out': str(cache.pfile_path(fold_index, part))
},
args_list=['--make-pgen']
)
)

def _split_phenotypes(self, cache: FileCache) -> None:
phenotype = pandas.read_table(cache.phenotype_path(), names=['IID', 'ancestry', 'in_phase3'])
Expand All @@ -89,7 +93,7 @@ def _split_phenotypes(self, cache: FileCache) -> None:
)

def fit_transform(self, cache: FileCache) -> None:

self._split_ids(cache)
self._split_genotypes(cache)
self._split_phenotypes(cache)
Expand Down