diff --git a/deep_ancestry/__init__.py b/deep_ancestry/__init__.py new file mode 100644 index 0000000..850fd39 --- /dev/null +++ b/deep_ancestry/__init__.py @@ -0,0 +1 @@ +from flan import * \ No newline at end of file diff --git a/flan/preprocess/qc.py b/flan/preprocess/qc.py index 28b2b6e..2616fb9 100644 --- a/flan/preprocess/qc.py +++ b/flan/preprocess/qc.py @@ -14,14 +14,12 @@ class QCArgs: class QC: def __init__(self, qc_config: Dict) -> None: self.qc_config = qc_config - def fit_transform(self, cache: FileCache) -> None: run_plink(args_list=['--pfile', str(cache.pfile_path()), 'vzs', '--make-pgen'], args_dict={**{'--out': str(cache.pfile_path()), # Merging dicts here '--set-missing-var-ids': '@:#'}, **self.qc_config}) - def transform(self, source_path: str, dest_path: str) -> None: run_plink(args_list=['--make-pgen', '--pfile', str(source_path)], args_dict={**{'--out': str(dest_path), diff --git a/flan/preprocess/sample_splitter.py b/flan/preprocess/sample_splitter.py index 5a38c34..8644174 100644 --- a/flan/preprocess/sample_splitter.py +++ b/flan/preprocess/sample_splitter.py @@ -29,6 +29,10 @@ def _split_ids(self, y: y can be passed to trigger StratifiedKFold instead of KFold random_state (int): Fixed random_state for train_test_split sklearn function """ + # adding min 5 folds + # num_folds = getattr(self.args, "num_folds", 5) + # self.args.num_folds = num_folds + ids = pandas.read_table(cache.ids_path()).rename(columns={'#IID': 'IID'}).filter(['FID', 'IID']) indices = numpy.arange(ids.shape[0]) if self.args.num_folds == 1: @@ -75,7 +79,7 @@ def _split_genotypes(self, cache: FileCache) -> None: '--out': str(cache.pfile_path(fold_index, part)) }, args_list=['--make-pgen'] - ) + ) def _split_phenotypes(self, cache: FileCache) -> None: phenotype = pandas.read_table(cache.phenotype_path(), names=['IID', 'ancestry', 'in_phase3']) @@ -89,7 +93,7 @@ def _split_phenotypes(self, cache: FileCache) -> None: ) def fit_transform(self, cache: FileCache) -> None: - + self._split_ids(cache) self._split_genotypes(cache) self._split_phenotypes(cache)