diff --git a/docs/source/As1.rst b/docs/source/As1.rst new file mode 100644 index 000000000..f0fe8cba7 --- /dev/null +++ b/docs/source/As1.rst @@ -0,0 +1,150 @@ +As1 +===== + +This page provides a curated list of functions and properties available in the ``malariagen_data`` API +for data on *Anopheles stephensi* species mosquitoes. + +To set up the API, use the following code:: + + import malariagen_data + as1 = malariagen_data.As1() + +All the functions below can then be accessed as methods on the ``as1`` object. E.g., to call the +``sample_metadata()`` function, do:: + + df_samples = as1.sample_metadata() + +For more information about the data and terms of use, please see the +`MalariaGEN website <https://www.malariagen.net/data>`_ or contact support@malariagen.net. + +.. currentmodule:: malariagen_data.as1.As1 + +Basic data access +----------------- +.. autosummary:: + :toctree: generated/ + + releases + sample_sets + lookup_release + lookup_study + +Reference genome data access +---------------------------- +.. autosummary:: + :toctree: generated/ + + contigs + genome_sequence + genome_features + plot_transcript + plot_genes + +Sample metadata access +---------------------- +.. autosummary:: + :toctree: generated/ + + sample_metadata + add_extra_metadata + clear_extra_metadata + lookup_sample + count_samples + plot_samples_bar + plot_samples_interactive_map + plot_sample_location_mapbox + plot_sample_location_geo + wgs_data_catalog + cohorts + +SNP data access +--------------- +.. autosummary:: + :toctree: generated/ + + site_mask_ids + snp_calls + snp_allele_counts + plot_snps + site_annotations + is_accessible + biallelic_snp_calls + biallelic_diplotypes + biallelic_snps_to_plink + +Integrative genomics viewer (IGV) +--------------------------------- +.. autosummary:: + :toctree: generated/ + + igv + view_alignments + +SNP frequency analysis +---------------------- +.. 
autosummary:: + :toctree: generated/ + + snp_allele_frequencies + snp_allele_frequencies_advanced + aa_allele_frequencies + aa_allele_frequencies_advanced + plot_frequencies_heatmap + plot_frequencies_time_series + plot_frequencies_interactive_map + +Principal components analysis (PCA) +----------------------------------- +.. autosummary:: + :toctree: generated/ + + pca + plot_pca_variance + plot_pca_coords + plot_pca_coords_3d + +Genetic distance and neighbour-joining trees (NJT) +-------------------------------------------------- +.. autosummary:: + :toctree: generated/ + + plot_njt + njt + biallelic_diplotype_pairwise_distances + +Heterozygosity analysis +----------------------- +.. autosummary:: + :toctree: generated/ + + plot_heterozygosity + roh_hmm + plot_roh + +Diversity analysis +------------------ +.. autosummary:: + :toctree: generated/ + + cohort_diversity_stats + diversity_stats + plot_diversity_stats + +Diplotype clustering +-------------------- +.. autosummary:: + :toctree: generated/ + + plot_diplotype_clustering + plot_diplotype_clustering_advanced + +Fst analysis +------------ +.. 
autosummary:: + :toctree: generated/ + + average_fst + pairwise_average_fst + plot_pairwise_average_fst + fst_gwss + plot_fst_gwss diff --git a/malariagen_data/__init__.py b/malariagen_data/__init__.py index 48e4eb2d9..69df5dfee 100644 --- a/malariagen_data/__init__.py +++ b/malariagen_data/__init__.py @@ -4,6 +4,7 @@ from .af1 import Af1 from .ag3 import Ag3 from .amin1 import Amin1 +from .as1 import As1 from .anopheles import AnophelesDataResource, Region from .pf7 import Pf7 from .pf8 import Pf8 diff --git a/malariagen_data/as1.py b/malariagen_data/as1.py new file mode 100644 index 000000000..728c9d7af --- /dev/null +++ b/malariagen_data/as1.py @@ -0,0 +1,237 @@ +import sys + +import plotly.express as px # type: ignore + +import malariagen_data +from .anopheles import AnophelesDataResource + +MAJOR_VERSION_NUMBER = 1 +MAJOR_VERSION_PATH = "v1.0" +CONFIG_PATH = "v1.0-config.json" +GCS_DEFAULT_URL = "gs://vo_aste_release_master_us_central1/" +GCS_DEFAULT_PUBLIC_URL = "gs://vo_aste_release_master_us_central1/" +GCS_REGION_URLS = { + "us-central1": "gs://vo_aste_release_master_us_central1", +} + +TAXON_PALETTE = px.colors.qualitative.Plotly +TAXON_COLORS = { + "stephensi": TAXON_PALETTE[0], +} + +XPEHH_GWSS_CACHE_NAME = "as1_xpehh_gwss_v1" +IHS_GWSS_CACHE_NAME = "as1_ihs_gwss_v1" +ROH_HMM_CACHE_NAME = "as1_roh_hmm_v1" + + +class As1(AnophelesDataResource): + """Provides access to data from As1.0 releases. + + Parameters + ---------- + url : str, optional + Base path to data. Defaults to use Google Cloud Storage, or can + be a local path on your file system if data have been downloaded. + site_filters_analysis : str, optional + Site filters analysis version. + bokeh_output_notebook : bool, optional + If True (default), configure bokeh to output plots to the notebook. + results_cache : str, optional + Path to directory on local file system to save results. + log : str or stream, optional + File path or stream output for logging messages. 
+ debug : bool, optional + Set to True to enable debug level logging. + show_progress : bool, optional + If True, show a progress bar during longer-running computations. The default can be overridden using an environmental variable named MGEN_SHOW_PROGRESS. + check_location : bool, optional + If True, use ipinfo to check the location of the client system. + **storage_options + Passed through to fsspec when setting up file system access. + + Examples + -------- + Access data from Google Cloud Storage (default): + + >>> import malariagen_data + >>> as1 = malariagen_data.As1() + + Access data downloaded to a local file system: + + >>> as1 = malariagen_data.As1("/local/path/to/vo_aste_release/") + + Access data from Google Cloud Storage, with caching on the local file system + in a directory named "gcs_cache": + + >>> as1 = malariagen_data.As1( + ... "simplecache::gs://vo_aste_release_master_us_central1", + ... simplecache=dict(cache_storage="gcs_cache"), + ... ) + + Set up caching of some longer-running computations on the local file system, + in a directory named "results_cache": + + >>> as1 = malariagen_data.As1(results_cache="results_cache") + + """ + + _xpehh_gwss_cache_name = XPEHH_GWSS_CACHE_NAME + _ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME + _roh_hmm_cache_name = ROH_HMM_CACHE_NAME + + def __init__( + self, + url=None, + public_url=GCS_DEFAULT_PUBLIC_URL, + bokeh_output_notebook=True, + results_cache=None, + log=sys.stdout, + debug=False, + show_progress=None, + check_location=True, + cohorts_analysis=None, + site_filters_analysis=None, + discordant_read_calls_analysis=None, + pre=False, + tqdm_class=None, + unrestricted_use_only=False, + surveillance_use_only=False, + **storage_options, + ): + super().__init__( + url=url, + public_url=public_url, + config_path=CONFIG_PATH, + cohorts_analysis=cohorts_analysis, + aim_analysis=None, + aim_metadata_dtype=None, + aim_ids=None, + aim_palettes=None, + site_filters_analysis=site_filters_analysis, + 
discordant_read_calls_analysis=discordant_read_calls_analysis, + default_site_mask="stephensi", + default_phasing_analysis="stephensi", + default_coverage_calls_analysis="stephensi", + bokeh_output_notebook=bokeh_output_notebook, + results_cache=results_cache, + log=log, + debug=debug, + show_progress=show_progress, + check_location=check_location, + pre=pre, + gcs_default_url=GCS_DEFAULT_URL, + gcs_region_urls=GCS_REGION_URLS, + major_version_number=MAJOR_VERSION_NUMBER, + major_version_path=MAJOR_VERSION_PATH, + gff_gene_type="protein_coding_gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + storage_options=storage_options, + tqdm_class=tqdm_class, + taxon_colors=TAXON_COLORS, + virtual_contigs=None, + inversion_tag_path=None, + unrestricted_use_only=unrestricted_use_only, + surveillance_use_only=surveillance_use_only, + ) + + def __repr__(self): + text = ( + f"\n" + f"Storage URL : {self._url}\n" + f"Data releases available : {', '.join(self._available_releases)}\n" + f"Results cache : {self._results_cache}\n" + f"Cohorts analysis : {self._cohorts_analysis}\n" + f"Site filters analysis : {self._site_filters_analysis}\n" + f"Software version : malariagen_data {malariagen_data.__version__}\n" + f"Client location : {self.client_location}\n" + f"Data filtered to unrestricted use only: {self._unrestricted_use_only}\n" + f"Data filtered to surveillance use only: {self._surveillance_use_only}\n" + f"Relevant data releases : {', '.join(self.releases)}\n" + f"---\n" + f"Please note that data are subject to terms of use,\n" + f"for more information see https://www.malariagen.net/data\n" + f"or contact support@malariagen.net. For API documentation see \n" + f"https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/As1.html" + ) + return text + + def _repr_html_(self): + html = f""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MalariaGEN As1 API client
+ Please note that data are subject to terms of use, + for more information see + the MalariaGEN website or contact support@malariagen.net. + See also the As1 API docs. +
+ Storage URL + {self._url}
+ Data releases available + {", ".join(self._available_releases)}
+ Results cache + {self._results_cache}
+ Cohorts analysis + {self._cohorts_analysis}
+ Site filters analysis + {self._site_filters_analysis}
+ Software version + malariagen_data {malariagen_data.__version__}
+ Client location + {self.client_location}
+ Data filtered for unrestricted use only + {self._unrestricted_use_only}
+ Data filtered for surveillance use only + {self._surveillance_use_only}
+ Relevant data releases + {", ".join(self.releases)}
+ """ + return html diff --git a/tests/anoph/test_base.py b/tests/anoph/test_base.py index c49486a3f..181b36b3d 100644 --- a/tests/anoph/test_base.py +++ b/tests/anoph/test_base.py @@ -10,6 +10,8 @@ from malariagen_data import af1 as _af1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 +from malariagen_data import as1 as _as1 + from malariagen_data.anoph.base import AnophelesBase from malariagen_data.util import LoggingHelper @@ -62,6 +64,18 @@ def amin1_sim_api(amin1_sim_fixture): ) +@pytest.fixture +def as1_sim_api(as1_sim_fixture): + return AnophelesBase( + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, + pre=False, + ) + + # N.B., here we use pytest_cases to parametrize tests. Each # function whose name begins with "case_" defines a set of # inputs to the test functions. See the documentation for @@ -90,6 +104,10 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_config(fixture, api): config = api.config diff --git a/tests/anoph/test_distance.py b/tests/anoph/test_distance.py index 1b969096e..3d7df316a 100644 --- a/tests/anoph/test_distance.py +++ b/tests/anoph/test_distance.py @@ -6,6 +6,8 @@ from malariagen_data import af1 as _af1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 +from malariagen_data import as1 as _as1 + from malariagen_data.anoph.distance import AnophelesDistanceAnalysis from malariagen_data.anoph import pca_params @@ -73,6 +75,24 @@ def adir1_sim_api(adir1_sim_fixture): ) +@pytest.fixture +def as1_sim_api(as1_sim_fixture): + return AnophelesDistanceAnalysis( + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + 
config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="protein_coding_gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="stephensi", + results_cache=as1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_as1.TAXON_COLORS, + ) + + # N.B., here we use pytest_cases to parametrize tests. Each # function whose name begins with "case_" defines a set of # inputs to the test functions. See the documentation for @@ -97,6 +117,10 @@ def case_adir1_sim(adir1_sim_fixture, adir1_sim_api): return adir1_sim_fixture, adir1_sim_api +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + def check_biallelic_diplotype_pairwise_distance(*, api, data_params, metric): # Check available data. ds = api.biallelic_snp_calls(**data_params) diff --git a/tests/anoph/test_fst.py b/tests/anoph/test_fst.py index 790eb11b1..a7b63e856 100644 --- a/tests/anoph/test_fst.py +++ b/tests/anoph/test_fst.py @@ -9,6 +9,8 @@ from malariagen_data import af1 as _af1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 +from malariagen_data import as1 as _as1 + from malariagen_data.anoph.fst import AnophelesFstAnalysis @@ -76,6 +78,24 @@ def adir1_sim_api(adir1_sim_fixture): ) +@pytest.fixture +def as1_sim_api(as1_sim_fixture): + return AnophelesFstAnalysis( + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="protein_coding_gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="stephensi", + results_cache=as1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_as1.TAXON_COLORS, + ) + + # N.B., here we use pytest_cases to 
parametrize tests. Each # function whose name begins with "case_" defines a set of # inputs to the test functions. See the documentation for @@ -100,6 +120,10 @@ def case_adir1_sim(adir1_sim_fixture, adir1_sim_api): return adir1_sim_fixture, adir1_sim_api +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_fst_gwss(fixture, api: AnophelesFstAnalysis): # Set up test parameters. diff --git a/tests/anoph/test_g123.py b/tests/anoph/test_g123.py index 80ca3a9e3..4c152ce1c 100644 --- a/tests/anoph/test_g123.py +++ b/tests/anoph/test_g123.py @@ -7,6 +7,8 @@ from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 +from malariagen_data import as1 as _as1 + from malariagen_data.anoph.g123 import AnophelesG123Analysis @@ -95,6 +97,25 @@ def amin1_sim_api(amin1_sim_fixture): ) +@pytest.fixture +def as1_sim_api(as1_sim_fixture): + return AnophelesG123Analysis( + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="protein_coding_gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="stephensi", + results_cache=as1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_as1.TAXON_COLORS, + default_phasing_analysis=None, + ) + + # N.B., here we use pytest_cases to parametrize tests. Each # function whose name begins with "case_" defines a set of # inputs to the test functions. 
See the documentation for @@ -123,6 +144,10 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + def check_g123_gwss(*, api, g123_params): # Run main gwss function under test. x, g123 = api.g123_gwss(**g123_params) diff --git a/tests/anoph/test_genome_features.py b/tests/anoph/test_genome_features.py index ba7563d97..f287543f4 100644 --- a/tests/anoph/test_genome_features.py +++ b/tests/anoph/test_genome_features.py @@ -7,7 +7,8 @@ from malariagen_data import af1 as _af1 from malariagen_data import ag3 as _ag3 -from malariagen_data import adir1 as _adir1 +from malariagen_data import as1 as _as1 + from malariagen_data.anoph.genome_features import AnophelesGenomeFeaturesData from malariagen_data.util import Region, _resolve_region @@ -44,13 +45,13 @@ def af1_sim_api(af1_sim_fixture): @pytest.fixture -def adir1_sim_api(adir1_sim_fixture): +def as1_sim_api(as1_sim_fixture): return AnophelesGenomeFeaturesData( - url=adir1_sim_fixture.url, - public_url=adir1_sim_fixture.url, - config_path=_adir1.CONFIG_PATH, - major_version_number=_adir1.MAJOR_VERSION_NUMBER, - major_version_path=_adir1.MAJOR_VERSION_PATH, + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, pre=False, gff_gene_type="protein_coding_gene", gff_gene_name_attribute="Note", @@ -70,6 +71,10 @@ def case_adir1_sim(adir1_sim_fixture, adir1_sim_api): return adir1_sim_fixture, adir1_sim_api +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + gff3_cols = [ "contig", "source", diff --git a/tests/anoph/test_genome_sequence.py b/tests/anoph/test_genome_sequence.py index 8550341b2..a50dcfaac 100644 --- a/tests/anoph/test_genome_sequence.py +++ b/tests/anoph/test_genome_sequence.py @@ -8,6 +8,8 @@ from 
malariagen_data import af1 as _af1 from malariagen_data import ag3 as _ag3 +from malariagen_data import as1 as _as1 + from malariagen_data.anoph.genome_sequence import AnophelesGenomeSequenceData from malariagen_data.util import Region @@ -37,6 +39,18 @@ def af1_sim_api(af1_sim_fixture): ) +@pytest.fixture +def as1_sim_api(as1_sim_fixture): + return AnophelesGenomeSequenceData( + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, + pre=False, + ) + + def case_ag3_sim(ag3_sim_fixture, ag3_sim_api): return ag3_sim_fixture, ag3_sim_api @@ -45,6 +59,10 @@ def case_af1_sim(af1_sim_fixture, af1_sim_api): return af1_sim_fixture, af1_sim_api +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_contigs(fixture, api): contigs = api.contigs diff --git a/tests/anoph/test_heterozygosity.py b/tests/anoph/test_heterozygosity.py index 10118bbf3..45d42a7d0 100644 --- a/tests/anoph/test_heterozygosity.py +++ b/tests/anoph/test_heterozygosity.py @@ -8,6 +8,8 @@ from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 +from malariagen_data import as1 as _as1 + from malariagen_data.anoph.heterozygosity import AnophelesHetAnalysis @@ -92,6 +94,24 @@ def amin1_sim_api(amin1_sim_fixture): ) +@pytest.fixture +def as1_sim_api(as1_sim_fixture): + return AnophelesHetAnalysis( + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="protein_coding_gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="stephensi", + 
results_cache=as1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_as1.TAXON_COLORS, + ) + + def case_ag3_sim(ag3_sim_fixture, ag3_sim_api): return ag3_sim_fixture, ag3_sim_api @@ -108,6 +128,10 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_plot_heterozygosity_track(fixture, api: AnophelesHetAnalysis): # Set up test parameters. diff --git a/tests/anoph/test_ld.py b/tests/anoph/test_ld.py index 96eb9ec91..59cf394d5 100644 --- a/tests/anoph/test_ld.py +++ b/tests/anoph/test_ld.py @@ -3,8 +3,8 @@ import pytest from pytest_cases import parametrize_with_cases -from malariagen_data import af1 as _af1 from malariagen_data import ag3 as _ag3 +from malariagen_data import as1 as _as1 from malariagen_data.anoph.ld import AnophelesLdAnalysis @@ -37,20 +37,20 @@ def ag3_sim_api(ag3_sim_fixture): @pytest.fixture -def af1_sim_api(af1_sim_fixture): +def as1_sim_api(as1_sim_fixture): return AnophelesLdAnalysis( - url=af1_sim_fixture.url, - public_url=af1_sim_fixture.url, - config_path=_af1.CONFIG_PATH, - major_version_number=_af1.MAJOR_VERSION_NUMBER, - major_version_path=_af1.MAJOR_VERSION_PATH, + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, pre=False, gff_gene_type="protein_coding_gene", gff_gene_name_attribute="Note", gff_default_attributes=("ID", "Parent", "Note", "description"), - default_site_mask="funestus", - results_cache=af1_sim_fixture.results_cache_path.as_posix(), - taxon_colors=_af1.TAXON_COLORS, + default_site_mask="stephensi", + results_cache=as1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_as1.TAXON_COLORS, ) @@ -62,6 +62,10 @@ def case_af1_sim(af1_sim_fixture, af1_sim_api): return af1_sim_fixture, 
af1_sim_api +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_ld_pruning_returns_fewer_snps(fixture, api: AnophelesLdAnalysis): region = random.choice(api.contigs) diff --git a/tests/anoph/test_pca.py b/tests/anoph/test_pca.py index 3516f9db7..c2964bdb4 100644 --- a/tests/anoph/test_pca.py +++ b/tests/anoph/test_pca.py @@ -8,6 +8,7 @@ from malariagen_data import af1 as _af1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 +from malariagen_data import as1 as _as1 from malariagen_data.anoph.pca import AnophelesPca from malariagen_data.anoph import pca_params @@ -76,6 +77,24 @@ def adir1_sim_api(adir1_sim_fixture): ) +@pytest.fixture +def as1_sim_api(as1_sim_fixture): + return AnophelesPca( + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="protein_coding_gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="stephensi", + results_cache=as1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_as1.TAXON_COLORS, + ) + + # N.B., here we use pytest_cases to parametrize tests. Each # function whose name begins with "case_" defines a set of # inputs to the test functions. See the documentation for @@ -100,6 +119,10 @@ def case_adir1_sim(adir1_sim_fixture, adir1_sim_api): return adir1_sim_fixture, adir1_sim_api +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_pca_plotting(fixture, api: AnophelesPca): # Parameters for selecting input data. 
diff --git a/tests/anoph/test_plink_converter.py b/tests/anoph/test_plink_converter.py index d926525ce..e3533bca8 100644 --- a/tests/anoph/test_plink_converter.py +++ b/tests/anoph/test_plink_converter.py @@ -5,6 +5,7 @@ from malariagen_data import af1 as _af1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 +from malariagen_data import as1 as _as1 from malariagen_data.anoph.to_plink import PlinkConverter @@ -77,6 +78,24 @@ def adir1_sim_api(adir1_sim_fixture): ) +@pytest.fixture +def as1_sim_api(as1_sim_fixture): + return PlinkConverter( + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="protein_coding_gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="stephensi", + results_cache=as1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_as1.TAXON_COLORS, + ) + + # N.B., here we use pytest_cases to parametrize tests. Each # function whose name begins with "case_" defines a set of # inputs to the test functions. See the documentation for @@ -97,6 +116,10 @@ def case_af1_sim(af1_sim_fixture, af1_sim_api): return af1_sim_fixture, af1_sim_api +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_plink_converter(fixture, api: PlinkConverter, tmp_path): # Parameters for selecting input data, filtering, and converting. 
diff --git a/tests/anoph/test_sample_metadata.py b/tests/anoph/test_sample_metadata.py index 59211924a..5fd59d87f 100644 --- a/tests/anoph/test_sample_metadata.py +++ b/tests/anoph/test_sample_metadata.py @@ -14,6 +14,7 @@ from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 +from malariagen_data import as1 as _as1 from malariagen_data.anoph.sample_metadata import AnophelesSampleMetadata @@ -123,6 +124,49 @@ def af1_sim_api(af1_sim_fixture): ) +@pytest.fixture +def as1_sim_unrestricted_use_only_api(as1_sim_fixture): + return AnophelesSampleMetadata( + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, + pre=False, + taxon_colors=_as1.TAXON_COLORS, + unrestricted_use_only=True, + ) + + +@pytest.fixture +def as1_sim_surveillance_use_only_api(as1_sim_fixture): + return AnophelesSampleMetadata( + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, + pre=False, + taxon_colors=_as1.TAXON_COLORS, + surveillance_use_only=True, + ) + + +@pytest.fixture +def as1_sim_unrestricted_surveillance_use_only_api(as1_sim_fixture): + return AnophelesSampleMetadata( + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, + pre=False, + taxon_colors=_as1.TAXON_COLORS, + unrestricted_use_only=True, + surveillance_use_only=True, + ) + + @pytest.fixture def adir1_sim_api(adir1_sim_fixture): return AnophelesSampleMetadata( @@ -150,45 +194,15 @@ def amin1_sim_api(amin1_sim_fixture): @pytest.fixture -def af1_sim_unrestricted_use_only_api(af1_sim_fixture): - return AnophelesSampleMetadata( - url=af1_sim_fixture.url, - 
public_url=af1_sim_fixture.url, - config_path=_af1.CONFIG_PATH, - major_version_number=_af1.MAJOR_VERSION_NUMBER, - major_version_path=_af1.MAJOR_VERSION_PATH, - pre=False, - taxon_colors=_af1.TAXON_COLORS, - unrestricted_use_only=True, - ) - - -@pytest.fixture -def af1_sim_surveillance_use_only_api(af1_sim_fixture): +def as1_sim_api(as1_sim_fixture): return AnophelesSampleMetadata( - url=af1_sim_fixture.url, - public_url=af1_sim_fixture.url, - config_path=_af1.CONFIG_PATH, - major_version_number=_af1.MAJOR_VERSION_NUMBER, - major_version_path=_af1.MAJOR_VERSION_PATH, + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, pre=False, - taxon_colors=_af1.TAXON_COLORS, - surveillance_use_only=True, - ) - - -@pytest.fixture -def af1_sim_unrestricted_surveillance_use_only_api(af1_sim_fixture): - return AnophelesSampleMetadata( - url=af1_sim_fixture.url, - public_url=af1_sim_fixture.url, - config_path=_af1.CONFIG_PATH, - major_version_number=_af1.MAJOR_VERSION_NUMBER, - major_version_path=_af1.MAJOR_VERSION_PATH, - pre=False, - taxon_colors=_af1.TAXON_COLORS, - unrestricted_use_only=True, - surveillance_use_only=True, + taxon_colors=_as1.TAXON_COLORS, ) @@ -234,6 +248,11 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@case(tags="as1") +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + @case def case_ag3_sim_unrestricted_use_only( ag3_sim_fixture, ag3_sim_unrestricted_use_only_api @@ -248,6 +267,13 @@ def case_af1_sim_unrestricted_use_only( return af1_sim_fixture, af1_sim_unrestricted_use_only_api +@case +def case_as1_sim_unrestricted_use_only( + as1_sim_fixture, as1_sim_unrestricted_use_only_api +): + return as1_sim_fixture, as1_sim_unrestricted_use_only_api + + @case def case_ag3_sim_surveillance_use_only( ag3_sim_fixture, 
ag3_sim_surveillance_use_only_api @@ -262,6 +288,13 @@ def case_af1_sim_surveillance_use_only( return af1_sim_fixture, af1_sim_surveillance_use_only_api +@case +def case_as1_sim_surveillance_use_only( + as1_sim_fixture, as1_sim_surveillance_use_only_api +): + return as1_sim_fixture, as1_sim_surveillance_use_only_api + + @case def case_ag3_sim_unrestricted_surveillance_use_only( ag3_sim_fixture, ag3_sim_unrestricted_surveillance_use_only_api @@ -276,6 +309,13 @@ def case_af1_sim_unrestricted_surveillance_use_only( return af1_sim_fixture, af1_sim_unrestricted_surveillance_use_only_api +@case +def case_as1_sim_unrestricted_surveillance_use_only( + as1_sim_fixture, as1_sim_unrestricted_surveillance_use_only_api +): + return as1_sim_fixture, as1_sim_unrestricted_surveillance_use_only_api + + def general_metadata_expected_columns(): return { "sample_id": "O", diff --git a/tests/anoph/test_snp_data.py b/tests/anoph/test_snp_data.py index c6162793a..146d82b94 100644 --- a/tests/anoph/test_snp_data.py +++ b/tests/anoph/test_snp_data.py @@ -15,7 +15,7 @@ from malariagen_data import af1 as _af1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 -from malariagen_data import amin1 as _amin1 +from malariagen_data import as1 as _as1 from malariagen_data.anoph.base_params import DEFAULT @@ -83,19 +83,19 @@ def adir1_sim_api(adir1_sim_fixture): @pytest.fixture -def amin1_sim_api(amin1_sim_fixture): +def as1_sim_api(as1_sim_fixture): return AnophelesSnpData( - url=amin1_sim_fixture.url, - public_url=amin1_sim_fixture.url, - config_path=_amin1.CONFIG_PATH, - major_version_number=_amin1.MAJOR_VERSION_NUMBER, - major_version_path=_amin1.MAJOR_VERSION_PATH, + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, pre=False, gff_gene_type="protein_coding_gene", gff_gene_name_attribute="Note", gff_default_attributes=("ID", 
"Parent", "Note", "description"), - default_site_mask="minimus", - results_cache=amin1_sim_fixture.results_cache_path.as_posix(), + default_site_mask="stephensi", + results_cache=as1_sim_fixture.results_cache_path.as_posix(), ) @@ -131,6 +131,11 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@case +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_open_snp_sites(fixture, api: AnophelesSnpData): root = api.open_snp_sites() diff --git a/tests/anoph/test_snp_frq.py b/tests/anoph/test_snp_frq.py index 1e5bda529..4736ccff5 100644 --- a/tests/anoph/test_snp_frq.py +++ b/tests/anoph/test_snp_frq.py @@ -11,6 +11,7 @@ from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 +from malariagen_data import as1 as _as1 from malariagen_data.anoph.snp_frq import AnophelesSnpFrequencyAnalysis @@ -105,6 +106,24 @@ def amin1_sim_api(amin1_sim_fixture): ) +@pytest.fixture +def as1_sim_api(as1_sim_fixture): + return AnophelesSnpFrequencyAnalysis( + url=as1_sim_fixture.url, + public_url=as1_sim_fixture.url, + config_path=_as1.CONFIG_PATH, + major_version_number=_as1.MAJOR_VERSION_NUMBER, + major_version_path=_as1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="protein_coding_gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="stephensi", + results_cache=as1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_as1.TAXON_COLORS, + ) + + # N.B., here we use pytest_cases to parametrize tests. Each # function whose name begins with "case_" defines a set of # inputs to the test functions. 
See the documentation for @@ -129,6 +148,10 @@ def case_adir1_sim(adir1_sim_fixture, adir1_sim_api): return adir1_sim_fixture, adir1_sim_api +def case_as1_sim(as1_sim_fixture, as1_sim_api): + return as1_sim_fixture, as1_sim_api + + @case(tags="single-sampleset") def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api