diff --git a/docs/source/Afar1.rst b/docs/source/Afar1.rst new file mode 100644 index 000000000..cf4345c99 --- /dev/null +++ b/docs/source/Afar1.rst @@ -0,0 +1,150 @@ +Afar1 +===== + +This page provides a curated list of functions and properties available in the ``malariagen_data`` API +for data on mosquitoes from *Anopheles farauti*. + +To set up the API, use the following code:: + + import malariagen_data + afar1 = malariagen_data.Afar1() + +All the functions below can then be accessed as methods on the ``afar1`` object. E.g., to call the +``sample_metadata()`` function, do:: + + df_samples = afar1.sample_metadata() + +For more information about the data and terms of use, please see the +`MalariaGEN website <https://www.malariagen.net/data>`_ or contact support@malariagen.net. + +.. currentmodule:: malariagen_data.afar1.Afar1 + +Basic data access +----------------- +.. autosummary:: + :toctree: generated/ + + releases + sample_sets + lookup_release + lookup_study + +Reference genome data access +---------------------------- +.. autosummary:: + :toctree: generated/ + + contigs + genome_sequence + genome_features + plot_transcript + plot_genes + +Sample metadata access +---------------------- +.. autosummary:: + :toctree: generated/ + + sample_metadata + add_extra_metadata + clear_extra_metadata + lookup_sample + count_samples + plot_samples_bar + plot_samples_interactive_map + plot_sample_location_mapbox + plot_sample_location_geo + wgs_data_catalog + cohorts + +SNP data access +--------------- +.. autosummary:: + :toctree: generated/ + + site_mask_ids + snp_calls + snp_allele_counts + plot_snps + site_annotations + is_accessible + biallelic_snp_calls + biallelic_diplotypes + biallelic_snps_to_plink + +Integrative genomics viewer (IGV) +--------------------------------- +.. autosummary:: + :toctree: generated/ + + igv + view_alignments + +SNP frequency analysis +---------------------- +.. 
autosummary:: + :toctree: generated/ + + snp_allele_frequencies + snp_allele_frequencies_advanced + aa_allele_frequencies + aa_allele_frequencies_advanced + plot_frequencies_heatmap + plot_frequencies_time_series + plot_frequencies_interactive_map + +Principal components analysis (PCA) +----------------------------------- +.. autosummary:: + :toctree: generated/ + + pca + plot_pca_variance + plot_pca_coords + plot_pca_coords_3d + +Genetic distance and neighbour-joining trees (NJT) +-------------------------------------------------- +.. autosummary:: + :toctree: generated/ + + plot_njt + njt + biallelic_diplotype_pairwise_distances + +Heterozygosity analysis +----------------------- +.. autosummary:: + :toctree: generated/ + + plot_heterozygosity + roh_hmm + plot_roh + +Diversity analysis +------------------ +.. autosummary:: + :toctree: generated/ + + cohort_diversity_stats + diversity_stats + plot_diversity_stats + +Diplotype clustering +-------------------- +.. autosummary:: + :toctree: generated/ + + plot_diplotype_clustering + plot_diplotype_clustering_advanced + +Fst analysis +------------ +.. autosummary:: + :toctree: generated/ + + average_fst + pairwise_average_fst + plot_pairwise_average_fst + fst_gwss + plot_fst_gwss diff --git a/docs/source/index.rst b/docs/source/index.rst index f9658a78e..75bf13d68 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -43,6 +43,14 @@ API documentation .. image:: https://phil.cdc.gov//PHIL_Images/8777/8777_lores.jpg + .. grid-item-card:: ``Afar1`` + :link: Afar1 + :link-type: doc + + *Anopheles farauti*. + + .. image:: https://upload.wikimedia.org/wikipedia/commons/thumb/a/a1/Anopheles_farauti.jpg/640px-Anopheles_farauti.jpg + Documentation for the `Pf7 `_ (*Plasmodium falciparum*) and `Pv4 `_ (*Plasmodium vivax*) APIs is also available, currently hosted on a separate site. 
diff --git a/malariagen_data/__init__.py b/malariagen_data/__init__.py index 0934a0359..2f8ebb77e 100644 --- a/malariagen_data/__init__.py +++ b/malariagen_data/__init__.py @@ -2,6 +2,7 @@ from .adar1 import Adar1 from .adir1 import Adir1 from .af1 import Af1 +from .afar1 import Afar1 from .ag3 import Ag3 from .amin1 import Amin1 from .anopheles import AnophelesDataResource, Region diff --git a/malariagen_data/adar1.py b/malariagen_data/adar1.py index 1340a9554..4ca6f7c35 100644 --- a/malariagen_data/adar1.py +++ b/malariagen_data/adar1.py @@ -152,7 +152,7 @@ def __repr__(self): f"Please note that data are subject to terms of use,\n" f"for more information see https://www.malariagen.net/data\n" f"or contact support@malariagen.net. For API documentation see \n" - f"https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/Adir1.html" + f"https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/Adar1.html" ) return text @@ -167,7 +167,7 @@ def _repr_html_(self): Please note that data are subject to terms of use, for more information see the MalariaGEN website or contact support@malariagen.net. - See also the Adir1 API docs. + See also the Adar1 API docs. 
diff --git a/malariagen_data/afar1.py b/malariagen_data/afar1.py new file mode 100644 index 000000000..df88f1377 --- /dev/null +++ b/malariagen_data/afar1.py @@ -0,0 +1,239 @@ +import sys + +import plotly.express as px # type: ignore + +import malariagen_data +from .anopheles import AnophelesDataResource + +MAJOR_VERSION_NUMBER = 1 +MAJOR_VERSION_PATH = "v1.0" +CONFIG_PATH = "v1.0-config.json" +GCS_DEFAULT_URL = "gs://vo_afar_release_master_us_central1/" +GCS_DEFAULT_PUBLIC_URL = "gs://vo_afar_release_master_us_central1/" +GCS_REGION_URLS = { + "us-central1": "gs://vo_afar_release_master_us_central1", +} + +TAXON_PALETTE = px.colors.qualitative.Plotly +TAXON_COLORS = { + "farauti": TAXON_PALETTE[0], +} + +XPEHH_GWSS_CACHE_NAME = "afar1_xpehh_gwss_v1" +IHS_GWSS_CACHE_NAME = "afar1_ihs_gwss_v1" +ROH_HMM_CACHE_NAME = "afar1_roh_hmm_v1" + + +class Afar1(AnophelesDataResource): + """Provides access to data from Afar1.0 releases. + + Parameters + ---------- + url : str, optional + Base path to data. Defaults to use Google Cloud Storage, or can + be a local path on your file system if data have been downloaded. + site_filters_analysis : str, optional + Site filters analysis version. + bokeh_output_notebook : bool, optional + If True (default), configure bokeh to output plots to the notebook. + results_cache : str, optional + Path to directory on local file system to save results. + log : str or stream, optional + File path or stream output for logging messages. + debug : bool, optional + Set to True to enable debug level logging. + show_progress : bool, optional + If True, show a progress bar during longer-running computations. + The default can be overridden using an environmental variable + named MGEN_SHOW_PROGRESS. + check_location : bool, optional + If True, use ipinfo to check the location of the client system. + **kwargs + Passed through to fsspec when setting up file system access. 
+ + Examples + -------- + Access data from Google Cloud Storage (default): + + >>> import malariagen_data + >>> afar1 = malariagen_data.Afar1() + + Access data downloaded to a local file system: + + >>> afar1 = malariagen_data.Afar1("/local/path/to/vo_afar_release/") + + Access data from Google Cloud Storage, with caching on the local file system + in a directory named "gcs_cache": + + >>> afar1 = malariagen_data.Afar1( + ... "simplecache::gs://vo_afar_release_master_us_central1", + ... simplecache=dict(cache_storage="gcs_cache"), + ... ) + + Set up caching of some longer-running computations on the local file system, + in a directory named "results_cache": + + >>> afar1 = malariagen_data.Afar1(results_cache="results_cache") + + """ + + _xpehh_gwss_cache_name = XPEHH_GWSS_CACHE_NAME + _ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME + _roh_hmm_cache_name = ROH_HMM_CACHE_NAME + + def __init__( + self, + url=None, + public_url=GCS_DEFAULT_PUBLIC_URL, + bokeh_output_notebook=True, + results_cache=None, + log=sys.stdout, + debug=False, + show_progress=None, + check_location=True, + cohorts_analysis=None, + site_filters_analysis=None, + discordant_read_calls_analysis=None, + pre=False, + tqdm_class=None, + unrestricted_use_only=False, + surveillance_use_only=False, + **storage_options, + ): + super().__init__( + url=url, + public_url=public_url, + config_path=CONFIG_PATH, + cohorts_analysis=cohorts_analysis, + aim_analysis=None, + aim_metadata_dtype=None, + aim_ids=None, + aim_palettes=None, + site_filters_analysis=site_filters_analysis, + discordant_read_calls_analysis=discordant_read_calls_analysis, + default_site_mask="farauti", + default_phasing_analysis="farauti", + default_coverage_calls_analysis="farauti", + bokeh_output_notebook=bokeh_output_notebook, + results_cache=results_cache, + log=log, + debug=debug, + show_progress=show_progress, + check_location=check_location, + pre=pre, + gcs_default_url=GCS_DEFAULT_URL, + gcs_region_urls=GCS_REGION_URLS, + 
major_version_number=MAJOR_VERSION_NUMBER, + major_version_path=MAJOR_VERSION_PATH, + gff_gene_type="gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + storage_options=storage_options, + tqdm_class=tqdm_class, + taxon_colors=TAXON_COLORS, + virtual_contigs=None, + inversion_tag_path=None, + unrestricted_use_only=unrestricted_use_only, + surveillance_use_only=surveillance_use_only, + ) + + def __repr__(self): + text = ( + f"\n" + f"Storage URL : {self._url}\n" + f"Data releases available : {', '.join(self._available_releases)}\n" + f"Results cache : {self._results_cache}\n" + f"Cohorts analysis : {self._cohorts_analysis}\n" + f"Site filters analysis : {self._site_filters_analysis}\n" + f"Software version : malariagen_data {malariagen_data.__version__}\n" + f"Client location : {self.client_location}\n" + f"Data filtered to unrestricted use only: {self._unrestricted_use_only}\n" + f"Data filtered to surveillance use only: {self._surveillance_use_only}\n" + f"Relevant data releases : {', '.join(self.releases)}\n" + f"---\n" + f"Please note that data are subject to terms of use,\n" + f"for more information see https://www.malariagen.net/data\n" + f"or contact support@malariagen.net. For API documentation see \n" + f"https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/Afar1.html" + ) + return text + + def _repr_html_(self): + html = f""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MalariaGEN Afar1 API client
+ Please note that data are subject to terms of use, + for more information see + the MalariaGEN website or contact support@malariagen.net. + See also the Afar1 API docs. +
+ Storage URL + {self._url}
+ Data releases available + {", ".join(self._available_releases)}
+ Results cache + {self._results_cache}
+ Cohorts analysis + {self._cohorts_analysis}
+ Site filters analysis + {self._site_filters_analysis}
+ Software version + malariagen_data {malariagen_data.__version__}
+ Client location + {self.client_location}
+ Data filtered for unrestricted use only + {self._unrestricted_use_only}
+ Data filtered for surveillance use only + {self._surveillance_use_only}
+ Relevant data releases + {", ".join(self.releases)}
+ """ + return html diff --git a/tests/anoph/conftest.py b/tests/anoph/conftest.py index 505d17c46..ed58286cb 100644 --- a/tests/anoph/conftest.py +++ b/tests/anoph/conftest.py @@ -3228,3 +3228,301 @@ def adir1_sim_fixture(fixture_dir): @pytest.fixture(scope="session") def amin1_sim_fixture(fixture_dir): return Amin1Simulator(fixture_dir=fixture_dir, rng=create_rng("Amin1")) + + +class Afar1Simulator(AnophelesSimulator): + def __init__(self, fixture_dir, rng): + super().__init__( + fixture_dir=fixture_dir, + rng=rng, + bucket="vo_afar_release_master_us_central1", + releases=("1.0",), + has_aims=False, + has_cohorts_by_quarter=True, + has_sequence_qc=True, + ) + + def init_config(self): + self.config = { + "PUBLIC_RELEASES": ["1.0"], + "GENESET_GFF3_PATH": "reference/genome/AfarF4/AfarF4.gff", + "GENOME_FASTA_PATH": "reference/genome/AfarF4/AfarF4_Genome.fasta", + "GENOME_FAI_PATH": "reference/genome/AfarF4/AfarF4_Genome.fasta.fai", + "GENOME_ZARR_PATH": "reference/genome/AfarF4/AfarF4_Genome.zarr", + "GENOME_REF_ID": "AfarF4", + "GENOME_REF_NAME": "Anopheles farauti", + "CONTIGS": [ + "scaffold_1", + "scaffold_2", + "scaffold_3", + ], + "SITE_ANNOTATIONS_ZARR_PATH": "reference/genome/AfarF4/AfarF4_SEQANNOTATION.zarr", + "DEFAULT_SITE_FILTERS_ANALYSIS": "sc_20260101", + "DEFAULT_COHORTS_ANALYSIS": "20260101", + "DEFAULT_DISCORDANT_READ_CALLS_ANALYSIS": "", + "SITE_MASK_IDS": ["farauti"], + "PHASING_ANALYSIS_IDS": ["farauti"], + } + config_path = self.bucket_path / "v1.0-config.json" + with config_path.open(mode="w") as f: + json.dump(self.config, f, indent=4) + + def init_public_release_manifest(self): + release_path = self.bucket_path / "v1.0" + release_path.mkdir(parents=True, exist_ok=True) + manifest_path = release_path / "manifest.tsv" + manifest = pd.DataFrame( + { + "sample_set": [ + "1300-VO-PG-BEEBE-VMF00210", + ], + "sample_count": [20], + "study_id": [ + "1300-VO-PG-BEEBE-VMF00210", + ], + "study_url": [ + 
"https://www.malariagen.net/network/where-we-work/1300-VO-PG-BEEBE-VMF00210", + ], + "terms_of_use_expiry_date": [ + "2027-01-01", + ], + "terms_of_use_url": [ + "https://malariagen.github.io/vector-data/afar1/afar1.0.html#terms-of-use", + ], + } + ) + manifest.to_csv(manifest_path, index=False, sep="\t") + self.release_manifests["1.0"] = manifest + + def init_genome_sequence(self): + base_composition = { + b"a": 0.0, + b"c": 0.0, + b"g": 0.0, + b"t": 0.0, + b"n": 0.0, + b"A": 0.295, + b"C": 0.205, + b"G": 0.205, + b"T": 0.295, + b"N": 1.0e-05, + } + path = self.bucket_path / self.config["GENOME_ZARR_PATH"] + self.genome = simulate_genome( + path=path, + contigs=self.contigs, + low=80_000, + high=120_000, + base_composition=base_composition, + rng=self.rng, + ) + self.contig_sizes = { + contig: self.genome[contig].shape[0] for contig in self.contigs + } + + def init_genome_features(self): + path = self.bucket_path / self.config["GENESET_GFF3_PATH"] + path.parent.mkdir(parents=True, exist_ok=True) + simulator = Gff3Simulator( + contig_sizes=self.contig_sizes, + rng=self.rng, + gene_type="gene", + attrs=("Note", "description"), + ) + self.genome_features = simulator.simulate_gff(path=path) + + def write_metadata(self, release, release_path, sample_set, sequence_qc=True): + n_samples_sim = ( + self.release_manifests[release] + .set_index("sample_set") + .loc[sample_set]["sample_count"] + ) + + src_path = ( + self.fixture_dir + / "vo_afar_release_master_us_central1" + / release_path + / "metadata" + / "general" + / sample_set + / "samples.meta.csv" + ) + df_general = pd.read_csv(src_path) + df_general_ds = df_general.sample( + n_samples_sim, replace=False, random_state=self.rng + ) + samples_ds = df_general_ds["sample_id"].tolist() + dst_path = ( + self.bucket_path + / release_path + / "metadata" + / "general" + / sample_set + / "samples.meta.csv" + ) + dst_path.parent.mkdir(parents=True, exist_ok=True) + df_general_ds.to_csv(dst_path, index=False) + + if sequence_qc: 
+ src_path = ( + self.fixture_dir + / "vo_afar_release_master_us_central1" + / release_path + / "metadata" + / "curation" + / sample_set + / "sequence_qc_stats.csv" + ) + df_sequence_qc_stats = pd.read_csv(src_path) + df_sequence_qc_stats_ds = ( + df_sequence_qc_stats.set_index("sample_id") + .loc[samples_ds] + .reset_index() + ) + dst_path = ( + self.bucket_path + / release_path + / "metadata" + / "curation" + / sample_set + / "sequence_qc_stats.csv" + ) + dst_path.parent.mkdir(parents=True, exist_ok=True) + df_sequence_qc_stats_ds.to_csv(dst_path, index=False) + + src_path = ( + self.fixture_dir + / "vo_afar_release_master_us_central1" + / release_path + / "metadata" + / "cohorts_20260101" + / sample_set + / "samples.cohorts.csv" + ) + df_coh = pd.read_csv(src_path) + df_coh_ds = df_coh.set_index("sample_id").loc[samples_ds].reset_index() + dst_path = ( + self.bucket_path + / release_path + / "metadata" + / "cohorts_20260101" + / sample_set + / "samples.cohorts.csv" + ) + dst_path.parent.mkdir(parents=True, exist_ok=True) + df_coh_ds.to_csv(dst_path, index=False) + + src_path = ( + self.fixture_dir + / "vo_afar_release_master_us_central1" + / release_path + / "metadata" + / "general" + / sample_set + / "wgs_snp_data.csv" + ) + df_cat = pd.read_csv(src_path) + df_cat_ds = df_cat.set_index("sample_id").loc[samples_ds].reset_index() + dst_path = ( + self.bucket_path + / release_path + / "metadata" + / "general" + / sample_set + / "wgs_snp_data.csv" + ) + dst_path.parent.mkdir(parents=True, exist_ok=True) + df_cat_ds.to_csv(dst_path, index=False) + + src_path = ( + self.fixture_dir + / "vo_afar_release_master_us_central1" + / release_path + / "metadata" + / "general" + / sample_set + / "wgs_accession_data.csv" + ) + df_cat = pd.read_csv(src_path) + df_cat_ds = df_cat.set_index("sample_id").loc[samples_ds].reset_index() + dst_path = ( + self.bucket_path + / release_path + / "metadata" + / "general" + / sample_set + / "wgs_accession_data.csv" + ) + 
dst_path.parent.mkdir(parents=True, exist_ok=True) + df_cat_ds.to_csv(dst_path, index=False) + + def init_metadata(self): + self.write_metadata( + release="1.0", + release_path="v1.0", + sample_set="1300-VO-PG-BEEBE-VMF00210", + ) + + def init_snp_sites(self): + path = self.bucket_path / "v1.0/snp_genotypes/all/sites/" + self.snp_sites, self.n_snp_sites = simulate_snp_sites( + path=path, contigs=self.contigs, genome=self.genome + ) + + def init_site_filters(self): + analysis = self.config["DEFAULT_SITE_FILTERS_ANALYSIS"] + + mask = "farauti" + p_pass = 0.60 + path = self.bucket_path / "v1.0/site_filters" / analysis / mask + simulate_site_filters( + path=path, + contigs=self.contigs, + p_pass=p_pass, + n_sites=self.n_snp_sites, + rng=self.rng, + ) + + def init_snp_genotypes(self): + for release, manifest in self.release_manifests.items(): + release_path = f"v{release}" + + for rec in manifest.itertuples(): + sample_set = rec.sample_set + metadata_path = ( + self.bucket_path + / release_path + / "metadata" + / "general" + / sample_set + / "samples.meta.csv" + ) + + zarr_path = ( + self.bucket_path + / release_path + / "snp_genotypes" + / "all" + / sample_set + ) + + p_allele = np.array([0.981, 0.006, 0.008, 0.005]) + p_missing = np.array([0.95, 0.05]) + simulate_snp_genotypes( + zarr_path=zarr_path, + metadata_path=metadata_path, + contigs=self.contigs, + n_sites=self.n_snp_sites, + p_allele=p_allele, + p_missing=p_missing, + rng=self.rng, + ) + + def init_site_annotations(self): + path = self.bucket_path / self.config["SITE_ANNOTATIONS_ZARR_PATH"] + simulate_site_annotations(path=path, genome=self.genome, rng=self.rng) + + +@pytest.fixture(scope="session") +def afar1_sim_fixture(fixture_dir): + return Afar1Simulator(fixture_dir=fixture_dir, rng=create_rng("Afar1")) diff --git a/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/cohorts_20260101/1300-VO-PG-BEEBE-VMF00210/samples.cohorts.csv 
b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/cohorts_20260101/1300-VO-PG-BEEBE-VMF00210/samples.cohorts.csv new file mode 100644 index 000000000..90ff85739 --- /dev/null +++ b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/cohorts_20260101/1300-VO-PG-BEEBE-VMF00210/samples.cohorts.csv @@ -0,0 +1,31 @@ +sample_id,country_ISO,adm1_name,adm1_ISO,adm2_name,taxon,cohort_admin1_year,cohort_admin1_month,cohort_admin1_quarter,cohort_admin2_year,cohort_admin2_month,cohort_admin2_quarter +VBS20001-5100STDY8400001,PNG,Madang Province,PG-MPL,Madang,farauti,PG-MPL_fara_2020,PG-MPL_fara_2020_03,PG-MPL_fara_2020_Q1,PG-MPL_Madang_fara_2020,PG-MPL_Madang_fara_2020_03,PG-MPL_Madang_fara_2020_Q1 +VBS20002-5100STDY8400002,PNG,Madang Province,PG-MPL,Madang,farauti,PG-MPL_fara_2020,PG-MPL_fara_2020_03,PG-MPL_fara_2020_Q1,PG-MPL_Madang_fara_2020,PG-MPL_Madang_fara_2020_03,PG-MPL_Madang_fara_2020_Q1 +VBS20003-5100STDY8400003,PNG,Madang Province,PG-MPL,Madang,farauti,PG-MPL_fara_2020,PG-MPL_fara_2020_04,PG-MPL_fara_2020_Q2,PG-MPL_Madang_fara_2020,PG-MPL_Madang_fara_2020_04,PG-MPL_Madang_fara_2020_Q2 +VBS20004-5100STDY8400004,PNG,Madang Province,PG-MPL,Madang,farauti,PG-MPL_fara_2020,PG-MPL_fara_2020_04,PG-MPL_fara_2020_Q2,PG-MPL_Madang_fara_2020,PG-MPL_Madang_fara_2020_04,PG-MPL_Madang_fara_2020_Q2 +VBS20005-5100STDY8400005,PNG,Madang Province,PG-MPL,Madang,farauti,PG-MPL_fara_2020,PG-MPL_fara_2020_05,PG-MPL_fara_2020_Q2,PG-MPL_Madang_fara_2020,PG-MPL_Madang_fara_2020_05,PG-MPL_Madang_fara_2020_Q2 +VBS20006-5100STDY8400006,PNG,Madang Province,PG-MPL,Madang,farauti,PG-MPL_fara_2020,PG-MPL_fara_2020_06,PG-MPL_fara_2020_Q2,PG-MPL_Madang_fara_2020,PG-MPL_Madang_fara_2020_06,PG-MPL_Madang_fara_2020_Q2 +VBS20007-5100STDY8400007,PNG,Morobe Province,PG-MPM,Lae,farauti,PG-MPM_fara_2020,PG-MPM_fara_2020_03,PG-MPM_fara_2020_Q1,PG-MPM_Lae_fara_2020,PG-MPM_Lae_fara_2020_03,PG-MPM_Lae_fara_2020_Q1 +VBS20008-5100STDY8400008,PNG,Morobe 
Province,PG-MPM,Lae,farauti,PG-MPM_fara_2020,PG-MPM_fara_2020_04,PG-MPM_fara_2020_Q2,PG-MPM_Lae_fara_2020,PG-MPM_Lae_fara_2020_04,PG-MPM_Lae_fara_2020_Q2 +VBS20009-5100STDY8400009,PNG,Morobe Province,PG-MPM,Lae,farauti,PG-MPM_fara_2020,PG-MPM_fara_2020_05,PG-MPM_fara_2020_Q2,PG-MPM_Lae_fara_2020,PG-MPM_Lae_fara_2020_05,PG-MPM_Lae_fara_2020_Q2 +VBS20010-5100STDY8400010,PNG,Morobe Province,PG-MPM,Lae,farauti,PG-MPM_fara_2020,PG-MPM_fara_2020_06,PG-MPM_fara_2020_Q2,PG-MPM_Lae_fara_2020,PG-MPM_Lae_fara_2020_06,PG-MPM_Lae_fara_2020_Q2 +VBS20011-5100STDY8400011,PNG,Morobe Province,PG-MPM,Lae,farauti,PG-MPM_fara_2020,PG-MPM_fara_2020_07,PG-MPM_fara_2020_Q3,PG-MPM_Lae_fara_2020,PG-MPM_Lae_fara_2020_07,PG-MPM_Lae_fara_2020_Q3 +VBS20012-5100STDY8400012,PNG,Morobe Province,PG-MPM,Lae,farauti,PG-MPM_fara_2020,PG-MPM_fara_2020_08,PG-MPM_fara_2020_Q3,PG-MPM_Lae_fara_2020,PG-MPM_Lae_fara_2020_08,PG-MPM_Lae_fara_2020_Q3 +VBS20013-5100STDY8400013,PNG,East Sepik Province,PG-ESW,Wewak,farauti,PG-ESW_fara_2020,PG-ESW_fara_2020_03,PG-ESW_fara_2020_Q1,PG-ESW_Wewak_fara_2020,PG-ESW_Wewak_fara_2020_03,PG-ESW_Wewak_fara_2020_Q1 +VBS20014-5100STDY8400014,PNG,East Sepik Province,PG-ESW,Wewak,farauti,PG-ESW_fara_2020,PG-ESW_fara_2020_04,PG-ESW_fara_2020_Q2,PG-ESW_Wewak_fara_2020,PG-ESW_Wewak_fara_2020_04,PG-ESW_Wewak_fara_2020_Q2 +VBS20015-5100STDY8400015,PNG,East Sepik Province,PG-ESW,Wewak,farauti,PG-ESW_fara_2020,PG-ESW_fara_2020_05,PG-ESW_fara_2020_Q2,PG-ESW_Wewak_fara_2020,PG-ESW_Wewak_fara_2020_05,PG-ESW_Wewak_fara_2020_Q2 +VBS20016-5100STDY8400016,PNG,East Sepik Province,PG-ESW,Wewak,farauti,PG-ESW_fara_2020,PG-ESW_fara_2020_06,PG-ESW_fara_2020_Q2,PG-ESW_Wewak_fara_2020,PG-ESW_Wewak_fara_2020_06,PG-ESW_Wewak_fara_2020_Q2 +VBS20017-5100STDY8400017,SLB,Guadalcanal Province,SB-GU,Honiara,farauti,SB-GU_fara_2021,SB-GU_fara_2021_01,SB-GU_fara_2021_Q1,SB-GU_Honiara_fara_2021,SB-GU_Honiara_fara_2021_01,SB-GU_Honiara_fara_2021_Q1 +VBS20018-5100STDY8400018,SLB,Guadalcanal 
Province,SB-GU,Honiara,farauti,SB-GU_fara_2021,SB-GU_fara_2021_02,SB-GU_fara_2021_Q1,SB-GU_Honiara_fara_2021,SB-GU_Honiara_fara_2021_02,SB-GU_Honiara_fara_2021_Q1 +VBS20019-5100STDY8400019,SLB,Guadalcanal Province,SB-GU,Honiara,farauti,SB-GU_fara_2021,SB-GU_fara_2021_03,SB-GU_fara_2021_Q1,SB-GU_Honiara_fara_2021,SB-GU_Honiara_fara_2021_03,SB-GU_Honiara_fara_2021_Q1 +VBS20020-5100STDY8400020,SLB,Guadalcanal Province,SB-GU,Honiara,farauti,SB-GU_fara_2021,SB-GU_fara_2021_04,SB-GU_fara_2021_Q2,SB-GU_Honiara_fara_2021,SB-GU_Honiara_fara_2021_04,SB-GU_Honiara_fara_2021_Q2 +VBS20021-5100STDY8400021,SLB,Guadalcanal Province,SB-GU,Honiara,farauti,SB-GU_fara_2021,SB-GU_fara_2021_05,SB-GU_fara_2021_Q2,SB-GU_Honiara_fara_2021,SB-GU_Honiara_fara_2021_05,SB-GU_Honiara_fara_2021_Q2 +VBS20022-5100STDY8400022,SLB,Guadalcanal Province,SB-GU,Honiara,farauti,SB-GU_fara_2021,SB-GU_fara_2021_06,SB-GU_fara_2021_Q2,SB-GU_Honiara_fara_2021,SB-GU_Honiara_fara_2021_06,SB-GU_Honiara_fara_2021_Q2 +VBS20023-5100STDY8400023,SLB,Malaita Province,SB-ML,Auki,farauti,SB-ML_fara_2021,SB-ML_fara_2021_01,SB-ML_fara_2021_Q1,SB-ML_Auki_fara_2021,SB-ML_Auki_fara_2021_01,SB-ML_Auki_fara_2021_Q1 +VBS20024-5100STDY8400024,SLB,Malaita Province,SB-ML,Auki,farauti,SB-ML_fara_2021,SB-ML_fara_2021_02,SB-ML_fara_2021_Q1,SB-ML_Auki_fara_2021,SB-ML_Auki_fara_2021_02,SB-ML_Auki_fara_2021_Q1 +VBS20025-5100STDY8400025,SLB,Malaita Province,SB-ML,Auki,farauti,SB-ML_fara_2021,SB-ML_fara_2021_03,SB-ML_fara_2021_Q1,SB-ML_Auki_fara_2021,SB-ML_Auki_fara_2021_03,SB-ML_Auki_fara_2021_Q1 +VBS20026-5100STDY8400026,SLB,Malaita Province,SB-ML,Auki,farauti,SB-ML_fara_2021,SB-ML_fara_2021_04,SB-ML_fara_2021_Q2,SB-ML_Auki_fara_2021,SB-ML_Auki_fara_2021_04,SB-ML_Auki_fara_2021_Q2 +VBS20027-5100STDY8400027,SLB,Malaita Province,SB-ML,Auki,farauti,SB-ML_fara_2021,SB-ML_fara_2021_05,SB-ML_fara_2021_Q2,SB-ML_Auki_fara_2021,SB-ML_Auki_fara_2021_05,SB-ML_Auki_fara_2021_Q2 +VBS20028-5100STDY8400028,SLB,Malaita 
Province,SB-ML,Auki,farauti,SB-ML_fara_2021,SB-ML_fara_2021_06,SB-ML_fara_2021_Q2,SB-ML_Auki_fara_2021,SB-ML_Auki_fara_2021_06,SB-ML_Auki_fara_2021_Q2 +VBS20029-5100STDY8400029,VUT,Shefa Province,VU-SEE,Port Vila,farauti,VU-SEE_fara_2021,VU-SEE_fara_2021_01,VU-SEE_fara_2021_Q1,VU-SEE_Port-Vila_fara_2021,VU-SEE_Port-Vila_fara_2021_01,VU-SEE_Port-Vila_fara_2021_Q1 +VBS20030-5100STDY8400030,VUT,Shefa Province,VU-SEE,Port Vila,farauti,VU-SEE_fara_2021,VU-SEE_fara_2021_02,VU-SEE_fara_2021_Q1,VU-SEE_Port-Vila_fara_2021,VU-SEE_Port-Vila_fara_2021_02,VU-SEE_Port-Vila_fara_2021_Q1 diff --git a/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/curation/1300-VO-PG-BEEBE-VMF00210/sequence_qc_stats.csv b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/curation/1300-VO-PG-BEEBE-VMF00210/sequence_qc_stats.csv new file mode 100644 index 000000000..974c2fd1e --- /dev/null +++ b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/curation/1300-VO-PG-BEEBE-VMF00210/sequence_qc_stats.csv @@ -0,0 +1,31 @@ +sample_id,mean_cov,median_cov,modal_cov,frac_gen_cov,divergence,contam_pct,contam_LLR +VBS20001-5100STDY8400001,38.45,38,37,0.984,0.017,1.823,2785.036 +VBS20002-5100STDY8400002,42.31,42,41,0.984,0.017,2.148,3427.874 +VBS20003-5100STDY8400003,35.67,35,34,0.984,0.017,2.394,3603.611 +VBS20004-5100STDY8400004,44.12,44,43,0.984,0.017,1.995,3279.245 +VBS20005-5100STDY8400005,29.88,29,28,0.983,0.017,2.718,2556.379 +VBS20006-5100STDY8400006,37.56,37,36,0.984,0.017,2.019,2997.331 +VBS20007-5100STDY8400007,41.23,41,40,0.984,0.017,1.921,2708.875 +VBS20008-5100STDY8400008,33.91,33,33,0.984,0.017,2.482,2826.487 +VBS20009-5100STDY8400009,46.78,46,45,0.984,0.017,1.887,3068.332 +VBS20010-5100STDY8400010,35.14,35,34,0.984,0.017,2.574,2885.976 +VBS20011-5100STDY8400011,40.89,40,39,0.984,0.017,2.365,3435.056 +VBS20012-5100STDY8400012,38.22,38,37,0.984,0.018,3.147,5700.932 +VBS20013-5100STDY8400013,52.67,52,51,0.984,0.017,1.425,2115.483 
+VBS20014-5100STDY8400014,43.45,43,42,0.984,0.017,1.815,2540.393 +VBS20015-5100STDY8400015,36.11,36,35,0.984,0.017,2.513,3167.230 +VBS20016-5100STDY8400016,39.78,39,38,0.984,0.017,2.330,2828.396 +VBS20017-5100STDY8400017,31.56,31,30,0.984,0.017,2.942,3757.173 +VBS20018-5100STDY8400018,48.34,48,47,0.984,0.017,1.908,3168.616 +VBS20019-5100STDY8400019,37.12,37,36,0.984,0.017,2.778,4077.416 +VBS20020-5100STDY8400020,42.56,42,41,0.984,0.017,2.170,3225.560 +VBS20021-5100STDY8400021,30.78,30,29,0.983,0.017,2.366,2572.631 +VBS20022-5100STDY8400022,40.45,40,39,0.984,0.017,2.325,3377.131 +VBS20023-5100STDY8400023,36.89,36,36,0.984,0.017,1.810,2361.649 +VBS20024-5100STDY8400024,34.23,34,33,0.984,0.017,2.420,3059.231 +VBS20025-5100STDY8400025,25.67,25,24,0.983,0.018,3.263,3409.580 +VBS20026-5100STDY8400026,33.45,33,32,0.984,0.017,2.755,3655.926 +VBS20027-5100STDY8400027,39.01,39,38,0.984,0.017,2.483,3600.917 +VBS20028-5100STDY8400028,32.34,32,31,0.984,0.017,2.738,3364.031 +VBS20029-5100STDY8400029,47.89,47,46,0.985,0.017,2.711,5803.981 +VBS20030-5100STDY8400030,41.67,41,40,0.984,0.017,2.864,5038.697 diff --git a/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/samples.meta.csv b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/samples.meta.csv new file mode 100644 index 000000000..19bb768a2 --- /dev/null +++ b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/samples.meta.csv @@ -0,0 +1,31 @@ +sample_id,partner_sample_id,contributor,country,location,year,month,latitude,longitude,sex_call +VBS20001-5100STDY8400001,PG-AF-00101,Nigel Beebe,Papua New Guinea,Madang,2020,3,-5.207,145.789,F +VBS20002-5100STDY8400002,PG-AF-00102,Nigel Beebe,Papua New Guinea,Madang,2020,3,-5.207,145.789,F +VBS20003-5100STDY8400003,PG-AF-00103,Nigel Beebe,Papua New Guinea,Madang,2020,4,-5.207,145.789,F +VBS20004-5100STDY8400004,PG-AF-00104,Nigel 
Beebe,Papua New Guinea,Madang,2020,4,-5.207,145.789,F +VBS20005-5100STDY8400005,PG-AF-00105,Nigel Beebe,Papua New Guinea,Madang,2020,5,-5.207,145.789,F +VBS20006-5100STDY8400006,PG-AF-00106,Nigel Beebe,Papua New Guinea,Madang,2020,6,-5.207,145.789,F +VBS20007-5100STDY8400007,PG-AF-00107,Nigel Beebe,Papua New Guinea,Lae,2020,3,-6.734,147.000,F +VBS20008-5100STDY8400008,PG-AF-00108,Nigel Beebe,Papua New Guinea,Lae,2020,4,-6.734,147.000,F +VBS20009-5100STDY8400009,PG-AF-00109,Nigel Beebe,Papua New Guinea,Lae,2020,5,-6.734,147.000,F +VBS20010-5100STDY8400010,PG-AF-00110,Nigel Beebe,Papua New Guinea,Lae,2020,6,-6.734,147.000,F +VBS20011-5100STDY8400011,PG-AF-00111,Nigel Beebe,Papua New Guinea,Lae,2020,7,-6.734,147.000,F +VBS20012-5100STDY8400012,PG-AF-00112,Nigel Beebe,Papua New Guinea,Lae,2020,8,-6.734,147.000,F +VBS20013-5100STDY8400013,PG-AF-00113,Nigel Beebe,Papua New Guinea,Wewak,2020,3,-3.866,143.860,F +VBS20014-5100STDY8400014,PG-AF-00114,Nigel Beebe,Papua New Guinea,Wewak,2020,4,-3.866,143.860,F +VBS20015-5100STDY8400015,PG-AF-00115,Nigel Beebe,Papua New Guinea,Wewak,2020,5,-3.866,143.860,F +VBS20016-5100STDY8400016,PG-AF-00116,Nigel Beebe,Papua New Guinea,Wewak,2020,6,-3.866,143.860,F +VBS20017-5100STDY8400017,SB-AF-00201,Nigel Beebe,Solomon Islands,Honiara,2021,1,-9.432,160.000,F +VBS20018-5100STDY8400018,SB-AF-00202,Nigel Beebe,Solomon Islands,Honiara,2021,2,-9.432,160.000,F +VBS20019-5100STDY8400019,SB-AF-00203,Nigel Beebe,Solomon Islands,Honiara,2021,3,-9.432,160.000,F +VBS20020-5100STDY8400020,SB-AF-00204,Nigel Beebe,Solomon Islands,Honiara,2021,4,-9.432,160.000,F +VBS20021-5100STDY8400021,SB-AF-00205,Nigel Beebe,Solomon Islands,Honiara,2021,5,-9.432,160.000,F +VBS20022-5100STDY8400022,SB-AF-00206,Nigel Beebe,Solomon Islands,Honiara,2021,6,-9.432,160.000,F +VBS20023-5100STDY8400023,SB-AF-00207,Nigel Beebe,Solomon Islands,Auki,2021,1,-8.768,160.693,F +VBS20024-5100STDY8400024,SB-AF-00208,Nigel Beebe,Solomon Islands,Auki,2021,2,-8.768,160.693,F 
+VBS20025-5100STDY8400025,SB-AF-00209,Nigel Beebe,Solomon Islands,Auki,2021,3,-8.768,160.693,F +VBS20026-5100STDY8400026,SB-AF-00210,Nigel Beebe,Solomon Islands,Auki,2021,4,-8.768,160.693,F +VBS20027-5100STDY8400027,SB-AF-00211,Nigel Beebe,Solomon Islands,Auki,2021,5,-8.768,160.693,F +VBS20028-5100STDY8400028,SB-AF-00212,Nigel Beebe,Solomon Islands,Auki,2021,6,-8.768,160.693,F +VBS20029-5100STDY8400029,VU-AF-00301,Nigel Beebe,Vanuatu,Port Vila,2021,1,-17.734,168.322,F +VBS20030-5100STDY8400030,VU-AF-00302,Nigel Beebe,Vanuatu,Port Vila,2021,2,-17.734,168.322,F diff --git a/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_accession_data.csv b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_accession_data.csv new file mode 100644 index 000000000..f4467440f --- /dev/null +++ b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_accession_data.csv @@ -0,0 +1,31 @@ +sample_id,run_ena,analysis_ena +VBS20001-5100STDY8400001,, +VBS20002-5100STDY8400002,, +VBS20003-5100STDY8400003,, +VBS20004-5100STDY8400004,, +VBS20005-5100STDY8400005,, +VBS20006-5100STDY8400006,, +VBS20007-5100STDY8400007,, +VBS20008-5100STDY8400008,, +VBS20009-5100STDY8400009,, +VBS20010-5100STDY8400010,, +VBS20011-5100STDY8400011,, +VBS20012-5100STDY8400012,, +VBS20013-5100STDY8400013,, +VBS20014-5100STDY8400014,, +VBS20015-5100STDY8400015,, +VBS20016-5100STDY8400016,, +VBS20017-5100STDY8400017,, +VBS20018-5100STDY8400018,, +VBS20019-5100STDY8400019,, +VBS20020-5100STDY8400020,, +VBS20021-5100STDY8400021,, +VBS20022-5100STDY8400022,, +VBS20023-5100STDY8400023,, +VBS20024-5100STDY8400024,, +VBS20025-5100STDY8400025,, +VBS20026-5100STDY8400026,, +VBS20027-5100STDY8400027,, +VBS20028-5100STDY8400028,, +VBS20029-5100STDY8400029,, +VBS20030-5100STDY8400030,, diff --git 
a/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_snp_data.csv b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_snp_data.csv new file mode 100644 index 000000000..bdc013a3b --- /dev/null +++ b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_snp_data.csv @@ -0,0 +1,31 @@ +sample_id,alignments_bam,alignments_bam_md5,snp_genotypes_vcf,snp_genotypes_vcf_md5,snp_genotypes_zarr,snp_genotypes_zarr_md5,pipeline_version +VBS20001-5100STDY8400001,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20001-5100STDY8400001-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20001-5100STDY8400001-2022-01-02.vcf.gz,,,, +VBS20002-5100STDY8400002,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20002-5100STDY8400002-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20002-5100STDY8400002-2022-01-02.vcf.gz,,,, +VBS20003-5100STDY8400003,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20003-5100STDY8400003-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20003-5100STDY8400003-2022-01-02.vcf.gz,,,, +VBS20004-5100STDY8400004,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20004-5100STDY8400004-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20004-5100STDY8400004-2022-01-02.vcf.gz,,,, +VBS20005-5100STDY8400005,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20005-5100STDY8400005-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20005-5100STDY8400005-2022-01-02.vcf.gz,,,, +VBS20006-5100STDY8400006,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20006-5100STDY8400006-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20006-5100STDY8400006-2022-01-02.vcf.gz,,,, 
+VBS20007-5100STDY8400007,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20007-5100STDY8400007-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20007-5100STDY8400007-2022-01-02.vcf.gz,,,, +VBS20008-5100STDY8400008,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20008-5100STDY8400008-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20008-5100STDY8400008-2022-01-02.vcf.gz,,,, +VBS20009-5100STDY8400009,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20009-5100STDY8400009-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20009-5100STDY8400009-2022-01-02.vcf.gz,,,, +VBS20010-5100STDY8400010,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20010-5100STDY8400010-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20010-5100STDY8400010-2022-01-02.vcf.gz,,,, +VBS20011-5100STDY8400011,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20011-5100STDY8400011-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20011-5100STDY8400011-2022-01-02.vcf.gz,,,, +VBS20012-5100STDY8400012,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20012-5100STDY8400012-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20012-5100STDY8400012-2022-01-02.vcf.gz,,,, +VBS20013-5100STDY8400013,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20013-5100STDY8400013-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20013-5100STDY8400013-2022-01-02.vcf.gz,,,, +VBS20014-5100STDY8400014,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20014-5100STDY8400014-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20014-5100STDY8400014-2022-01-02.vcf.gz,,,, +VBS20015-5100STDY8400015,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20015-5100STDY8400015-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20015-5100STDY8400015-2022-01-02.vcf.gz,,,, 
+VBS20016-5100STDY8400016,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20016-5100STDY8400016-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20016-5100STDY8400016-2022-01-02.vcf.gz,,,, +VBS20017-5100STDY8400017,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20017-5100STDY8400017-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20017-5100STDY8400017-2022-01-02.vcf.gz,,,, +VBS20018-5100STDY8400018,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20018-5100STDY8400018-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20018-5100STDY8400018-2022-01-02.vcf.gz,,,, +VBS20019-5100STDY8400019,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20019-5100STDY8400019-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20019-5100STDY8400019-2022-01-02.vcf.gz,,,, +VBS20020-5100STDY8400020,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20020-5100STDY8400020-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20020-5100STDY8400020-2022-01-02.vcf.gz,,,, +VBS20021-5100STDY8400021,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20021-5100STDY8400021-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20021-5100STDY8400021-2022-01-02.vcf.gz,,,, +VBS20022-5100STDY8400022,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20022-5100STDY8400022-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20022-5100STDY8400022-2022-01-02.vcf.gz,,,, +VBS20023-5100STDY8400023,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20023-5100STDY8400023-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20023-5100STDY8400023-2022-01-02.vcf.gz,,,, +VBS20024-5100STDY8400024,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20024-5100STDY8400024-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20024-5100STDY8400024-2022-01-02.vcf.gz,,,, 
+VBS20025-5100STDY8400025,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20025-5100STDY8400025-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20025-5100STDY8400025-2022-01-02.vcf.gz,,,, +VBS20026-5100STDY8400026,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20026-5100STDY8400026-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20026-5100STDY8400026-2022-01-02.vcf.gz,,,, +VBS20027-5100STDY8400027,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20027-5100STDY8400027-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20027-5100STDY8400027-2022-01-02.vcf.gz,,,, +VBS20028-5100STDY8400028,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20028-5100STDY8400028-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20028-5100STDY8400028-2022-01-02.vcf.gz,,,, +VBS20029-5100STDY8400029,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20029-5100STDY8400029-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20029-5100STDY8400029-2022-01-02.vcf.gz,,,, +VBS20030-5100STDY8400030,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20030-5100STDY8400030-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20030-5100STDY8400030-2022-01-02.vcf.gz,,,, diff --git a/tests/anoph/test_base.py b/tests/anoph/test_base.py index c49486a3f..d7ce869f5 100644 --- a/tests/anoph/test_base.py +++ b/tests/anoph/test_base.py @@ -8,6 +8,7 @@ from pytest_cases import parametrize_with_cases from malariagen_data import af1 as _af1 +from malariagen_data import afar1 as _afar1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data.anoph.base import AnophelesBase @@ -90,6 +91,22 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@pytest.fixture +def afar1_sim_api(afar1_sim_fixture): + return AnophelesBase( + url=afar1_sim_fixture.url, + public_url=afar1_sim_fixture.url, + 
config_path=_afar1.CONFIG_PATH, + major_version_number=_afar1.MAJOR_VERSION_NUMBER, + major_version_path=_afar1.MAJOR_VERSION_PATH, + pre=False, + ) + + +def case_afar1_sim(afar1_sim_fixture, afar1_sim_api): + return afar1_sim_fixture, afar1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_config(fixture, api): config = api.config diff --git a/tests/anoph/test_g123.py b/tests/anoph/test_g123.py index 80ca3a9e3..288337082 100644 --- a/tests/anoph/test_g123.py +++ b/tests/anoph/test_g123.py @@ -4,6 +4,7 @@ import bokeh.models from malariagen_data import af1 as _af1 +from malariagen_data import afar1 as _afar1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 @@ -123,6 +124,29 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@pytest.fixture +def afar1_sim_api(afar1_sim_fixture): + return AnophelesG123Analysis( + url=afar1_sim_fixture.url, + public_url=afar1_sim_fixture.url, + config_path=_afar1.CONFIG_PATH, + major_version_number=_afar1.MAJOR_VERSION_NUMBER, + major_version_path=_afar1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="farauti", + results_cache=afar1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_afar1.TAXON_COLORS, + default_phasing_analysis="farauti", + ) + + +def case_afar1_sim(afar1_sim_fixture, afar1_sim_api): + return afar1_sim_fixture, afar1_sim_api + + def check_g123_gwss(*, api, g123_params): # Run main gwss function under test. 
x, g123 = api.g123_gwss(**g123_params) diff --git a/tests/anoph/test_heterozygosity.py b/tests/anoph/test_heterozygosity.py index 10118bbf3..94557c23f 100644 --- a/tests/anoph/test_heterozygosity.py +++ b/tests/anoph/test_heterozygosity.py @@ -5,6 +5,7 @@ from pytest_cases import parametrize_with_cases from malariagen_data import af1 as _af1 +from malariagen_data import afar1 as _afar1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 @@ -108,6 +109,28 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@pytest.fixture +def afar1_sim_api(afar1_sim_fixture): + return AnophelesHetAnalysis( + url=afar1_sim_fixture.url, + public_url=afar1_sim_fixture.url, + config_path=_afar1.CONFIG_PATH, + major_version_number=_afar1.MAJOR_VERSION_NUMBER, + major_version_path=_afar1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="farauti", + results_cache=afar1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_afar1.TAXON_COLORS, + ) + + +def case_afar1_sim(afar1_sim_fixture, afar1_sim_api): + return afar1_sim_fixture, afar1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_plot_heterozygosity_track(fixture, api: AnophelesHetAnalysis): # Set up test parameters. 
diff --git a/tests/anoph/test_sample_metadata.py b/tests/anoph/test_sample_metadata.py index 59211924a..998011a46 100644 --- a/tests/anoph/test_sample_metadata.py +++ b/tests/anoph/test_sample_metadata.py @@ -11,6 +11,7 @@ from typeguard import suppress_type_checks from malariagen_data import af1 as _af1 +from malariagen_data import afar1 as _afar1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 @@ -234,6 +235,24 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@pytest.fixture +def afar1_sim_api(afar1_sim_fixture): + return AnophelesSampleMetadata( + url=afar1_sim_fixture.url, + public_url=afar1_sim_fixture.url, + config_path=_afar1.CONFIG_PATH, + major_version_number=_afar1.MAJOR_VERSION_NUMBER, + major_version_path=_afar1.MAJOR_VERSION_PATH, + pre=False, + taxon_colors=_afar1.TAXON_COLORS, + ) + + +@case +def case_afar1_sim(afar1_sim_fixture, afar1_sim_api): + return afar1_sim_fixture, afar1_sim_api + + @case def case_ag3_sim_unrestricted_use_only( ag3_sim_fixture, ag3_sim_unrestricted_use_only_api diff --git a/tests/anoph/test_snp_data.py b/tests/anoph/test_snp_data.py index c6162793a..123518264 100644 --- a/tests/anoph/test_snp_data.py +++ b/tests/anoph/test_snp_data.py @@ -13,6 +13,7 @@ from malariagen_data import af1 as _af1 +from malariagen_data import afar1 as _afar1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 @@ -131,6 +132,28 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@pytest.fixture +def afar1_sim_api(afar1_sim_fixture): + return AnophelesSnpData( + url=afar1_sim_fixture.url, + public_url=afar1_sim_fixture.url, + config_path=_afar1.CONFIG_PATH, + major_version_number=_afar1.MAJOR_VERSION_NUMBER, + major_version_path=_afar1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="gene", + 
gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="farauti", + results_cache=afar1_sim_fixture.results_cache_path.as_posix(), + ) + + +@case(tags=["no_sex_calls", "single-sampleset"]) +def case_afar1_sim(afar1_sim_fixture, afar1_sim_api): + return afar1_sim_fixture, afar1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_open_snp_sites(fixture, api: AnophelesSnpData): root = api.open_snp_sites() diff --git a/tests/anoph/test_snp_frq.py b/tests/anoph/test_snp_frq.py index 1e5bda529..5f0240d14 100644 --- a/tests/anoph/test_snp_frq.py +++ b/tests/anoph/test_snp_frq.py @@ -8,6 +8,7 @@ from numpy.testing import assert_allclose, assert_array_equal from malariagen_data import af1 as _af1 +from malariagen_data import afar1 as _afar1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 @@ -134,6 +135,29 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@pytest.fixture +def afar1_sim_api(afar1_sim_fixture): + return AnophelesSnpFrequencyAnalysis( + url=afar1_sim_fixture.url, + public_url=afar1_sim_fixture.url, + config_path=_afar1.CONFIG_PATH, + major_version_number=_afar1.MAJOR_VERSION_NUMBER, + major_version_path=_afar1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="farauti", + results_cache=afar1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_afar1.TAXON_COLORS, + ) + + +@case(tags="single-sampleset") +def case_afar1_sim(afar1_sim_fixture, afar1_sim_api): + return afar1_sim_fixture, afar1_sim_api + + expected_alleles = list("ACGT") expected_effects = [ "FIVE_PRIME_UTR", diff --git a/tests/integration/test_afar1.py b/tests/integration/test_afar1.py new file mode 100644 index 000000000..fabb763aa --- /dev/null +++ 
b/tests/integration/test_afar1.py @@ -0,0 +1,20 @@ +from malariagen_data import Afar1 + + +def setup_afar1( + url="simplecache::gs://vo_afar_release_master_us_central1/", **kwargs +): + kwargs.setdefault("check_location", False) + kwargs.setdefault("show_progress", False) + if url is None: + return Afar1(**kwargs) + if url.startswith("simplecache::"): + kwargs["simplecache"] = dict(cache_storage="gcs_cache") + return Afar1(url, **kwargs) + + +def test_repr(): + afar1 = setup_afar1(check_location=True) + assert isinstance(afar1, Afar1) + r = repr(afar1) + assert isinstance(r, str)