From 6d38932b9b2e2ca9faf4839a265268f147c43c38 Mon Sep 17 00:00:00 2001 From: Iftiquar Ali Date: Tue, 31 Mar 2026 23:23:35 +0530 Subject: [PATCH] Add Afar1 species support for Anopheles farauti in the Pacific Implement the Afar1 API client class for accessing An. farauti genomic data, following the established pattern of existing species (Adir1, Amin1). This enables open-source genomic surveillance tools for a key malaria vector in the Pacific region. Changes: - Add malariagen_data/afar1.py with Afar1 class inheriting from AnophelesDataResource, configured for GCS bucket vo_afar_release - Register Afar1 in malariagen_data/__init__.py - Add Afar1Simulator to tests/anoph/conftest.py with simulated genome, GFF, SNP, and metadata generation - Add test fixture metadata CSVs for Pacific region samples (PNG, SLB, VUT) - Add case_afar1 test parametrization across 6 test modules (base, snp_data, snp_frq, sample_metadata, heterozygosity, g123) - Add skeleton integration test tests/integration/test_afar1.py - Add Sphinx API documentation docs/source/Afar1.rst - Add Afar1 card to docs/source/index.rst landing page - Fix Adar1 __repr__/_repr_html_ doc URLs pointing to Adir1.html Made-with: Cursor --- docs/source/Afar1.rst | 150 +++++++++ docs/source/index.rst | 8 + malariagen_data/__init__.py | 1 + malariagen_data/adar1.py | 4 +- malariagen_data/afar1.py | 239 ++++++++++++++ tests/anoph/conftest.py | 298 ++++++++++++++++++ .../samples.cohorts.csv | 31 ++ .../sequence_qc_stats.csv | 31 ++ .../samples.meta.csv | 31 ++ .../wgs_accession_data.csv | 31 ++ .../wgs_snp_data.csv | 31 ++ tests/anoph/test_base.py | 17 + tests/anoph/test_g123.py | 24 ++ tests/anoph/test_heterozygosity.py | 23 ++ tests/anoph/test_sample_metadata.py | 19 ++ tests/anoph/test_snp_data.py | 23 ++ tests/anoph/test_snp_frq.py | 24 ++ tests/integration/test_afar1.py | 20 ++ 18 files changed, 1003 insertions(+), 2 deletions(-) create mode 100644 docs/source/Afar1.rst create mode 100644 malariagen_data/afar1.py 
create mode 100644 tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/cohorts_20260101/1300-VO-PG-BEEBE-VMF00210/samples.cohorts.csv create mode 100644 tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/curation/1300-VO-PG-BEEBE-VMF00210/sequence_qc_stats.csv create mode 100644 tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/samples.meta.csv create mode 100644 tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_accession_data.csv create mode 100644 tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_snp_data.csv create mode 100644 tests/integration/test_afar1.py diff --git a/docs/source/Afar1.rst b/docs/source/Afar1.rst new file mode 100644 index 000000000..cf4345c99 --- /dev/null +++ b/docs/source/Afar1.rst @@ -0,0 +1,150 @@ +Afar1 +===== + +This page provides a curated list of functions and properties available in the ``malariagen_data`` API +for data on mosquitoes from *Anopheles farauti*. + +To set up the API, use the following code:: + + import malariagen_data + afar1 = malariagen_data.Afar1() + +All the functions below can then be accessed as methods on the ``afar1`` object. E.g., to call the +``sample_metadata()`` function, do:: + + df_samples = afar1.sample_metadata() + +For more information about the data and terms of use, please see the +`MalariaGEN website <https://www.malariagen.net/data>`_ or contact support@malariagen.net. + +.. currentmodule:: malariagen_data.afar1.Afar1 + +Basic data access +----------------- +.. autosummary:: + :toctree: generated/ + + releases + sample_sets + lookup_release + lookup_study + +Reference genome data access +---------------------------- +.. autosummary:: + :toctree: generated/ + + contigs + genome_sequence + genome_features + plot_transcript + plot_genes + +Sample metadata access +---------------------- +.. 
autosummary:: + :toctree: generated/ + + sample_metadata + add_extra_metadata + clear_extra_metadata + lookup_sample + count_samples + plot_samples_bar + plot_samples_interactive_map + plot_sample_location_mapbox + plot_sample_location_geo + wgs_data_catalog + cohorts + +SNP data access +--------------- +.. autosummary:: + :toctree: generated/ + + site_mask_ids + snp_calls + snp_allele_counts + plot_snps + site_annotations + is_accessible + biallelic_snp_calls + biallelic_diplotypes + biallelic_snps_to_plink + +Integrative genomics viewer (IGV) +--------------------------------- +.. autosummary:: + :toctree: generated/ + + igv + view_alignments + +SNP frequency analysis +---------------------- +.. autosummary:: + :toctree: generated/ + + snp_allele_frequencies + snp_allele_frequencies_advanced + aa_allele_frequencies + aa_allele_frequencies_advanced + plot_frequencies_heatmap + plot_frequencies_time_series + plot_frequencies_interactive_map + +Principal components analysis (PCA) +----------------------------------- +.. autosummary:: + :toctree: generated/ + + pca + plot_pca_variance + plot_pca_coords + plot_pca_coords_3d + +Genetic distance and neighbour-joining trees (NJT) +-------------------------------------------------- +.. autosummary:: + :toctree: generated/ + + plot_njt + njt + biallelic_diplotype_pairwise_distances + +Heterozygosity analysis +----------------------- +.. autosummary:: + :toctree: generated/ + + plot_heterozygosity + roh_hmm + plot_roh + +Diversity analysis +------------------ +.. autosummary:: + :toctree: generated/ + + cohort_diversity_stats + diversity_stats + plot_diversity_stats + +Diplotype clustering +-------------------- +.. autosummary:: + :toctree: generated/ + + plot_diplotype_clustering + plot_diplotype_clustering_advanced + +Fst analysis +------------ +.. 
autosummary:: + :toctree: generated/ + + average_fst + pairwise_average_fst + plot_pairwise_average_fst + fst_gwss + plot_fst_gwss diff --git a/docs/source/index.rst b/docs/source/index.rst index f9658a78e..75bf13d68 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -43,6 +43,14 @@ API documentation .. image:: https://phil.cdc.gov//PHIL_Images/8777/8777_lores.jpg + .. grid-item-card:: ``Afar1`` + :link: Afar1 + :link-type: doc + + *Anopheles farauti*. + + .. image:: https://upload.wikimedia.org/wikipedia/commons/thumb/a/a1/Anopheles_farauti.jpg/640px-Anopheles_farauti.jpg + Documentation for the `Pf7 `_ (*Plasmodium falciparum*) and `Pv4 `_ (*Plasmodium vivax*) APIs is also available, currently hosted on a separate site. diff --git a/malariagen_data/__init__.py b/malariagen_data/__init__.py index 0934a0359..2f8ebb77e 100644 --- a/malariagen_data/__init__.py +++ b/malariagen_data/__init__.py @@ -2,6 +2,7 @@ from .adar1 import Adar1 from .adir1 import Adir1 from .af1 import Af1 +from .afar1 import Afar1 from .ag3 import Ag3 from .amin1 import Amin1 from .anopheles import AnophelesDataResource, Region diff --git a/malariagen_data/adar1.py b/malariagen_data/adar1.py index 1340a9554..4ca6f7c35 100644 --- a/malariagen_data/adar1.py +++ b/malariagen_data/adar1.py @@ -152,7 +152,7 @@ def __repr__(self): f"Please note that data are subject to terms of use,\n" f"for more information see https://www.malariagen.net/data\n" f"or contact support@malariagen.net. For API documentation see \n" - f"https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/Adir1.html" + f"https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/Adar1.html" ) return text @@ -167,7 +167,7 @@ def _repr_html_(self): Please note that data are subject to terms of use, for more information see the MalariaGEN website or contact support@malariagen.net. - See also the Adir1 API docs. + See also the Adar1 API docs. 
diff --git a/malariagen_data/afar1.py b/malariagen_data/afar1.py new file mode 100644 index 000000000..df88f1377 --- /dev/null +++ b/malariagen_data/afar1.py @@ -0,0 +1,239 @@ +import sys + +import plotly.express as px # type: ignore + +import malariagen_data +from .anopheles import AnophelesDataResource + +MAJOR_VERSION_NUMBER = 1 +MAJOR_VERSION_PATH = "v1.0" +CONFIG_PATH = "v1.0-config.json" +GCS_DEFAULT_URL = "gs://vo_afar_release_master_us_central1/" +GCS_DEFAULT_PUBLIC_URL = "gs://vo_afar_release_master_us_central1/" +GCS_REGION_URLS = { + "us-central1": "gs://vo_afar_release_master_us_central1", +} + +TAXON_PALETTE = px.colors.qualitative.Plotly +TAXON_COLORS = { + "farauti": TAXON_PALETTE[0], +} + +XPEHH_GWSS_CACHE_NAME = "afar1_xpehh_gwss_v1" +IHS_GWSS_CACHE_NAME = "afar1_ihs_gwss_v1" +ROH_HMM_CACHE_NAME = "afar1_roh_hmm_v1" + + +class Afar1(AnophelesDataResource): + """Provides access to data from Afar1.0 releases. + + Parameters + ---------- + url : str, optional + Base path to data. Defaults to use Google Cloud Storage, or can + be a local path on your file system if data have been downloaded. + site_filters_analysis : str, optional + Site filters analysis version. + bokeh_output_notebook : bool, optional + If True (default), configure bokeh to output plots to the notebook. + results_cache : str, optional + Path to directory on local file system to save results. + log : str or stream, optional + File path or stream output for logging messages. + debug : bool, optional + Set to True to enable debug level logging. + show_progress : bool, optional + If True, show a progress bar during longer-running computations. + The default can be overridden using an environmental variable + named MGEN_SHOW_PROGRESS. + check_location : bool, optional + If True, use ipinfo to check the location of the client system. + **kwargs + Passed through to fsspec when setting up file system access. 
+ + Examples + -------- + Access data from Google Cloud Storage (default): + + >>> import malariagen_data + >>> afar1 = malariagen_data.Afar1() + + Access data downloaded to a local file system: + + >>> afar1 = malariagen_data.Afar1("/local/path/to/vo_afar_release/") + + Access data from Google Cloud Storage, with caching on the local file system + in a directory named "gcs_cache": + + >>> afar1 = malariagen_data.Afar1( + ... "simplecache::gs://vo_afar_release_master_us_central1", + ... simplecache=dict(cache_storage="gcs_cache"), + ... ) + + Set up caching of some longer-running computations on the local file system, + in a directory named "results_cache": + + >>> afar1 = malariagen_data.Afar1(results_cache="results_cache") + + """ + + _xpehh_gwss_cache_name = XPEHH_GWSS_CACHE_NAME + _ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME + _roh_hmm_cache_name = ROH_HMM_CACHE_NAME + + def __init__( + self, + url=None, + public_url=GCS_DEFAULT_PUBLIC_URL, + bokeh_output_notebook=True, + results_cache=None, + log=sys.stdout, + debug=False, + show_progress=None, + check_location=True, + cohorts_analysis=None, + site_filters_analysis=None, + discordant_read_calls_analysis=None, + pre=False, + tqdm_class=None, + unrestricted_use_only=False, + surveillance_use_only=False, + **storage_options, + ): + super().__init__( + url=url, + public_url=public_url, + config_path=CONFIG_PATH, + cohorts_analysis=cohorts_analysis, + aim_analysis=None, + aim_metadata_dtype=None, + aim_ids=None, + aim_palettes=None, + site_filters_analysis=site_filters_analysis, + discordant_read_calls_analysis=discordant_read_calls_analysis, + default_site_mask="farauti", + default_phasing_analysis="farauti", + default_coverage_calls_analysis="farauti", + bokeh_output_notebook=bokeh_output_notebook, + results_cache=results_cache, + log=log, + debug=debug, + show_progress=show_progress, + check_location=check_location, + pre=pre, + gcs_default_url=GCS_DEFAULT_URL, + gcs_region_urls=GCS_REGION_URLS, + 
major_version_number=MAJOR_VERSION_NUMBER, + major_version_path=MAJOR_VERSION_PATH, + gff_gene_type="gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + storage_options=storage_options, + tqdm_class=tqdm_class, + taxon_colors=TAXON_COLORS, + virtual_contigs=None, + inversion_tag_path=None, + unrestricted_use_only=unrestricted_use_only, + surveillance_use_only=surveillance_use_only, + ) + + def __repr__(self): + text = ( + f"\n" + f"Storage URL : {self._url}\n" + f"Data releases available : {', '.join(self._available_releases)}\n" + f"Results cache : {self._results_cache}\n" + f"Cohorts analysis : {self._cohorts_analysis}\n" + f"Site filters analysis : {self._site_filters_analysis}\n" + f"Software version : malariagen_data {malariagen_data.__version__}\n" + f"Client location : {self.client_location}\n" + f"Data filtered to unrestricted use only: {self._unrestricted_use_only}\n" + f"Data filtered to surveillance use only: {self._surveillance_use_only}\n" + f"Relevant data releases : {', '.join(self.releases)}\n" + f"---\n" + f"Please note that data are subject to terms of use,\n" + f"for more information see https://www.malariagen.net/data\n" + f"or contact support@malariagen.net. For API documentation see \n" + f"https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/Afar1.html" + ) + return text + + def _repr_html_(self): + html = f""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MalariaGEN Afar1 API client
+ Please note that data are subject to terms of use, + for more information see + the MalariaGEN website or contact support@malariagen.net. + See also the Afar1 API docs. +
+ Storage URL + {self._url}
+ Data releases available + {", ".join(self._available_releases)}
+ Results cache + {self._results_cache}
+ Cohorts analysis + {self._cohorts_analysis}
+ Site filters analysis + {self._site_filters_analysis}
+ Software version + malariagen_data {malariagen_data.__version__}
+ Client location + {self.client_location}
+ Data filtered for unrestricted use only + {self._unrestricted_use_only}
+ Data filtered for surveillance use only + {self._surveillance_use_only}
+ Relevant data releases + {", ".join(self.releases)}
+ """ + return html diff --git a/tests/anoph/conftest.py b/tests/anoph/conftest.py index 505d17c46..ed58286cb 100644 --- a/tests/anoph/conftest.py +++ b/tests/anoph/conftest.py @@ -3228,3 +3228,301 @@ def adir1_sim_fixture(fixture_dir): @pytest.fixture(scope="session") def amin1_sim_fixture(fixture_dir): return Amin1Simulator(fixture_dir=fixture_dir, rng=create_rng("Amin1")) + + +class Afar1Simulator(AnophelesSimulator): + def __init__(self, fixture_dir, rng): + super().__init__( + fixture_dir=fixture_dir, + rng=rng, + bucket="vo_afar_release_master_us_central1", + releases=("1.0",), + has_aims=False, + has_cohorts_by_quarter=True, + has_sequence_qc=True, + ) + + def init_config(self): + self.config = { + "PUBLIC_RELEASES": ["1.0"], + "GENESET_GFF3_PATH": "reference/genome/AfarF4/AfarF4.gff", + "GENOME_FASTA_PATH": "reference/genome/AfarF4/AfarF4_Genome.fasta", + "GENOME_FAI_PATH": "reference/genome/AfarF4/AfarF4_Genome.fasta.fai", + "GENOME_ZARR_PATH": "reference/genome/AfarF4/AfarF4_Genome.zarr", + "GENOME_REF_ID": "AfarF4", + "GENOME_REF_NAME": "Anopheles farauti", + "CONTIGS": [ + "scaffold_1", + "scaffold_2", + "scaffold_3", + ], + "SITE_ANNOTATIONS_ZARR_PATH": "reference/genome/AfarF4/AfarF4_SEQANNOTATION.zarr", + "DEFAULT_SITE_FILTERS_ANALYSIS": "sc_20260101", + "DEFAULT_COHORTS_ANALYSIS": "20260101", + "DEFAULT_DISCORDANT_READ_CALLS_ANALYSIS": "", + "SITE_MASK_IDS": ["farauti"], + "PHASING_ANALYSIS_IDS": ["farauti"], + } + config_path = self.bucket_path / "v1.0-config.json" + with config_path.open(mode="w") as f: + json.dump(self.config, f, indent=4) + + def init_public_release_manifest(self): + release_path = self.bucket_path / "v1.0" + release_path.mkdir(parents=True, exist_ok=True) + manifest_path = release_path / "manifest.tsv" + manifest = pd.DataFrame( + { + "sample_set": [ + "1300-VO-PG-BEEBE-VMF00210", + ], + "sample_count": [20], + "study_id": [ + "1300-VO-PG-BEEBE-VMF00210", + ], + "study_url": [ + 
"https://www.malariagen.net/network/where-we-work/1300-VO-PG-BEEBE-VMF00210", + ], + "terms_of_use_expiry_date": [ + "2027-01-01", + ], + "terms_of_use_url": [ + "https://malariagen.github.io/vector-data/afar1/afar1.0.html#terms-of-use", + ], + } + ) + manifest.to_csv(manifest_path, index=False, sep="\t") + self.release_manifests["1.0"] = manifest + + def init_genome_sequence(self): + base_composition = { + b"a": 0.0, + b"c": 0.0, + b"g": 0.0, + b"t": 0.0, + b"n": 0.0, + b"A": 0.295, + b"C": 0.205, + b"G": 0.205, + b"T": 0.295, + b"N": 1.0e-05, + } + path = self.bucket_path / self.config["GENOME_ZARR_PATH"] + self.genome = simulate_genome( + path=path, + contigs=self.contigs, + low=80_000, + high=120_000, + base_composition=base_composition, + rng=self.rng, + ) + self.contig_sizes = { + contig: self.genome[contig].shape[0] for contig in self.contigs + } + + def init_genome_features(self): + path = self.bucket_path / self.config["GENESET_GFF3_PATH"] + path.parent.mkdir(parents=True, exist_ok=True) + simulator = Gff3Simulator( + contig_sizes=self.contig_sizes, + rng=self.rng, + gene_type="gene", + attrs=("Note", "description"), + ) + self.genome_features = simulator.simulate_gff(path=path) + + def write_metadata(self, release, release_path, sample_set, sequence_qc=True): + n_samples_sim = ( + self.release_manifests[release] + .set_index("sample_set") + .loc[sample_set]["sample_count"] + ) + + src_path = ( + self.fixture_dir + / "vo_afar_release_master_us_central1" + / release_path + / "metadata" + / "general" + / sample_set + / "samples.meta.csv" + ) + df_general = pd.read_csv(src_path) + df_general_ds = df_general.sample( + n_samples_sim, replace=False, random_state=self.rng + ) + samples_ds = df_general_ds["sample_id"].tolist() + dst_path = ( + self.bucket_path + / release_path + / "metadata" + / "general" + / sample_set + / "samples.meta.csv" + ) + dst_path.parent.mkdir(parents=True, exist_ok=True) + df_general_ds.to_csv(dst_path, index=False) + + if sequence_qc: 
+ src_path = ( + self.fixture_dir + / "vo_afar_release_master_us_central1" + / release_path + / "metadata" + / "curation" + / sample_set + / "sequence_qc_stats.csv" + ) + df_sequence_qc_stats = pd.read_csv(src_path) + df_sequence_qc_stats_ds = ( + df_sequence_qc_stats.set_index("sample_id") + .loc[samples_ds] + .reset_index() + ) + dst_path = ( + self.bucket_path + / release_path + / "metadata" + / "curation" + / sample_set + / "sequence_qc_stats.csv" + ) + dst_path.parent.mkdir(parents=True, exist_ok=True) + df_sequence_qc_stats_ds.to_csv(dst_path, index=False) + + src_path = ( + self.fixture_dir + / "vo_afar_release_master_us_central1" + / release_path + / "metadata" + / "cohorts_20260101" + / sample_set + / "samples.cohorts.csv" + ) + df_coh = pd.read_csv(src_path) + df_coh_ds = df_coh.set_index("sample_id").loc[samples_ds].reset_index() + dst_path = ( + self.bucket_path + / release_path + / "metadata" + / "cohorts_20260101" + / sample_set + / "samples.cohorts.csv" + ) + dst_path.parent.mkdir(parents=True, exist_ok=True) + df_coh_ds.to_csv(dst_path, index=False) + + src_path = ( + self.fixture_dir + / "vo_afar_release_master_us_central1" + / release_path + / "metadata" + / "general" + / sample_set + / "wgs_snp_data.csv" + ) + df_cat = pd.read_csv(src_path) + df_cat_ds = df_cat.set_index("sample_id").loc[samples_ds].reset_index() + dst_path = ( + self.bucket_path + / release_path + / "metadata" + / "general" + / sample_set + / "wgs_snp_data.csv" + ) + dst_path.parent.mkdir(parents=True, exist_ok=True) + df_cat_ds.to_csv(dst_path, index=False) + + src_path = ( + self.fixture_dir + / "vo_afar_release_master_us_central1" + / release_path + / "metadata" + / "general" + / sample_set + / "wgs_accession_data.csv" + ) + df_cat = pd.read_csv(src_path) + df_cat_ds = df_cat.set_index("sample_id").loc[samples_ds].reset_index() + dst_path = ( + self.bucket_path + / release_path + / "metadata" + / "general" + / sample_set + / "wgs_accession_data.csv" + ) + 
dst_path.parent.mkdir(parents=True, exist_ok=True) + df_cat_ds.to_csv(dst_path, index=False) + + def init_metadata(self): + self.write_metadata( + release="1.0", + release_path="v1.0", + sample_set="1300-VO-PG-BEEBE-VMF00210", + ) + + def init_snp_sites(self): + path = self.bucket_path / "v1.0/snp_genotypes/all/sites/" + self.snp_sites, self.n_snp_sites = simulate_snp_sites( + path=path, contigs=self.contigs, genome=self.genome + ) + + def init_site_filters(self): + analysis = self.config["DEFAULT_SITE_FILTERS_ANALYSIS"] + + mask = "farauti" + p_pass = 0.60 + path = self.bucket_path / "v1.0/site_filters" / analysis / mask + simulate_site_filters( + path=path, + contigs=self.contigs, + p_pass=p_pass, + n_sites=self.n_snp_sites, + rng=self.rng, + ) + + def init_snp_genotypes(self): + for release, manifest in self.release_manifests.items(): + release_path = f"v{release}" + + for rec in manifest.itertuples(): + sample_set = rec.sample_set + metadata_path = ( + self.bucket_path + / release_path + / "metadata" + / "general" + / sample_set + / "samples.meta.csv" + ) + + zarr_path = ( + self.bucket_path + / release_path + / "snp_genotypes" + / "all" + / sample_set + ) + + p_allele = np.array([0.981, 0.006, 0.008, 0.005]) + p_missing = np.array([0.95, 0.05]) + simulate_snp_genotypes( + zarr_path=zarr_path, + metadata_path=metadata_path, + contigs=self.contigs, + n_sites=self.n_snp_sites, + p_allele=p_allele, + p_missing=p_missing, + rng=self.rng, + ) + + def init_site_annotations(self): + path = self.bucket_path / self.config["SITE_ANNOTATIONS_ZARR_PATH"] + simulate_site_annotations(path=path, genome=self.genome, rng=self.rng) + + +@pytest.fixture(scope="session") +def afar1_sim_fixture(fixture_dir): + return Afar1Simulator(fixture_dir=fixture_dir, rng=create_rng("Afar1")) diff --git a/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/cohorts_20260101/1300-VO-PG-BEEBE-VMF00210/samples.cohorts.csv 
b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/cohorts_20260101/1300-VO-PG-BEEBE-VMF00210/samples.cohorts.csv new file mode 100644 index 000000000..90ff85739 --- /dev/null +++ b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/cohorts_20260101/1300-VO-PG-BEEBE-VMF00210/samples.cohorts.csv @@ -0,0 +1,31 @@ +sample_id,country_ISO,adm1_name,adm1_ISO,adm2_name,taxon,cohort_admin1_year,cohort_admin1_month,cohort_admin1_quarter,cohort_admin2_year,cohort_admin2_month,cohort_admin2_quarter +VBS20001-5100STDY8400001,PNG,Madang Province,PG-MPL,Madang,farauti,PG-MPL_fara_2020,PG-MPL_fara_2020_03,PG-MPL_fara_2020_Q1,PG-MPL_Madang_fara_2020,PG-MPL_Madang_fara_2020_03,PG-MPL_Madang_fara_2020_Q1 +VBS20002-5100STDY8400002,PNG,Madang Province,PG-MPL,Madang,farauti,PG-MPL_fara_2020,PG-MPL_fara_2020_03,PG-MPL_fara_2020_Q1,PG-MPL_Madang_fara_2020,PG-MPL_Madang_fara_2020_03,PG-MPL_Madang_fara_2020_Q1 +VBS20003-5100STDY8400003,PNG,Madang Province,PG-MPL,Madang,farauti,PG-MPL_fara_2020,PG-MPL_fara_2020_04,PG-MPL_fara_2020_Q2,PG-MPL_Madang_fara_2020,PG-MPL_Madang_fara_2020_04,PG-MPL_Madang_fara_2020_Q2 +VBS20004-5100STDY8400004,PNG,Madang Province,PG-MPL,Madang,farauti,PG-MPL_fara_2020,PG-MPL_fara_2020_04,PG-MPL_fara_2020_Q2,PG-MPL_Madang_fara_2020,PG-MPL_Madang_fara_2020_04,PG-MPL_Madang_fara_2020_Q2 +VBS20005-5100STDY8400005,PNG,Madang Province,PG-MPL,Madang,farauti,PG-MPL_fara_2020,PG-MPL_fara_2020_05,PG-MPL_fara_2020_Q2,PG-MPL_Madang_fara_2020,PG-MPL_Madang_fara_2020_05,PG-MPL_Madang_fara_2020_Q2 +VBS20006-5100STDY8400006,PNG,Madang Province,PG-MPL,Madang,farauti,PG-MPL_fara_2020,PG-MPL_fara_2020_06,PG-MPL_fara_2020_Q2,PG-MPL_Madang_fara_2020,PG-MPL_Madang_fara_2020_06,PG-MPL_Madang_fara_2020_Q2 +VBS20007-5100STDY8400007,PNG,Morobe Province,PG-MPM,Lae,farauti,PG-MPM_fara_2020,PG-MPM_fara_2020_03,PG-MPM_fara_2020_Q1,PG-MPM_Lae_fara_2020,PG-MPM_Lae_fara_2020_03,PG-MPM_Lae_fara_2020_Q1 +VBS20008-5100STDY8400008,PNG,Morobe 
Province,PG-MPM,Lae,farauti,PG-MPM_fara_2020,PG-MPM_fara_2020_04,PG-MPM_fara_2020_Q2,PG-MPM_Lae_fara_2020,PG-MPM_Lae_fara_2020_04,PG-MPM_Lae_fara_2020_Q2 +VBS20009-5100STDY8400009,PNG,Morobe Province,PG-MPM,Lae,farauti,PG-MPM_fara_2020,PG-MPM_fara_2020_05,PG-MPM_fara_2020_Q2,PG-MPM_Lae_fara_2020,PG-MPM_Lae_fara_2020_05,PG-MPM_Lae_fara_2020_Q2 +VBS20010-5100STDY8400010,PNG,Morobe Province,PG-MPM,Lae,farauti,PG-MPM_fara_2020,PG-MPM_fara_2020_06,PG-MPM_fara_2020_Q2,PG-MPM_Lae_fara_2020,PG-MPM_Lae_fara_2020_06,PG-MPM_Lae_fara_2020_Q2 +VBS20011-5100STDY8400011,PNG,Morobe Province,PG-MPM,Lae,farauti,PG-MPM_fara_2020,PG-MPM_fara_2020_07,PG-MPM_fara_2020_Q3,PG-MPM_Lae_fara_2020,PG-MPM_Lae_fara_2020_07,PG-MPM_Lae_fara_2020_Q3 +VBS20012-5100STDY8400012,PNG,Morobe Province,PG-MPM,Lae,farauti,PG-MPM_fara_2020,PG-MPM_fara_2020_08,PG-MPM_fara_2020_Q3,PG-MPM_Lae_fara_2020,PG-MPM_Lae_fara_2020_08,PG-MPM_Lae_fara_2020_Q3 +VBS20013-5100STDY8400013,PNG,East Sepik Province,PG-ESW,Wewak,farauti,PG-ESW_fara_2020,PG-ESW_fara_2020_03,PG-ESW_fara_2020_Q1,PG-ESW_Wewak_fara_2020,PG-ESW_Wewak_fara_2020_03,PG-ESW_Wewak_fara_2020_Q1 +VBS20014-5100STDY8400014,PNG,East Sepik Province,PG-ESW,Wewak,farauti,PG-ESW_fara_2020,PG-ESW_fara_2020_04,PG-ESW_fara_2020_Q2,PG-ESW_Wewak_fara_2020,PG-ESW_Wewak_fara_2020_04,PG-ESW_Wewak_fara_2020_Q2 +VBS20015-5100STDY8400015,PNG,East Sepik Province,PG-ESW,Wewak,farauti,PG-ESW_fara_2020,PG-ESW_fara_2020_05,PG-ESW_fara_2020_Q2,PG-ESW_Wewak_fara_2020,PG-ESW_Wewak_fara_2020_05,PG-ESW_Wewak_fara_2020_Q2 +VBS20016-5100STDY8400016,PNG,East Sepik Province,PG-ESW,Wewak,farauti,PG-ESW_fara_2020,PG-ESW_fara_2020_06,PG-ESW_fara_2020_Q2,PG-ESW_Wewak_fara_2020,PG-ESW_Wewak_fara_2020_06,PG-ESW_Wewak_fara_2020_Q2 +VBS20017-5100STDY8400017,SLB,Guadalcanal Province,SB-GU,Honiara,farauti,SB-GU_fara_2021,SB-GU_fara_2021_01,SB-GU_fara_2021_Q1,SB-GU_Honiara_fara_2021,SB-GU_Honiara_fara_2021_01,SB-GU_Honiara_fara_2021_Q1 +VBS20018-5100STDY8400018,SLB,Guadalcanal 
Province,SB-GU,Honiara,farauti,SB-GU_fara_2021,SB-GU_fara_2021_02,SB-GU_fara_2021_Q1,SB-GU_Honiara_fara_2021,SB-GU_Honiara_fara_2021_02,SB-GU_Honiara_fara_2021_Q1 +VBS20019-5100STDY8400019,SLB,Guadalcanal Province,SB-GU,Honiara,farauti,SB-GU_fara_2021,SB-GU_fara_2021_03,SB-GU_fara_2021_Q1,SB-GU_Honiara_fara_2021,SB-GU_Honiara_fara_2021_03,SB-GU_Honiara_fara_2021_Q1 +VBS20020-5100STDY8400020,SLB,Guadalcanal Province,SB-GU,Honiara,farauti,SB-GU_fara_2021,SB-GU_fara_2021_04,SB-GU_fara_2021_Q2,SB-GU_Honiara_fara_2021,SB-GU_Honiara_fara_2021_04,SB-GU_Honiara_fara_2021_Q2 +VBS20021-5100STDY8400021,SLB,Guadalcanal Province,SB-GU,Honiara,farauti,SB-GU_fara_2021,SB-GU_fara_2021_05,SB-GU_fara_2021_Q2,SB-GU_Honiara_fara_2021,SB-GU_Honiara_fara_2021_05,SB-GU_Honiara_fara_2021_Q2 +VBS20022-5100STDY8400022,SLB,Guadalcanal Province,SB-GU,Honiara,farauti,SB-GU_fara_2021,SB-GU_fara_2021_06,SB-GU_fara_2021_Q2,SB-GU_Honiara_fara_2021,SB-GU_Honiara_fara_2021_06,SB-GU_Honiara_fara_2021_Q2 +VBS20023-5100STDY8400023,SLB,Malaita Province,SB-ML,Auki,farauti,SB-ML_fara_2021,SB-ML_fara_2021_01,SB-ML_fara_2021_Q1,SB-ML_Auki_fara_2021,SB-ML_Auki_fara_2021_01,SB-ML_Auki_fara_2021_Q1 +VBS20024-5100STDY8400024,SLB,Malaita Province,SB-ML,Auki,farauti,SB-ML_fara_2021,SB-ML_fara_2021_02,SB-ML_fara_2021_Q1,SB-ML_Auki_fara_2021,SB-ML_Auki_fara_2021_02,SB-ML_Auki_fara_2021_Q1 +VBS20025-5100STDY8400025,SLB,Malaita Province,SB-ML,Auki,farauti,SB-ML_fara_2021,SB-ML_fara_2021_03,SB-ML_fara_2021_Q1,SB-ML_Auki_fara_2021,SB-ML_Auki_fara_2021_03,SB-ML_Auki_fara_2021_Q1 +VBS20026-5100STDY8400026,SLB,Malaita Province,SB-ML,Auki,farauti,SB-ML_fara_2021,SB-ML_fara_2021_04,SB-ML_fara_2021_Q2,SB-ML_Auki_fara_2021,SB-ML_Auki_fara_2021_04,SB-ML_Auki_fara_2021_Q2 +VBS20027-5100STDY8400027,SLB,Malaita Province,SB-ML,Auki,farauti,SB-ML_fara_2021,SB-ML_fara_2021_05,SB-ML_fara_2021_Q2,SB-ML_Auki_fara_2021,SB-ML_Auki_fara_2021_05,SB-ML_Auki_fara_2021_Q2 +VBS20028-5100STDY8400028,SLB,Malaita 
Province,SB-ML,Auki,farauti,SB-ML_fara_2021,SB-ML_fara_2021_06,SB-ML_fara_2021_Q2,SB-ML_Auki_fara_2021,SB-ML_Auki_fara_2021_06,SB-ML_Auki_fara_2021_Q2 +VBS20029-5100STDY8400029,VUT,Shefa Province,VU-SEE,Port Vila,farauti,VU-SEE_fara_2021,VU-SEE_fara_2021_01,VU-SEE_fara_2021_Q1,VU-SEE_Port-Vila_fara_2021,VU-SEE_Port-Vila_fara_2021_01,VU-SEE_Port-Vila_fara_2021_Q1 +VBS20030-5100STDY8400030,VUT,Shefa Province,VU-SEE,Port Vila,farauti,VU-SEE_fara_2021,VU-SEE_fara_2021_02,VU-SEE_fara_2021_Q1,VU-SEE_Port-Vila_fara_2021,VU-SEE_Port-Vila_fara_2021_02,VU-SEE_Port-Vila_fara_2021_Q1 diff --git a/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/curation/1300-VO-PG-BEEBE-VMF00210/sequence_qc_stats.csv b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/curation/1300-VO-PG-BEEBE-VMF00210/sequence_qc_stats.csv new file mode 100644 index 000000000..974c2fd1e --- /dev/null +++ b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/curation/1300-VO-PG-BEEBE-VMF00210/sequence_qc_stats.csv @@ -0,0 +1,31 @@ +sample_id,mean_cov,median_cov,modal_cov,frac_gen_cov,divergence,contam_pct,contam_LLR +VBS20001-5100STDY8400001,38.45,38,37,0.984,0.017,1.823,2785.036 +VBS20002-5100STDY8400002,42.31,42,41,0.984,0.017,2.148,3427.874 +VBS20003-5100STDY8400003,35.67,35,34,0.984,0.017,2.394,3603.611 +VBS20004-5100STDY8400004,44.12,44,43,0.984,0.017,1.995,3279.245 +VBS20005-5100STDY8400005,29.88,29,28,0.983,0.017,2.718,2556.379 +VBS20006-5100STDY8400006,37.56,37,36,0.984,0.017,2.019,2997.331 +VBS20007-5100STDY8400007,41.23,41,40,0.984,0.017,1.921,2708.875 +VBS20008-5100STDY8400008,33.91,33,33,0.984,0.017,2.482,2826.487 +VBS20009-5100STDY8400009,46.78,46,45,0.984,0.017,1.887,3068.332 +VBS20010-5100STDY8400010,35.14,35,34,0.984,0.017,2.574,2885.976 +VBS20011-5100STDY8400011,40.89,40,39,0.984,0.017,2.365,3435.056 +VBS20012-5100STDY8400012,38.22,38,37,0.984,0.018,3.147,5700.932 +VBS20013-5100STDY8400013,52.67,52,51,0.984,0.017,1.425,2115.483 
+VBS20014-5100STDY8400014,43.45,43,42,0.984,0.017,1.815,2540.393 +VBS20015-5100STDY8400015,36.11,36,35,0.984,0.017,2.513,3167.230 +VBS20016-5100STDY8400016,39.78,39,38,0.984,0.017,2.330,2828.396 +VBS20017-5100STDY8400017,31.56,31,30,0.984,0.017,2.942,3757.173 +VBS20018-5100STDY8400018,48.34,48,47,0.984,0.017,1.908,3168.616 +VBS20019-5100STDY8400019,37.12,37,36,0.984,0.017,2.778,4077.416 +VBS20020-5100STDY8400020,42.56,42,41,0.984,0.017,2.170,3225.560 +VBS20021-5100STDY8400021,30.78,30,29,0.983,0.017,2.366,2572.631 +VBS20022-5100STDY8400022,40.45,40,39,0.984,0.017,2.325,3377.131 +VBS20023-5100STDY8400023,36.89,36,36,0.984,0.017,1.810,2361.649 +VBS20024-5100STDY8400024,34.23,34,33,0.984,0.017,2.420,3059.231 +VBS20025-5100STDY8400025,25.67,25,24,0.983,0.018,3.263,3409.580 +VBS20026-5100STDY8400026,33.45,33,32,0.984,0.017,2.755,3655.926 +VBS20027-5100STDY8400027,39.01,39,38,0.984,0.017,2.483,3600.917 +VBS20028-5100STDY8400028,32.34,32,31,0.984,0.017,2.738,3364.031 +VBS20029-5100STDY8400029,47.89,47,46,0.985,0.017,2.711,5803.981 +VBS20030-5100STDY8400030,41.67,41,40,0.984,0.017,2.864,5038.697 diff --git a/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/samples.meta.csv b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/samples.meta.csv new file mode 100644 index 000000000..19bb768a2 --- /dev/null +++ b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/samples.meta.csv @@ -0,0 +1,31 @@ +sample_id,partner_sample_id,contributor,country,location,year,month,latitude,longitude,sex_call +VBS20001-5100STDY8400001,PG-AF-00101,Nigel Beebe,Papua New Guinea,Madang,2020,3,-5.207,145.789,F +VBS20002-5100STDY8400002,PG-AF-00102,Nigel Beebe,Papua New Guinea,Madang,2020,3,-5.207,145.789,F +VBS20003-5100STDY8400003,PG-AF-00103,Nigel Beebe,Papua New Guinea,Madang,2020,4,-5.207,145.789,F +VBS20004-5100STDY8400004,PG-AF-00104,Nigel 
Beebe,Papua New Guinea,Madang,2020,4,-5.207,145.789,F +VBS20005-5100STDY8400005,PG-AF-00105,Nigel Beebe,Papua New Guinea,Madang,2020,5,-5.207,145.789,F +VBS20006-5100STDY8400006,PG-AF-00106,Nigel Beebe,Papua New Guinea,Madang,2020,6,-5.207,145.789,F +VBS20007-5100STDY8400007,PG-AF-00107,Nigel Beebe,Papua New Guinea,Lae,2020,3,-6.734,147.000,F +VBS20008-5100STDY8400008,PG-AF-00108,Nigel Beebe,Papua New Guinea,Lae,2020,4,-6.734,147.000,F +VBS20009-5100STDY8400009,PG-AF-00109,Nigel Beebe,Papua New Guinea,Lae,2020,5,-6.734,147.000,F +VBS20010-5100STDY8400010,PG-AF-00110,Nigel Beebe,Papua New Guinea,Lae,2020,6,-6.734,147.000,F +VBS20011-5100STDY8400011,PG-AF-00111,Nigel Beebe,Papua New Guinea,Lae,2020,7,-6.734,147.000,F +VBS20012-5100STDY8400012,PG-AF-00112,Nigel Beebe,Papua New Guinea,Lae,2020,8,-6.734,147.000,F +VBS20013-5100STDY8400013,PG-AF-00113,Nigel Beebe,Papua New Guinea,Wewak,2020,3,-3.866,143.860,F +VBS20014-5100STDY8400014,PG-AF-00114,Nigel Beebe,Papua New Guinea,Wewak,2020,4,-3.866,143.860,F +VBS20015-5100STDY8400015,PG-AF-00115,Nigel Beebe,Papua New Guinea,Wewak,2020,5,-3.866,143.860,F +VBS20016-5100STDY8400016,PG-AF-00116,Nigel Beebe,Papua New Guinea,Wewak,2020,6,-3.866,143.860,F +VBS20017-5100STDY8400017,SB-AF-00201,Nigel Beebe,Solomon Islands,Honiara,2021,1,-9.432,160.000,F +VBS20018-5100STDY8400018,SB-AF-00202,Nigel Beebe,Solomon Islands,Honiara,2021,2,-9.432,160.000,F +VBS20019-5100STDY8400019,SB-AF-00203,Nigel Beebe,Solomon Islands,Honiara,2021,3,-9.432,160.000,F +VBS20020-5100STDY8400020,SB-AF-00204,Nigel Beebe,Solomon Islands,Honiara,2021,4,-9.432,160.000,F +VBS20021-5100STDY8400021,SB-AF-00205,Nigel Beebe,Solomon Islands,Honiara,2021,5,-9.432,160.000,F +VBS20022-5100STDY8400022,SB-AF-00206,Nigel Beebe,Solomon Islands,Honiara,2021,6,-9.432,160.000,F +VBS20023-5100STDY8400023,SB-AF-00207,Nigel Beebe,Solomon Islands,Auki,2021,1,-8.768,160.693,F +VBS20024-5100STDY8400024,SB-AF-00208,Nigel Beebe,Solomon Islands,Auki,2021,2,-8.768,160.693,F 
+VBS20025-5100STDY8400025,SB-AF-00209,Nigel Beebe,Solomon Islands,Auki,2021,3,-8.768,160.693,F +VBS20026-5100STDY8400026,SB-AF-00210,Nigel Beebe,Solomon Islands,Auki,2021,4,-8.768,160.693,F +VBS20027-5100STDY8400027,SB-AF-00211,Nigel Beebe,Solomon Islands,Auki,2021,5,-8.768,160.693,F +VBS20028-5100STDY8400028,SB-AF-00212,Nigel Beebe,Solomon Islands,Auki,2021,6,-8.768,160.693,F +VBS20029-5100STDY8400029,VU-AF-00301,Nigel Beebe,Vanuatu,Port Vila,2021,1,-17.734,168.322,F +VBS20030-5100STDY8400030,VU-AF-00302,Nigel Beebe,Vanuatu,Port Vila,2021,2,-17.734,168.322,F diff --git a/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_accession_data.csv b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_accession_data.csv new file mode 100644 index 000000000..f4467440f --- /dev/null +++ b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_accession_data.csv @@ -0,0 +1,31 @@ +sample_id,run_ena,analysis_ena +VBS20001-5100STDY8400001,, +VBS20002-5100STDY8400002,, +VBS20003-5100STDY8400003,, +VBS20004-5100STDY8400004,, +VBS20005-5100STDY8400005,, +VBS20006-5100STDY8400006,, +VBS20007-5100STDY8400007,, +VBS20008-5100STDY8400008,, +VBS20009-5100STDY8400009,, +VBS20010-5100STDY8400010,, +VBS20011-5100STDY8400011,, +VBS20012-5100STDY8400012,, +VBS20013-5100STDY8400013,, +VBS20014-5100STDY8400014,, +VBS20015-5100STDY8400015,, +VBS20016-5100STDY8400016,, +VBS20017-5100STDY8400017,, +VBS20018-5100STDY8400018,, +VBS20019-5100STDY8400019,, +VBS20020-5100STDY8400020,, +VBS20021-5100STDY8400021,, +VBS20022-5100STDY8400022,, +VBS20023-5100STDY8400023,, +VBS20024-5100STDY8400024,, +VBS20025-5100STDY8400025,, +VBS20026-5100STDY8400026,, +VBS20027-5100STDY8400027,, +VBS20028-5100STDY8400028,, +VBS20029-5100STDY8400029,, +VBS20030-5100STDY8400030,, diff --git 
a/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_snp_data.csv b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_snp_data.csv new file mode 100644 index 000000000..bdc013a3b --- /dev/null +++ b/tests/anoph/fixture/vo_afar_release_master_us_central1/v1.0/metadata/general/1300-VO-PG-BEEBE-VMF00210/wgs_snp_data.csv @@ -0,0 +1,31 @@ +sample_id,alignments_bam,alignments_bam_md5,snp_genotypes_vcf,snp_genotypes_vcf_md5,snp_genotypes_zarr,snp_genotypes_zarr_md5,pipeline_version +VBS20001-5100STDY8400001,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20001-5100STDY8400001-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20001-5100STDY8400001-2022-01-02.vcf.gz,,,, +VBS20002-5100STDY8400002,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20002-5100STDY8400002-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20002-5100STDY8400002-2022-01-02.vcf.gz,,,, +VBS20003-5100STDY8400003,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20003-5100STDY8400003-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20003-5100STDY8400003-2022-01-02.vcf.gz,,,, +VBS20004-5100STDY8400004,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20004-5100STDY8400004-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20004-5100STDY8400004-2022-01-02.vcf.gz,,,, +VBS20005-5100STDY8400005,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20005-5100STDY8400005-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20005-5100STDY8400005-2022-01-02.vcf.gz,,,, +VBS20006-5100STDY8400006,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20006-5100STDY8400006-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20006-5100STDY8400006-2022-01-02.vcf.gz,,,, 
+VBS20007-5100STDY8400007,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20007-5100STDY8400007-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20007-5100STDY8400007-2022-01-02.vcf.gz,,,, +VBS20008-5100STDY8400008,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20008-5100STDY8400008-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20008-5100STDY8400008-2022-01-02.vcf.gz,,,, +VBS20009-5100STDY8400009,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20009-5100STDY8400009-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20009-5100STDY8400009-2022-01-02.vcf.gz,,,, +VBS20010-5100STDY8400010,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20010-5100STDY8400010-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20010-5100STDY8400010-2022-01-02.vcf.gz,,,, +VBS20011-5100STDY8400011,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20011-5100STDY8400011-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20011-5100STDY8400011-2022-01-02.vcf.gz,,,, +VBS20012-5100STDY8400012,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20012-5100STDY8400012-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20012-5100STDY8400012-2022-01-02.vcf.gz,,,, +VBS20013-5100STDY8400013,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20013-5100STDY8400013-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20013-5100STDY8400013-2022-01-02.vcf.gz,,,, +VBS20014-5100STDY8400014,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20014-5100STDY8400014-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20014-5100STDY8400014-2022-01-02.vcf.gz,,,, +VBS20015-5100STDY8400015,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20015-5100STDY8400015-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20015-5100STDY8400015-2022-01-02.vcf.gz,,,, 
+VBS20016-5100STDY8400016,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20016-5100STDY8400016-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20016-5100STDY8400016-2022-01-02.vcf.gz,,,, +VBS20017-5100STDY8400017,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20017-5100STDY8400017-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20017-5100STDY8400017-2022-01-02.vcf.gz,,,, +VBS20018-5100STDY8400018,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20018-5100STDY8400018-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20018-5100STDY8400018-2022-01-02.vcf.gz,,,, +VBS20019-5100STDY8400019,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20019-5100STDY8400019-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20019-5100STDY8400019-2022-01-02.vcf.gz,,,, +VBS20020-5100STDY8400020,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20020-5100STDY8400020-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20020-5100STDY8400020-2022-01-02.vcf.gz,,,, +VBS20021-5100STDY8400021,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20021-5100STDY8400021-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20021-5100STDY8400021-2022-01-02.vcf.gz,,,, +VBS20022-5100STDY8400022,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20022-5100STDY8400022-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20022-5100STDY8400022-2022-01-02.vcf.gz,,,, +VBS20023-5100STDY8400023,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20023-5100STDY8400023-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20023-5100STDY8400023-2022-01-02.vcf.gz,,,, +VBS20024-5100STDY8400024,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20024-5100STDY8400024-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20024-5100STDY8400024-2022-01-02.vcf.gz,,,, 
+VBS20025-5100STDY8400025,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20025-5100STDY8400025-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20025-5100STDY8400025-2022-01-02.vcf.gz,,,, +VBS20026-5100STDY8400026,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20026-5100STDY8400026-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20026-5100STDY8400026-2022-01-02.vcf.gz,,,, +VBS20027-5100STDY8400027,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20027-5100STDY8400027-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20027-5100STDY8400027-2022-01-02.vcf.gz,,,, +VBS20028-5100STDY8400028,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20028-5100STDY8400028-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20028-5100STDY8400028-2022-01-02.vcf.gz,,,, +VBS20029-5100STDY8400029,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20029-5100STDY8400029-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20029-5100STDY8400029-2022-01-02.vcf.gz,,,, +VBS20030-5100STDY8400030,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20030-5100STDY8400030-2022-01-01.bam,,https://1300-vo-pg-beebe-farauti.cog.sanger.ac.uk/VBS20030-5100STDY8400030-2022-01-02.vcf.gz,,,, diff --git a/tests/anoph/test_base.py b/tests/anoph/test_base.py index c49486a3f..d7ce869f5 100644 --- a/tests/anoph/test_base.py +++ b/tests/anoph/test_base.py @@ -8,6 +8,7 @@ from pytest_cases import parametrize_with_cases from malariagen_data import af1 as _af1 +from malariagen_data import afar1 as _afar1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data.anoph.base import AnophelesBase @@ -90,6 +91,22 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@pytest.fixture +def afar1_sim_api(afar1_sim_fixture): + return AnophelesBase( + url=afar1_sim_fixture.url, + public_url=afar1_sim_fixture.url, + 
config_path=_afar1.CONFIG_PATH, + major_version_number=_afar1.MAJOR_VERSION_NUMBER, + major_version_path=_afar1.MAJOR_VERSION_PATH, + pre=False, + ) + + +def case_afar1_sim(afar1_sim_fixture, afar1_sim_api): + return afar1_sim_fixture, afar1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_config(fixture, api): config = api.config diff --git a/tests/anoph/test_g123.py b/tests/anoph/test_g123.py index 80ca3a9e3..288337082 100644 --- a/tests/anoph/test_g123.py +++ b/tests/anoph/test_g123.py @@ -4,6 +4,7 @@ import bokeh.models from malariagen_data import af1 as _af1 +from malariagen_data import afar1 as _afar1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 @@ -123,6 +124,29 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@pytest.fixture +def afar1_sim_api(afar1_sim_fixture): + return AnophelesG123Analysis( + url=afar1_sim_fixture.url, + public_url=afar1_sim_fixture.url, + config_path=_afar1.CONFIG_PATH, + major_version_number=_afar1.MAJOR_VERSION_NUMBER, + major_version_path=_afar1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="farauti", + results_cache=afar1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_afar1.TAXON_COLORS, + default_phasing_analysis="farauti", + ) + + +def case_afar1_sim(afar1_sim_fixture, afar1_sim_api): + return afar1_sim_fixture, afar1_sim_api + + def check_g123_gwss(*, api, g123_params): # Run main gwss function under test. 
x, g123 = api.g123_gwss(**g123_params) diff --git a/tests/anoph/test_heterozygosity.py b/tests/anoph/test_heterozygosity.py index 10118bbf3..94557c23f 100644 --- a/tests/anoph/test_heterozygosity.py +++ b/tests/anoph/test_heterozygosity.py @@ -5,6 +5,7 @@ from pytest_cases import parametrize_with_cases from malariagen_data import af1 as _af1 +from malariagen_data import afar1 as _afar1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 @@ -108,6 +109,28 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@pytest.fixture +def afar1_sim_api(afar1_sim_fixture): + return AnophelesHetAnalysis( + url=afar1_sim_fixture.url, + public_url=afar1_sim_fixture.url, + config_path=_afar1.CONFIG_PATH, + major_version_number=_afar1.MAJOR_VERSION_NUMBER, + major_version_path=_afar1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="farauti", + results_cache=afar1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_afar1.TAXON_COLORS, + ) + + +def case_afar1_sim(afar1_sim_fixture, afar1_sim_api): + return afar1_sim_fixture, afar1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_plot_heterozygosity_track(fixture, api: AnophelesHetAnalysis): # Set up test parameters. 
diff --git a/tests/anoph/test_sample_metadata.py b/tests/anoph/test_sample_metadata.py index 59211924a..998011a46 100644 --- a/tests/anoph/test_sample_metadata.py +++ b/tests/anoph/test_sample_metadata.py @@ -11,6 +11,7 @@ from typeguard import suppress_type_checks from malariagen_data import af1 as _af1 +from malariagen_data import afar1 as _afar1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 @@ -234,6 +235,24 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@pytest.fixture +def afar1_sim_api(afar1_sim_fixture): + return AnophelesSampleMetadata( + url=afar1_sim_fixture.url, + public_url=afar1_sim_fixture.url, + config_path=_afar1.CONFIG_PATH, + major_version_number=_afar1.MAJOR_VERSION_NUMBER, + major_version_path=_afar1.MAJOR_VERSION_PATH, + pre=False, + taxon_colors=_afar1.TAXON_COLORS, + ) + + +@case +def case_afar1_sim(afar1_sim_fixture, afar1_sim_api): + return afar1_sim_fixture, afar1_sim_api + + @case def case_ag3_sim_unrestricted_use_only( ag3_sim_fixture, ag3_sim_unrestricted_use_only_api diff --git a/tests/anoph/test_snp_data.py b/tests/anoph/test_snp_data.py index c6162793a..123518264 100644 --- a/tests/anoph/test_snp_data.py +++ b/tests/anoph/test_snp_data.py @@ -13,6 +13,7 @@ from malariagen_data import af1 as _af1 +from malariagen_data import afar1 as _afar1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 @@ -131,6 +132,28 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@pytest.fixture +def afar1_sim_api(afar1_sim_fixture): + return AnophelesSnpData( + url=afar1_sim_fixture.url, + public_url=afar1_sim_fixture.url, + config_path=_afar1.CONFIG_PATH, + major_version_number=_afar1.MAJOR_VERSION_NUMBER, + major_version_path=_afar1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="gene", + 
gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="farauti", + results_cache=afar1_sim_fixture.results_cache_path.as_posix(), + ) + + +@case(tags=["no_sex_calls", "single-sampleset"]) +def case_afar1_sim(afar1_sim_fixture, afar1_sim_api): + return afar1_sim_fixture, afar1_sim_api + + @parametrize_with_cases("fixture,api", cases=".") def test_open_snp_sites(fixture, api: AnophelesSnpData): root = api.open_snp_sites() diff --git a/tests/anoph/test_snp_frq.py b/tests/anoph/test_snp_frq.py index 1e5bda529..5f0240d14 100644 --- a/tests/anoph/test_snp_frq.py +++ b/tests/anoph/test_snp_frq.py @@ -8,6 +8,7 @@ from numpy.testing import assert_allclose, assert_array_equal from malariagen_data import af1 as _af1 +from malariagen_data import afar1 as _afar1 from malariagen_data import ag3 as _ag3 from malariagen_data import adir1 as _adir1 from malariagen_data import amin1 as _amin1 @@ -134,6 +135,29 @@ def case_amin1_sim(amin1_sim_fixture, amin1_sim_api): return amin1_sim_fixture, amin1_sim_api +@pytest.fixture +def afar1_sim_api(afar1_sim_fixture): + return AnophelesSnpFrequencyAnalysis( + url=afar1_sim_fixture.url, + public_url=afar1_sim_fixture.url, + config_path=_afar1.CONFIG_PATH, + major_version_number=_afar1.MAJOR_VERSION_NUMBER, + major_version_path=_afar1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="farauti", + results_cache=afar1_sim_fixture.results_cache_path.as_posix(), + taxon_colors=_afar1.TAXON_COLORS, + ) + + +@case(tags="single-sampleset") +def case_afar1_sim(afar1_sim_fixture, afar1_sim_api): + return afar1_sim_fixture, afar1_sim_api + + expected_alleles = list("ACGT") expected_effects = [ "FIVE_PRIME_UTR", diff --git a/tests/integration/test_afar1.py b/tests/integration/test_afar1.py new file mode 100644 index 000000000..fabb763aa --- /dev/null +++ 
b/tests/integration/test_afar1.py @@ -0,0 +1,20 @@ +from malariagen_data import Afar1 + + +def setup_afar1( + url="simplecache::gs://vo_afar_release_master_us_central1/", **kwargs +): + kwargs.setdefault("check_location", False) + kwargs.setdefault("show_progress", False) + if url is None: + return Afar1(**kwargs) + if url.startswith("simplecache::"): + kwargs["simplecache"] = dict(cache_storage="gcs_cache") + return Afar1(url, **kwargs) + + +def test_repr(): + afar1 = setup_afar1(check_location=True) + assert isinstance(afar1, Afar1) + r = repr(afar1) + assert isinstance(r, str)