diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 19e69d6..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.gitignore b/.gitignore index bb280d6..b7491d5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ build .history build -**/.DS_Store \ No newline at end of file +**/.DS_Store.DS_Store \ No newline at end of file diff --git a/README.md b/README.md index 0954c5c..9859b91 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ For installation instructions, please check the [documentation](https://github.c - [Ingesting MERFISH data with Panpipes](docs/ingesting_merfish_data/Ingesting_merfish_data_with_panpipes.md) - [Filtering and preprocessing spatial data with Panpipes](docs/preprocess_spatial_data/preprocess_spatial_data_with_panpipes.md) - [Deconvoluting spatial data with Panpipes](docs/deconvolution/deconvoluting_spatial_data_with_panpipes.md) +- [Analyzing scRNA-seq and scTCR-seq Data with Panpipes](docs/ingesting_airr_data/ingesting_airr_data.md) - [Contributing to Panpipes](docs/contributing.md) diff --git a/docs/.DS_Store b/docs/.DS_Store deleted file mode 100644 index 5732845..0000000 Binary files a/docs/.DS_Store and /dev/null differ diff --git a/docs/clustering/.DS_Store b/docs/clustering/.DS_Store deleted file mode 100644 index 794a2eb..0000000 Binary files a/docs/clustering/.DS_Store and /dev/null differ diff --git a/docs/clustering/figures/.DS_Store b/docs/clustering/figures/.DS_Store deleted file mode 100644 index 9e91285..0000000 Binary files a/docs/clustering/figures/.DS_Store and /dev/null differ diff --git a/docs/filtering_data/.DS_Store b/docs/filtering_data/.DS_Store deleted file mode 100644 index 427caf8..0000000 Binary files a/docs/filtering_data/.DS_Store and /dev/null differ diff --git a/docs/filtering_data/figures/.DS_Store b/docs/filtering_data/figures/.DS_Store deleted file mode 100644 index 3de9563..0000000 Binary files a/docs/filtering_data/figures/.DS_Store and /dev/null differ diff --git 
a/docs/filtering_data/figures/atac/.DS_Store b/docs/filtering_data/figures/atac/.DS_Store deleted file mode 100644 index 5008ddf..0000000 Binary files a/docs/filtering_data/figures/atac/.DS_Store and /dev/null differ diff --git a/docs/filtering_data/figures/prot/.DS_Store b/docs/filtering_data/figures/prot/.DS_Store deleted file mode 100644 index 5008ddf..0000000 Binary files a/docs/filtering_data/figures/prot/.DS_Store and /dev/null differ diff --git a/docs/filtering_data/figures/rna/.DS_Store b/docs/filtering_data/figures/rna/.DS_Store deleted file mode 100644 index 5008ddf..0000000 Binary files a/docs/filtering_data/figures/rna/.DS_Store and /dev/null differ diff --git a/docs/index.md b/docs/index.md index fe7d1c8..0e60dce 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,6 +11,7 @@ For installation please check the [documentation](https://panpipes-pipelines.rea ingesting_data/Ingesting_data_with_panpipes.md ingesting_multiome/ingesting_mome.md ingesting_multimodal_data/ingesting_multimodal_data.md +ingesting_airr_data/ingesting_airr_data.md ingesting_mouse/Ingesting_mouse_data_with_panpipes.md filtering_data/filtering_data_with_panpipes.md uni_multi_integration/Integrating_data_with_panpipes.md diff --git a/docs/ingesting_airr_data/files/pipeline.yml b/docs/ingesting_airr_data/files/pipeline.yml new file mode 100644 index 0000000..cffbc23 --- /dev/null +++ b/docs/ingesting_airr_data/files/pipeline.yml @@ -0,0 +1,182 @@ +# ============================================================ +# Ingest workflow Panpipes (pipeline_ingest.py) +# ============================================================ +# This file contains the parameters for the ingest workflow. 
+# For full descriptions of the parameters, see the documentation at https://panpipes-pipelines.readthedocs.io/en/latest/yaml_docs/pipeline_ingestion_yml.html + +#-------------------------- +# Compute resources options +#-------------------------- +resources: + threads_high: 1 + threads_medium: 1 + threads_low: 1 + +condaenv: + +# -------------------------------- +# Loading and merging data options +# -------------------------------- + +# ---------------------------- +# Project name and data format +project: "vdjtest" +sample_prefix: "vdjtest" +use_existing_h5mu: False +submission_file: ../short_vdj_submission.tsv +metadatacols: sample_id,disease_state,tissue,preservation_method,celltype +concat_join_type: inner + +#-------------------------- +# Modalities in the project +modalities: + rna: True + prot: False + bcr: True + tcr: True + atac: False + +#-------------------------------- +# Integrating barcode level data +# e.g. demultiplexing with hashtags, chemical tags or lipid tagging +barcode_mtd: + include: False + path: + metadatacols: + +#------------------------------------------ +# Loading Protein data - additional options +protein_metadata_table: +index_col_choice: +load_prot_from_raw: False +subset_prot_barcodes_to_rna: False + +# ----------------------------- +# Quality Control (QC) options +# ----------------------------- + +# ----------------------------------- +# Processing of 10X cellranger metrics files +plot_10X_metrics: True + +# ---------------------------------- +# Doublet detection on RNA modality +scr: + run: True + expected_doublet_rate: 0.06 + sim_doublet_ratio: 2 + n_neighbours: 20 + min_counts: 2 + min_cells: 3 + min_gene_variability_pctl: 85 + n_prin_comps: 30 + use_thr: True + call_doublets_thr: 0.25 + +# ---------------------------- +# RNA modality Quality Control + +# Providing a gene list +# see documentation at https://panpipes-pipelines.readthedocs.io/en/latest/usage/gene_list_format.html +custom_genes_file: qc_genelist_1.0.csv + +# 
Defining actions on the genes + +# (for pipeline_ingest.py) +calc_proportions: hb,mt,rp +score_genes: + +# cell cycle action +ccgenes: default + +# ------------------------ +# Plotting RNA QC metrics +# all metrics should be provided as a comma separated string e.g. a,b,c +# plotqc_grouping_var: orig.ident +plotqc_grouping_var: sample_id +plotqc_rna_metrics: doublet_scores,pct_counts_mt,pct_counts_rp,pct_counts_hb,pct_counts_ig + +# ---------------------------- +# Plotting Protein QC metrics + +# requires prot_path to be included in the submission file +# all metrics should be provided as a comma separated string e.g. a,b,c +plotqc_prot_metrics: total_counts,log1p_total_counts,n_prot_by_counts,pct_counts_isotype +plot_metrics_per_prot: total_counts,log1p_total_counts,n_cells_by_counts,mean_counts + +identify_isotype_outliers: True +isotype_upper_quantile: 90 +isotype_n_pass: 2 + +# --------------------- +# Plot ATAC QC metrics + +# set is_paired to True if a multiome is ingested +is_paired: True +# If this is NOT a multiome experiment, but you have an RNA anndata that you would like to use for TSS enrichment +# use the partner_rna to specify the path to the file and provide a features_tss file with the tss coordinates +# leave empty if multiome is used +partner_rna: +features_tss: +plotqc_atac_metrics: n_genes_by_counts,total_counts,pct_fragments_in_peaks,atac_peak_region_fragments,atac_mitochondrial_reads,atac_TSS_fragments + +# --------------------------- +# Plot Repertoire QC metrics +ir_dist: + metric: + sequence: + +clonotype_definition: + receptor_arms: + dual_ir: + within_group: + +plotqc_rep_metrics: + # provide an item list + - is_cell + - extra_chains + - clonal_expansion + - rep:receptor_type + - rep:receptor_subtype + - rep:chain_pairing + - rep:multi_chain + +# ------------------------------------- +# Profiling Protein Ambient background +# ------------------------------------- +# PLEASE NOTE that this analysis can only be run if your inputs are from 
cellranger raw outputs + +assess_background: False +downsample_background: True + +# ----------------------------------------------------- +# Files required for profiling ambient background or running dsb normalisation + +# The pipeline requires the raw_feature_bc_matrix folder from cellranger or equivalent, +# specified in the submission file path with {mod}_filetype set to "cellranger," "cellranger_multi," or "10X_h5" +# for automatic search of .h5 or matrix folder for profiling ambient background or running dsb normalization. + +#------------------------------------------- +# Investigate per-channel antibody staining +channel_col: sample_id +save_norm_prot_mtx: False + +#---------------------- +# Protein normalization +#---------------------- + +normalisation_methods: clr + +#----------------------------------------------- +# Centered log ratio (CLR) normalization options + +# margin determines whether you normalise per cell (as you would for RNA), +# or by feature (recommended, due to the variable nature of prot assays). 
+# CLR margin 1 is recommended for informative qc plots in this pipeline +# 0 = normalise row-wise (per cell) +# 1 = normalise column-wise (per feature, recommended) +clr_margin: 1 + +#-------------------------------------------------------------- +# Denoised and Scaled by Background (DSB) normalization options +quantile_clipping: True diff --git a/docs/ingesting_airr_data/files/qc_genelist_1.0.csv b/docs/ingesting_airr_data/files/qc_genelist_1.0.csv new file mode 100644 index 0000000..fcaf44e --- /dev/null +++ b/docs/ingesting_airr_data/files/qc_genelist_1.0.csv @@ -0,0 +1,1227 @@ +mod,feature,group +rna,ANXA1,MarkersNeutro +rna,ARG1,MarkersNeutro +rna,BPI,MarkersNeutro +rna,CD101,MarkersNeutro +rna,CD24,MarkersNeutro +rna,CD274,MarkersNeutro +rna,CSF3R,MarkersNeutro +rna,CXCL8,MarkersNeutro +rna,DEFA3,MarkersNeutro +rna,DEFA4,MarkersNeutro +rna,DUSP1,MarkersNeutro +rna,EGR1,MarkersNeutro +rna,ELANE,MarkersNeutro +rna,FCGR3B,MarkersNeutro +rna,FTH1,MarkersNeutro +rna,G0S2,MarkersNeutro +rna,IFIT3,MarkersNeutro +rna,IFITM3,MarkersNeutro +rna,IL3RA,MarkersNeutro +rna,ISG15,MarkersNeutro +rna,ITGAM,MarkersNeutro +rna,KLF4,MarkersNeutro +rna,LCN2,MarkersNeutro +rna,LTF,MarkersNeutro +rna,LY6E,MarkersNeutro +rna,MKI67,MarkersNeutro +rna,MMP8,MarkersNeutro +rna,MPO,MarkersNeutro +rna,NEAT1,MarkersNeutro +rna,PADI4,MarkersNeutro +rna,PRTN3,MarkersNeutro +rna,PTMA,MarkersNeutro +rna,RETN,MarkersNeutro +rna,RPL5,MarkersNeutro +rna,S100A4,MarkersNeutro +rna,S100A8,MarkersNeutro +rna,S100A9,MarkersNeutro +rna,TCN1,MarkersNeutro +rna,TNFAIP2,MarkersNeutro +rna,TXN,MarkersNeutro +rna,HBA1,hb +rna,HBA2,hb +rna,HBB,hb +rna,HBD,hb +rna,HBE1,hb +rna,HBG1,hb +rna,HBG2,hb +rna,HBM,hb +rna,HBQ1,hb +rna,HBZ,hb +rna,IGKV1OR1-1,exclude +rna,IGKV3OR2-268,exclude +rna,IGKJ5,exclude +rna,IGKJ4,exclude +rna,IGKJ3,exclude +rna,IGKJ2,exclude +rna,IGKJ1,exclude +rna,IGKV4-1,exclude +rna,IGKV5-2,exclude +rna,IGKV7-3,exclude +rna,IGKV2-4,exclude +rna,IGKV1-5,exclude +rna,IGKV1-6,exclude 
+rna,IGKV3-7,exclude +rna,IGKV1-8,exclude +rna,IGKV1-9,exclude +rna,IGKV2-10,exclude +rna,IGKV3-11,exclude +rna,IGKV1-12,exclude +rna,IGKV1-13,exclude +rna,IGKV2-14,exclude +rna,IGKV3-15,exclude +rna,IGKV1-16,exclude +rna,IGKV1-17,exclude +rna,IGKV2-18,exclude +rna,IGKV2-19,exclude +rna,IGKV3-20,exclude +rna,IGKV6-21,exclude +rna,IGKV1-22,exclude +rna,IGKV2-23,exclude +rna,IGKV2-24,exclude +rna,IGKV3-25,exclude +rna,IGKV2-26,exclude +rna,IGKV1-27,exclude +rna,IGKV2-28,exclude +rna,IGKV2-29,exclude +rna,IGKV2-30,exclude +rna,IGKV3-31,exclude +rna,IGKV1-32,exclude +rna,IGKV1-33,exclude +rna,IGKV3-34,exclude +rna,IGKV1-35,exclude +rna,IGKV2-36,exclude +rna,IGKV1-37,exclude +rna,IGKV2-38,exclude +rna,IGKV1-39,exclude +rna,IGKV2-40,exclude +rna,IGKV2D-40,exclude +rna,IGKV1D-39,exclude +rna,IGKV2D-38,exclude +rna,IGKV1D-37,exclude +rna,IGKV2D-36,exclude +rna,IGKV1D-35,exclude +rna,IGKV3D-34,exclude +rna,IGKV1D-33,exclude +rna,IGKV1D-32,exclude +rna,IGKV3D-31,exclude +rna,IGKV2D-30,exclude +rna,IGKV2D-29,exclude +rna,IGKV2D-28,exclude +rna,IGKV1D-27,exclude +rna,IGKV2D-26,exclude +rna,IGKV3D-25,exclude +rna,IGKV2D-24,exclude +rna,IGKV2D-23,exclude +rna,IGKV1D-22,exclude +rna,IGKV6D-21,exclude +rna,IGKV3D-20,exclude +rna,IGKV2D-19,exclude +rna,IGKV2D-18,exclude +rna,IGKV6D-41,exclude +rna,IGKV1D-17,exclude +rna,IGKV1D-16,exclude +rna,IGKV3D-15,exclude +rna,IGKV2D-14,exclude +rna,IGKV1D-13,exclude +rna,IGKV1D-12,exclude +rna,IGKV3D-11,exclude +rna,IGKV2D-10,exclude +rna,IGKV1D-42,exclude +rna,IGKV1D-43,exclude +rna,IGKV1D-8,exclude +rna,IGKV3D-7,exclude +rna,IGKV1OR2-118,exclude +rna,IGKV1OR2-1,exclude +rna,IGKV2OR2-1,exclude +rna,IGKV2OR2-2,exclude +rna,IGKV1OR2-3,exclude +rna,IGKV1OR2-9,exclude +rna,IGKV2OR2-10,exclude +rna,IGKV2OR2-7D,exclude +rna,IGKV3OR2-5,exclude +rna,IGKV1OR2-6,exclude +rna,IGKV2OR2-7,exclude +rna,IGKV2OR2-8,exclude +rna,IGKV1OR2-11,exclude +rna,IGKV1OR2-108,exclude +rna,HLA-F,exclude +rna,HLA-G,exclude +rna,HLA-A,exclude +rna,HLA-E,exclude 
+rna,HLA-B,exclude +rna,HLA-DRA,exclude +rna,HLA-DRB5,exclude +rna,HLA-DRB1,exclude +rna,HLA-DQA1,exclude +rna,HLA-DQB1,exclude +rna,HLA-DQA2,exclude +rna,HLA-DQB2,exclude +rna,HLA-DOB,exclude +rna,HLA-DMB,exclude +rna,HLA-DMA,exclude +rna,HLA-DOA,exclude +rna,HLA-DPA1,exclude +rna,HLA-DPB1,exclude +rna,TRGJ2,exclude +rna,TRGJP2,exclude +rna,TRGJ1,exclude +rna,TRGJP,exclude +rna,TRGJP1,exclude +rna,TRGV11,exclude +rna,TRGVB,exclude +rna,TRGV10,exclude +rna,TRGV9,exclude +rna,TRGVA,exclude +rna,TRGV8,exclude +rna,TRGV7,exclude +rna,TRGV6,exclude +rna,TRGV5P,exclude +rna,TRGV5,exclude +rna,TRGV4,exclude +rna,TRGV3,exclude +rna,TRGV2,exclude +rna,TRGV1,exclude +rna,TRBV1,exclude +rna,TRBV2,exclude +rna,TRBV3-1,exclude +rna,TRBV4-1,exclude +rna,TRBV5-1,exclude +rna,TRBV6-1,exclude +rna,TRBV7-1,exclude +rna,TRBV4-2,exclude +rna,TRBV6-2,exclude +rna,TRBV7-2,exclude +rna,TRBV8-1,exclude +rna,TRBV5-2,exclude +rna,TRBV6-4,exclude +rna,TRBV7-3,exclude +rna,TRBV8-2,exclude +rna,TRBV5-3,exclude +rna,TRBV9,exclude +rna,TRBV10-1,exclude +rna,TRBV11-1,exclude +rna,TRBV12-1,exclude +rna,TRBV10-2,exclude +rna,TRBV12-2,exclude +rna,TRBV6-5,exclude +rna,TRBV7-4,exclude +rna,TRBV5-4,exclude +rna,TRBV6-6,exclude +rna,TRBV7-5,exclude +rna,TRBV5-5,exclude +rna,TRBV6-7,exclude +rna,TRBV7-6,exclude +rna,TRBV5-6,exclude +rna,TRBV6-8,exclude +rna,TRBV7-7,exclude +rna,TRBV5-7,exclude +rna,TRBV7-9,exclude +rna,TRBV13,exclude +rna,TRBV10-3,exclude +rna,TRBV11-3,exclude +rna,TRBV12-3,exclude +rna,TRBV12-4,exclude +rna,TRBV12-5,exclude +rna,TRBV14,exclude +rna,TRBV15,exclude +rna,TRBV16,exclude +rna,TRBV17,exclude +rna,TRBV18,exclude +rna,TRBV19,exclude +rna,TRBV20-1,exclude +rna,TRBV21-1,exclude +rna,TRBV22-1,exclude +rna,TRBV23-1,exclude +rna,TRBV24-1,exclude +rna,TRBV25-1,exclude +rna,TRBVA,exclude +rna,TRBV26,exclude +rna,TRBVB,exclude +rna,TRBV27,exclude +rna,TRBV28,exclude +rna,TRBV29-1,exclude +rna,TRBD1,exclude +rna,TRBJ1-1,exclude +rna,TRBJ1-2,exclude +rna,TRBJ1-3,exclude 
+rna,TRBJ1-4,exclude +rna,TRBJ1-5,exclude +rna,TRBJ1-6,exclude +rna,TRBJ2-1,exclude +rna,TRBJ2-2,exclude +rna,TRBJ2-2P,exclude +rna,TRBJ2-3,exclude +rna,TRBJ2-4,exclude +rna,TRBJ2-5,exclude +rna,TRBJ2-6,exclude +rna,TRBJ2-7,exclude +rna,TRBV30,exclude +rna,IGLV8OR8-1,exclude +rna,TRBV20OR9-2,exclude +rna,TRBV21OR9-2,exclude +rna,TRBV22OR9-2,exclude +rna,TRBV23OR9-2,exclude +rna,TRBV24OR9-2,exclude +rna,TRBV25OR9-2,exclude +rna,TRBV26OR9-2,exclude +rna,TRBV29OR9-2,exclude +rna,IGKV1OR9-2,exclude +rna,IGKV1OR-2,exclude +rna,IGKV1OR9-1,exclude +rna,IGKV1OR-3,exclude +rna,IGKV1OR10-1,exclude +rna,TRAV1-1,exclude +rna,TRAV1-2,exclude +rna,TRAV2,exclude +rna,TRAV3,exclude +rna,TRAV4,exclude +rna,TRAV5,exclude +rna,TRAV6,exclude +rna,TRAV7,exclude +rna,TRAV8-1,exclude +rna,TRAV9-1,exclude +rna,TRAV10,exclude +rna,TRAV11,exclude +rna,TRAV12-1,exclude +rna,TRAV8-2,exclude +rna,TRAV8-3,exclude +rna,TRAV13-1,exclude +rna,TRAV12-2,exclude +rna,TRAV8-4,exclude +rna,TRAV8-5,exclude +rna,TRAV13-2,exclude +rna,TRAV14DV4,exclude +rna,TRAV9-2,exclude +rna,TRAV15,exclude +rna,TRAV12-3,exclude +rna,TRAV8-6,exclude +rna,TRAV16,exclude +rna,TRAV17,exclude +rna,TRAV18,exclude +rna,TRAV19,exclude +rna,TRAV20,exclude +rna,TRAV21,exclude +rna,TRAV22,exclude +rna,TRAV23DV6,exclude +rna,TRAV24,exclude +rna,TRAV25,exclude +rna,TRAV26-1,exclude +rna,TRAV8-7,exclude +rna,TRAV27,exclude +rna,TRAV28,exclude +rna,TRAV29DV5,exclude +rna,TRAV30,exclude +rna,TRAV31,exclude +rna,TRAV32,exclude +rna,TRAV33,exclude +rna,TRAV26-2,exclude +rna,TRAV34,exclude +rna,TRAV35,exclude +rna,TRAV36DV7,exclude +rna,TRAV37,exclude +rna,TRAV38-1,exclude +rna,TRAV38-2DV8,exclude +rna,TRAV39,exclude +rna,TRAV40,exclude +rna,TRAV41,exclude +rna,TRDD1,exclude +rna,TRDD2,exclude +rna,TRDD3,exclude +rna,TRDJ1,exclude +rna,TRDJ4,exclude +rna,TRDJ2,exclude +rna,TRDJ3,exclude +rna,TRAJ61,exclude +rna,TRAJ60,exclude +rna,TRAJ59,exclude +rna,TRAJ58,exclude +rna,TRAJ57,exclude +rna,TRAJ56,exclude +rna,TRAJ55,exclude 
+rna,TRAJ54,exclude +rna,TRAJ53,exclude +rna,TRAJ52,exclude +rna,TRAJ51,exclude +rna,TRAJ50,exclude +rna,TRAJ49,exclude +rna,TRAJ48,exclude +rna,TRAJ47,exclude +rna,TRAJ46,exclude +rna,TRAJ45,exclude +rna,TRAJ44,exclude +rna,TRAJ43,exclude +rna,TRAJ42,exclude +rna,TRAJ41,exclude +rna,TRAJ40,exclude +rna,TRAJ39,exclude +rna,TRAJ38,exclude +rna,TRAJ37,exclude +rna,TRAJ36,exclude +rna,TRAJ35,exclude +rna,TRAJ34,exclude +rna,TRAJ33,exclude +rna,TRAJ32,exclude +rna,TRAJ31,exclude +rna,TRAJ30,exclude +rna,TRAJ29,exclude +rna,TRAJ28,exclude +rna,TRAJ27,exclude +rna,TRAJ26,exclude +rna,TRAJ25,exclude +rna,TRAJ24,exclude +rna,TRAJ23,exclude +rna,TRAJ22,exclude +rna,TRAJ21,exclude +rna,TRAJ20,exclude +rna,TRAJ19,exclude +rna,TRAJ18,exclude +rna,TRAJ17,exclude +rna,TRAJ16,exclude +rna,TRAJ14,exclude +rna,TRAJ13,exclude +rna,TRAJ12,exclude +rna,TRAJ11,exclude +rna,TRAJ10,exclude +rna,TRAJ9,exclude +rna,TRAJ8,exclude +rna,TRAJ7,exclude +rna,TRAJ6,exclude +rna,TRAJ5,exclude +rna,TRAJ4,exclude +rna,TRAJ3,exclude +rna,TRAJ2,exclude +rna,TRAJ1,exclude +rna,IGHJ6,exclude +rna,IGHJ3P,exclude +rna,IGHJ5,exclude +rna,IGHJ4,exclude +rna,IGHJ3,exclude +rna,IGHJ2P,exclude +rna,IGHJ2,exclude +rna,IGHJ1,exclude +rna,IGHD7-27,exclude +rna,IGHJ1P,exclude +rna,IGHD1-26,exclude +rna,IGHD6-25,exclude +rna,IGHD5-24,exclude +rna,IGHD4-23,exclude +rna,IGHD3-22,exclude +rna,IGHD2-21,exclude +rna,IGHD1-20,exclude +rna,IGHD6-19,exclude +rna,IGHD5-18,exclude +rna,IGHD4-17,exclude +rna,IGHD3-16,exclude +rna,IGHD2-15,exclude +rna,IGHD1-14,exclude +rna,IGHD6-13,exclude +rna,IGHD5-12,exclude +rna,IGHD4-11,exclude +rna,IGHD3-10,exclude +rna,IGHD3-9,exclude +rna,IGHD2-8,exclude +rna,IGHD1-7,exclude +rna,IGHD6-6,exclude +rna,IGHD5-5,exclude +rna,IGHD4-4,exclude +rna,IGHD3-3,exclude +rna,IGHD2-2,exclude +rna,IGHD1-1,exclude +rna,IGHV6-1,exclude +rna,IGHVII-1-1,exclude +rna,IGHV1-2,exclude +rna,IGHVIII-2-1,exclude +rna,IGHV1-3,exclude +rna,IGHV4-4,exclude +rna,IGHV7-4-1,exclude +rna,IGHV2-5,exclude 
+rna,IGHVIII-5-1,exclude +rna,IGHVIII-5-2,exclude +rna,IGHV3-6,exclude +rna,IGHV3-7,exclude +rna,IGHV3-64D,exclude +rna,IGHV5-10-1,exclude +rna,IGHV3-11,exclude +rna,IGHVIII-11-1,exclude +rna,IGHV1-12,exclude +rna,IGHV3-13,exclude +rna,IGHVIII-13-1,exclude +rna,IGHV1-14,exclude +rna,IGHV3-15,exclude +rna,IGHVII-15-1,exclude +rna,IGHV3-16,exclude +rna,IGHVIII-16-1,exclude +rna,IGHV1-17,exclude +rna,IGHV1-18,exclude +rna,IGHV3-19,exclude +rna,IGHV3-20,exclude +rna,IGHV3-21,exclude +rna,IGHV3-22,exclude +rna,IGHVII-22-1,exclude +rna,IGHVIII-22-2,exclude +rna,IGHV3-23,exclude +rna,IGHV1-24,exclude +rna,IGHV3-25,exclude +rna,IGHVIII-25-1,exclude +rna,IGHV2-26,exclude +rna,IGHVIII-26-1,exclude +rna,IGHVII-26-2,exclude +rna,IGHV7-27,exclude +rna,IGHV4-28,exclude +rna,IGHVII-28-1,exclude +rna,IGHV3-32,exclude +rna,IGHV3-30,exclude +rna,IGHVII-30-1,exclude +rna,IGHV3-30-2,exclude +rna,IGHV4-31,exclude +rna,IGHVII-30-21,exclude +rna,IGHV3-29,exclude +rna,IGHV3-33,exclude +rna,IGHVII-33-1,exclude +rna,IGHV3-33-2,exclude +rna,IGHV4-34,exclude +rna,IGHV7-34-1,exclude +rna,IGHV3-35,exclude +rna,IGHV3-36,exclude +rna,IGHV3-37,exclude +rna,IGHV3-38,exclude +rna,IGHVIII-38-1,exclude +rna,IGHV4-39,exclude +rna,IGHV7-40,exclude +rna,IGHVII-40-1,exclude +rna,IGHV3-41,exclude +rna,IGHV3-42,exclude +rna,IGHV3-43,exclude +rna,IGHVII-43-1,exclude +rna,IGHVIII-44,exclude +rna,IGHVIV-44-1,exclude +rna,IGHVII-44-2,exclude +rna,IGHV1-45,exclude +rna,IGHV1-46,exclude +rna,IGHVII-46-1,exclude +rna,IGHV3-47,exclude +rna,IGHVIII-47-1,exclude +rna,IGHV3-48,exclude +rna,IGHV3-49,exclude +rna,IGHVII-49-1,exclude +rna,IGHV3-50,exclude +rna,IGHV5-51,exclude +rna,IGHV8-51-1,exclude +rna,IGHVII-51-2,exclude +rna,IGHV3-52,exclude +rna,IGHV3-53,exclude +rna,IGHVII-53-1,exclude +rna,IGHV3-54,exclude +rna,IGHV4-55,exclude +rna,IGHV7-56,exclude +rna,IGHV3-57,exclude +rna,IGHV1-58,exclude +rna,IGHV4-59,exclude +rna,IGHV3-60,exclude +rna,IGHVII-60-1,exclude +rna,IGHV4-61,exclude +rna,IGHV3-62,exclude 
+rna,IGHVII-62-1,exclude +rna,IGHV3-63,exclude +rna,IGHV3-64,exclude +rna,IGHV3-65,exclude +rna,IGHVII-65-1,exclude +rna,IGHV3-66,exclude +rna,IGHV1-67,exclude +rna,IGHVII-67-1,exclude +rna,IGHVIII-67-2,exclude +rna,IGHVIII-67-3,exclude +rna,IGHVIII-67-4,exclude +rna,IGHV1-68,exclude +rna,IGHV1-69,exclude +rna,IGHV2-70D,exclude +rna,IGHV3-69-1,exclude +rna,IGHV1-69-2,exclude +rna,IGHV1-69D,exclude +rna,IGHV2-70,exclude +rna,IGHV3-71,exclude +rna,IGHV3-72,exclude +rna,IGHV3-73,exclude +rna,IGHV3-74,exclude +rna,IGHVII-74-1,exclude +rna,IGHV3-75,exclude +rna,IGHV3-76,exclude +rna,IGHVIII-76-1,exclude +rna,IGHV5-78,exclude +rna,IGHVII-78-1,exclude +rna,IGHV3-79,exclude +rna,IGHV4-80,exclude +rna,IGHV7-81,exclude +rna,IGHVIII-82,exclude +rna,IGHV1OR15-9,exclude +rna,IGHV1OR15-2,exclude +rna,IGHV3OR15-7,exclude +rna,IGHD5OR15-5A,exclude +rna,IGHD4OR15-4A,exclude +rna,IGHD3OR15-3A,exclude +rna,IGHD2OR15-2A,exclude +rna,IGHD1OR15-1A,exclude +rna,IGHV1OR15-6,exclude +rna,IGHD5OR15-5B,exclude +rna,IGHD4OR15-4B,exclude +rna,IGHD3OR15-3B,exclude +rna,IGHD2OR15-2B,exclude +rna,IGHD1OR15-1B,exclude +rna,IGHV1OR15-1,exclude +rna,IGHV1OR15-3,exclude +rna,IGHV4OR15-8,exclude +rna,IGHV1OR15-4,exclude +rna,IGHV1OR16-1,exclude +rna,IGHV1OR16-3,exclude +rna,IGHV3OR16-9,exclude +rna,IGHV2OR16-5,exclude +rna,IGHV3OR16-15,exclude +rna,IGHV3OR16-6,exclude +rna,IGHV1OR16-2,exclude +rna,IGHV3OR16-10,exclude +rna,IGHV1OR16-4,exclude +rna,IGHV3OR16-8,exclude +rna,IGHV3OR16-12,exclude +rna,IGHV3OR16-13,exclude +rna,IGHV3OR16-11,exclude +rna,IGHV3OR16-7,exclude +rna,IGLON5,exclude +rna,IGKV1OR22-5,exclude +rna,IGKV2OR22-4,exclude +rna,IGKV2OR22-3,exclude +rna,IGKV3OR22-2,exclude +rna,IGKV1OR22-1,exclude +rna,IGLVI-70,exclude +rna,IGLV4-69,exclude +rna,IGLVI-68,exclude +rna,IGLV10-54,exclude +rna,IGLV10-67,exclude +rna,IGLVIV-66-1,exclude +rna,IGLVV-66,exclude +rna,IGLVIV-65,exclude +rna,IGLVIV-64,exclude +rna,IGLVI-63,exclude +rna,IGLV1-62,exclude +rna,IGLV8-61,exclude +rna,IGLV4-60,exclude 
+rna,IGLVIV-59,exclude +rna,IGLVV-58,exclude +rna,IGLV6-57,exclude +rna,IGLVI-56,exclude +rna,IGLV11-55,exclude +rna,IGLVIV-53,exclude +rna,IGLV5-52,exclude +rna,IGLV1-51,exclude +rna,IGLV1-50,exclude +rna,IGLV9-49,exclude +rna,IGLV5-48,exclude +rna,IGLV1-47,exclude +rna,IGLV7-46,exclude +rna,IGLV5-45,exclude +rna,IGLV1-44,exclude +rna,IGLV7-43,exclude +rna,IGLVI-42,exclude +rna,IGLVVII-41-1,exclude +rna,IGLV1-41,exclude +rna,IGLV1-40,exclude +rna,IGLVI-38,exclude +rna,IGLV5-37,exclude +rna,IGLV1-36,exclude +rna,IGLV7-35,exclude +rna,IGLV2-34,exclude +rna,IGLV2-33,exclude +rna,IGLV3-32,exclude +rna,IGLV3-31,exclude +rna,IGLV3-30,exclude +rna,IGLV3-29,exclude +rna,IGLV2-28,exclude +rna,IGLV3-27,exclude +rna,IGLV3-26,exclude +rna,IGLVVI-25-1,exclude +rna,IGLV3-25,exclude +rna,IGLV3-24,exclude +rna,IGLV2-23,exclude +rna,IGLVVI-22-1,exclude +rna,IGLV3-22,exclude +rna,IGLV3-21,exclude +rna,IGLVI-20,exclude +rna,IGLV3-19,exclude +rna,IGLV2-18,exclude +rna,IGLV3-17,exclude +rna,IGLV3-16,exclude +rna,IGLV3-15,exclude +rna,IGLV2-14,exclude +rna,IGLV3-13,exclude +rna,IGLV3-12,exclude +rna,IGLV2-11,exclude +rna,IGLV3-10,exclude +rna,IGLV3-9,exclude +rna,IGLV2-8,exclude +rna,IGLV3-7,exclude +rna,IGLV3-6,exclude +rna,IGLV2-5,exclude +rna,IGLV3-4,exclude +rna,IGLV4-3,exclude +rna,IGLV3-2,exclude +rna,IGLV3-1,exclude +rna,IGLL5,exclude +rna,IGLJ1,exclude +rna,IGLJ2,exclude +rna,IGLJ3,exclude +rna,IGLJ4,exclude +rna,IGLJ5,exclude +rna,IGLJ6,exclude +rna,IGLJ7,exclude +rna,IGLL1,exclude +rna,IGLVIVOR22-1,exclude +rna,IGLVIVOR22-2,exclude +rna,IGHV1OR21-1,exclude +rna,HLA-C,exclude +rna,MT-ND1,mt +rna,MT-ND2,mt +rna,MT-CO1,mt +rna,MT-CO2,mt +rna,MT-ATP8,mt +rna,MT-ATP6,mt +rna,MT-CO3,mt +rna,MT-ND3,mt +rna,MT-ND4L,mt +rna,MT-ND4,mt +rna,MT-ND5,mt +rna,MT-ND6,mt +rna,MT-CYB,mt +rna,RPL22,rp +rna,RPL11,rp +rna,RPS6KA1,rp +rna,RPA2,rp +rna,RPS8,rp +rna,RPE65,rp +rna,RPF1,rp +rna,RPAP2,rp +rna,RPL5,rp +rna,RPRD2,rp +rna,RPTN,rp +rna,RPS27,rp +rna,RPS27AP5,rp +rna,RPS6KC1,rp +rna,RPS7,rp 
+rna,RPS27A,rp +rna,RPIA,rp +rna,RPL31,rp +rna,RPRM,rp +rna,RPE,rp +rna,RPL37A,rp +rna,RPUSD3,rp +rna,RPL32,rp +rna,RPL15,rp +rna,RPSA,rp +rna,RPL14,rp +rna,RPL29,rp +rna,RPP14,rp +rna,RPL24,rp +rna,RPN1,rp +rna,RPL22L1,rp +rna,RPL39L,rp +rna,RPL35A,rp +rna,RPL9,rp +rna,RPL34,rp +rna,RPS3A,rp +rna,RPL37,rp +rna,RPS23,rp +rna,RPS14,rp +rna,RPL26L1,rp +rna,RPP40,rp +rna,RPP21,rp +rna,RPS18,rp +rna,RPS10-NUDT3,rp +rna,RPS10,rp +rna,RPL10A,rp +rna,RPL7L1,rp +rna,RPF2,rp +rna,RPS12,rp +rna,RPS6KA2,rp +rna,RPA3,rp +rna,RP9,rp +rna,RPS6KA3,rp +rna,RPGR,rp +rna,RP2,rp +rna,RPS4X,rp +rna,RPS6KA6,rp +rna,RPA4,rp +rna,RPL36A,rp +rna,RPL36A-HNRNPH2,rp +rna,RPL39,rp +rna,RPL10,rp +rna,RP1L1,rp +rna,RP1,rp +rna,RPS20,rp +rna,RPL7,rp +rna,RPL30,rp +rna,RPL8,rp +rna,RPS6,rp +rna,RPP25L,rp +rna,RPL35,rp +rna,RPL12,rp +rna,RPL7A,rp +rna,RPLP2,rp +rna,RPL27A,rp +rna,RPS13,rp +rna,RPS6KA4,rp +rna,RPS6KB2,rp +rna,RPS3,rp +rna,RPS25,rp +rna,RPUSD4,rp +rna,RPP38,rp +rna,RPS24,rp +rna,RPP30,rp +rna,RPEL1,rp +rna,RPAP3,rp +rna,RPS26,rp +rna,RPL41,rp +rna,RPL6,rp +rna,RPH3A,rp +rna,RPLP0,rp +rna,RPL21,rp +rna,RPGRIP1,rp +rna,RPL10L,rp +rna,RPS29,rp +rna,RPL36AL,rp +rna,RPS6KL1,rp +rna,RPS6KA5,rp +rna,RPUSD2,rp +rna,RPAP1,rp +rna,RPS27L,rp +rna,RPL4,rp +rna,RPLP1,rp +rna,RPP25,rp +rna,RPS17,rp +rna,RPUSD1,rp +rna,RPL3L,rp +rna,RPS2,rp +rna,RPS15A,rp +rna,RPGRIP1L,rp +rna,RPL13,rp +rna,RPH3AL,rp +rna,RPA1,rp +rna,RPAIN,rp +rna,RPL26,rp +rna,RPL23A,rp +rna,RPL23,rp +rna,RPL19,rp +rna,RPL27,rp +rna,RPRML,rp +rna,RPS6KB1,rp +rna,RPL38,rp +rna,RPTOR,rp +rna,RPRD1A,rp +rna,RPL17-C18orf32,rp +rna,RPL17,rp +rna,RPN2,rp +rna,RPRD1B,rp +rna,RPS21,rp +rna,RPS15,rp +rna,RPL36,rp +rna,RPS28,rp +rna,RPL18A,rp +rna,RPS16,rp +rna,RPS19,rp +rna,RPL18,rp +rna,RPL13A,rp +rna,RPS11,rp +rna,RPS9,rp +rna,RPL28,rp +rna,RPS5,rp +rna,RPS4Y1,rp +rna,RPS4Y2,rp +rna,RPL3,rp +rna,RPS19BP1,rp +rna,IGKV1OR1-1,ig +rna,IGKV3OR2-268,ig +rna,IGKJ5,ig +rna,IGKJ4,ig +rna,IGKJ3,ig +rna,IGKJ2,ig +rna,IGKJ1,ig +rna,IGKV4-1,ig 
+rna,IGKV5-2,ig +rna,IGKV7-3,ig +rna,IGKV2-4,ig +rna,IGKV1-5,ig +rna,IGKV1-6,ig +rna,IGKV3-7,ig +rna,IGKV1-8,ig +rna,IGKV1-9,ig +rna,IGKV2-10,ig +rna,IGKV3-11,ig +rna,IGKV1-12,ig +rna,IGKV1-13,ig +rna,IGKV2-14,ig +rna,IGKV3-15,ig +rna,IGKV1-16,ig +rna,IGKV1-17,ig +rna,IGKV2-18,ig +rna,IGKV2-19,ig +rna,IGKV3-20,ig +rna,IGKV6-21,ig +rna,IGKV1-22,ig +rna,IGKV2-23,ig +rna,IGKV2-24,ig +rna,IGKV3-25,ig +rna,IGKV2-26,ig +rna,IGKV1-27,ig +rna,IGKV2-28,ig +rna,IGKV2-29,ig +rna,IGKV2-30,ig +rna,IGKV3-31,ig +rna,IGKV1-32,ig +rna,IGKV1-33,ig +rna,IGKV3-34,ig +rna,IGKV1-35,ig +rna,IGKV2-36,ig +rna,IGKV1-37,ig +rna,IGKV2-38,ig +rna,IGKV1-39,ig +rna,IGKV2-40,ig +rna,IGKV2D-40,ig +rna,IGKV1D-39,ig +rna,IGKV2D-38,ig +rna,IGKV1D-37,ig +rna,IGKV2D-36,ig +rna,IGKV1D-35,ig +rna,IGKV3D-34,ig +rna,IGKV1D-33,ig +rna,IGKV1D-32,ig +rna,IGKV3D-31,ig +rna,IGKV2D-30,ig +rna,IGKV2D-29,ig +rna,IGKV2D-28,ig +rna,IGKV1D-27,ig +rna,IGKV2D-26,ig +rna,IGKV3D-25,ig +rna,IGKV2D-24,ig +rna,IGKV2D-23,ig +rna,IGKV1D-22,ig +rna,IGKV6D-21,ig +rna,IGKV3D-20,ig +rna,IGKV2D-19,ig +rna,IGKV2D-18,ig +rna,IGKV6D-41,ig +rna,IGKV1D-17,ig +rna,IGKV1D-16,ig +rna,IGKV3D-15,ig +rna,IGKV2D-14,ig +rna,IGKV1D-13,ig +rna,IGKV1D-12,ig +rna,IGKV3D-11,ig +rna,IGKV2D-10,ig +rna,IGKV1D-42,ig +rna,IGKV1D-43,ig +rna,IGKV1D-8,ig +rna,IGKV3D-7,ig +rna,IGKV1OR2-118,ig +rna,IGKV1OR2-1,ig +rna,IGKV2OR2-1,ig +rna,IGKV2OR2-2,ig +rna,IGKV1OR2-3,ig +rna,IGKV1OR2-9,ig +rna,IGKV2OR2-10,ig +rna,IGKV2OR2-7D,ig +rna,IGKV3OR2-5,ig +rna,IGKV1OR2-6,ig +rna,IGKV2OR2-7,ig +rna,IGKV2OR2-8,ig +rna,IGKV1OR2-11,ig +rna,IGKV1OR2-108,ig +rna,IGLV8OR8-1,ig +rna,IGKV1OR9-2,ig +rna,IGKV1OR-2,ig +rna,IGKV1OR9-1,ig +rna,IGKV1OR-3,ig +rna,IGKV1OR10-1,ig +rna,IGHJ6,ig +rna,IGHJ3P,ig +rna,IGHJ5,ig +rna,IGHJ4,ig +rna,IGHJ3,ig +rna,IGHJ2P,ig +rna,IGHJ2,ig +rna,IGHJ1,ig +rna,IGHD7-27,ig +rna,IGHJ1P,ig +rna,IGHD1-26,ig +rna,IGHD6-25,ig +rna,IGHD5-24,ig +rna,IGHD4-23,ig +rna,IGHD3-22,ig +rna,IGHD2-21,ig +rna,IGHD1-20,ig +rna,IGHD6-19,ig +rna,IGHD5-18,ig 
+rna,IGHD4-17,ig +rna,IGHD3-16,ig +rna,IGHD2-15,ig +rna,IGHD1-14,ig +rna,IGHD6-13,ig +rna,IGHD5-12,ig +rna,IGHD4-11,ig +rna,IGHD3-10,ig +rna,IGHD3-9,ig +rna,IGHD2-8,ig +rna,IGHD1-7,ig +rna,IGHD6-6,ig +rna,IGHD5-5,ig +rna,IGHD4-4,ig +rna,IGHD3-3,ig +rna,IGHD2-2,ig +rna,IGHD1-1,ig +rna,IGHV6-1,ig +rna,IGHVII-1-1,ig +rna,IGHV1-2,ig +rna,IGHVIII-2-1,ig +rna,IGHV1-3,ig +rna,IGHV4-4,ig +rna,IGHV7-4-1,ig +rna,IGHV2-5,ig +rna,IGHVIII-5-1,ig +rna,IGHVIII-5-2,ig +rna,IGHV3-6,ig +rna,IGHV3-7,ig +rna,IGHV3-64D,ig +rna,IGHV5-10-1,ig +rna,IGHV3-11,ig +rna,IGHVIII-11-1,ig +rna,IGHV1-12,ig +rna,IGHV3-13,ig +rna,IGHVIII-13-1,ig +rna,IGHV1-14,ig +rna,IGHV3-15,ig +rna,IGHVII-15-1,ig +rna,IGHV3-16,ig +rna,IGHVIII-16-1,ig +rna,IGHV1-17,ig +rna,IGHV1-18,ig +rna,IGHV3-19,ig +rna,IGHV3-20,ig +rna,IGHV3-21,ig +rna,IGHV3-22,ig +rna,IGHVII-22-1,ig +rna,IGHVIII-22-2,ig +rna,IGHV3-23,ig +rna,IGHV1-24,ig +rna,IGHV3-25,ig +rna,IGHVIII-25-1,ig +rna,IGHV2-26,ig +rna,IGHVIII-26-1,ig +rna,IGHVII-26-2,ig +rna,IGHV7-27,ig +rna,IGHV4-28,ig +rna,IGHVII-28-1,ig +rna,IGHV3-32,ig +rna,IGHV3-30,ig +rna,IGHVII-30-1,ig +rna,IGHV3-30-2,ig +rna,IGHV4-31,ig +rna,IGHVII-30-21,ig +rna,IGHV3-29,ig +rna,IGHV3-33,ig +rna,IGHVII-33-1,ig +rna,IGHV3-33-2,ig +rna,IGHV4-34,ig +rna,IGHV7-34-1,ig +rna,IGHV3-35,ig +rna,IGHV3-36,ig +rna,IGHV3-37,ig +rna,IGHV3-38,ig +rna,IGHVIII-38-1,ig +rna,IGHV4-39,ig +rna,IGHV7-40,ig +rna,IGHVII-40-1,ig +rna,IGHV3-41,ig +rna,IGHV3-42,ig +rna,IGHV3-43,ig +rna,IGHVII-43-1,ig +rna,IGHVIII-44,ig +rna,IGHVIV-44-1,ig +rna,IGHVII-44-2,ig +rna,IGHV1-45,ig +rna,IGHV1-46,ig +rna,IGHVII-46-1,ig +rna,IGHV3-47,ig +rna,IGHVIII-47-1,ig +rna,IGHV3-48,ig +rna,IGHV3-49,ig +rna,IGHVII-49-1,ig +rna,IGHV3-50,ig +rna,IGHV5-51,ig +rna,IGHV8-51-1,ig +rna,IGHVII-51-2,ig +rna,IGHV3-52,ig +rna,IGHV3-53,ig +rna,IGHVII-53-1,ig +rna,IGHV3-54,ig +rna,IGHV4-55,ig +rna,IGHV7-56,ig +rna,IGHV3-57,ig +rna,IGHV1-58,ig +rna,IGHV4-59,ig +rna,IGHV3-60,ig +rna,IGHVII-60-1,ig +rna,IGHV4-61,ig +rna,IGHV3-62,ig +rna,IGHVII-62-1,ig 
+rna,IGHV3-63,ig +rna,IGHV3-64,ig +rna,IGHV3-65,ig +rna,IGHVII-65-1,ig +rna,IGHV3-66,ig +rna,IGHV1-67,ig +rna,IGHVII-67-1,ig +rna,IGHVIII-67-2,ig +rna,IGHVIII-67-3,ig +rna,IGHVIII-67-4,ig +rna,IGHV1-68,ig +rna,IGHV1-69,ig +rna,IGHV2-70D,ig +rna,IGHV3-69-1,ig +rna,IGHV1-69-2,ig +rna,IGHV1-69D,ig +rna,IGHV2-70,ig +rna,IGHV3-71,ig +rna,IGHV3-72,ig +rna,IGHV3-73,ig +rna,IGHV3-74,ig +rna,IGHVII-74-1,ig +rna,IGHV3-75,ig +rna,IGHV3-76,ig +rna,IGHVIII-76-1,ig +rna,IGHV5-78,ig +rna,IGHVII-78-1,ig +rna,IGHV3-79,ig +rna,IGHV4-80,ig +rna,IGHV7-81,ig +rna,IGHVIII-82,ig +rna,IGHV1OR15-9,ig +rna,IGHV1OR15-2,ig +rna,IGHV3OR15-7,ig +rna,IGHD5OR15-5A,ig +rna,IGHD4OR15-4A,ig +rna,IGHD3OR15-3A,ig +rna,IGHD2OR15-2A,ig +rna,IGHD1OR15-1A,ig +rna,IGHV1OR15-6,ig +rna,IGHD5OR15-5B,ig +rna,IGHD4OR15-4B,ig +rna,IGHD3OR15-3B,ig +rna,IGHD2OR15-2B,ig +rna,IGHD1OR15-1B,ig +rna,IGHV1OR15-1,ig +rna,IGHV1OR15-3,ig +rna,IGHV4OR15-8,ig +rna,IGHV1OR15-4,ig +rna,IGHV1OR16-1,ig +rna,IGHV1OR16-3,ig +rna,IGHV3OR16-9,ig +rna,IGHV2OR16-5,ig +rna,IGHV3OR16-15,ig +rna,IGHV3OR16-6,ig +rna,IGHV1OR16-2,ig +rna,IGHV3OR16-10,ig +rna,IGHV1OR16-4,ig +rna,IGHV3OR16-8,ig +rna,IGHV3OR16-12,ig +rna,IGHV3OR16-13,ig +rna,IGHV3OR16-11,ig +rna,IGHV3OR16-7,ig +rna,IGLON5,ig +rna,IGKV1OR22-5,ig +rna,IGKV2OR22-4,ig +rna,IGKV2OR22-3,ig +rna,IGKV3OR22-2,ig +rna,IGKV1OR22-1,ig +rna,IGLVI-70,ig +rna,IGLV4-69,ig +rna,IGLVI-68,ig +rna,IGLV10-54,ig +rna,IGLV10-67,ig +rna,IGLVIV-66-1,ig +rna,IGLVV-66,ig +rna,IGLVIV-65,ig +rna,IGLVIV-64,ig +rna,IGLVI-63,ig +rna,IGLV1-62,ig +rna,IGLV8-61,ig +rna,IGLV4-60,ig +rna,IGLVIV-59,ig +rna,IGLVV-58,ig +rna,IGLV6-57,ig +rna,IGLVI-56,ig +rna,IGLV11-55,ig +rna,IGLVIV-53,ig +rna,IGLV5-52,ig +rna,IGLV1-51,ig +rna,IGLV1-50,ig +rna,IGLV9-49,ig +rna,IGLV5-48,ig +rna,IGLV1-47,ig +rna,IGLV7-46,ig +rna,IGLV5-45,ig +rna,IGLV1-44,ig +rna,IGLV7-43,ig +rna,IGLVI-42,ig +rna,IGLVVII-41-1,ig +rna,IGLV1-41,ig +rna,IGLV1-40,ig +rna,IGLVI-38,ig +rna,IGLV5-37,ig +rna,IGLV1-36,ig +rna,IGLV7-35,ig +rna,IGLV2-34,ig 
+rna,IGLV2-33,ig +rna,IGLV3-32,ig +rna,IGLV3-31,ig +rna,IGLV3-30,ig +rna,IGLV3-29,ig +rna,IGLV2-28,ig +rna,IGLV3-27,ig +rna,IGLV3-26,ig +rna,IGLVVI-25-1,ig +rna,IGLV3-25,ig +rna,IGLV3-24,ig +rna,IGLV2-23,ig +rna,IGLVVI-22-1,ig +rna,IGLV3-22,ig +rna,IGLV3-21,ig +rna,IGLVI-20,ig +rna,IGLV3-19,ig +rna,IGLV2-18,ig +rna,IGLV3-17,ig +rna,IGLV3-16,ig +rna,IGLV3-15,ig +rna,IGLV2-14,ig +rna,IGLV3-13,ig +rna,IGLV3-12,ig +rna,IGLV2-11,ig +rna,IGLV3-10,ig +rna,IGLV3-9,ig +rna,IGLV2-8,ig +rna,IGLV3-7,ig +rna,IGLV3-6,ig +rna,IGLV2-5,ig +rna,IGLV3-4,ig +rna,IGLV4-3,ig +rna,IGLV3-2,ig +rna,IGLV3-1,ig +rna,IGLL5,ig +rna,IGLJ1,ig +rna,IGLJ2,ig +rna,IGLJ3,ig +rna,IGLJ4,ig +rna,IGLJ5,ig +rna,IGLJ6,ig +rna,IGLJ7,ig +rna,IGLL1,ig +rna,IGLVIVOR22-1,ig +rna,IGLVIVOR22-2,ig +rna,IGHV1OR21-1,ig diff --git a/docs/ingesting_airr_data/files/short_vdj_submission.tsv b/docs/ingesting_airr_data/files/short_vdj_submission.tsv new file mode 100644 index 0000000..729c2c3 --- /dev/null +++ b/docs/ingesting_airr_data/files/short_vdj_submission.tsv @@ -0,0 +1,2 @@ +sample_id rna_path rna_filetype prot_path prot_filetype tcr_path tcr_filetype bcr_path bcr_filetype disease_state tissue preservation_method celltype +human_cmv /FullProjectPath/data/outs/ cellranger_multi NA NA /FullProjectPath/data/outs/per_sample_outs/human_cmv/vdj_t/filtered_contig_annotations.csv cellranger_vdj NA NA cytomegalovirus blood NA T-lymphocyte \ No newline at end of file diff --git a/docs/ingesting_airr_data/img/rna_nUMI_vs_nGene.png b/docs/ingesting_airr_data/img/rna_nUMI_vs_nGene.png new file mode 100644 index 0000000..df69505 Binary files /dev/null and b/docs/ingesting_airr_data/img/rna_nUMI_vs_nGene.png differ diff --git a/docs/ingesting_airr_data/img/rna_nUMI_vs_pct_mito.png b/docs/ingesting_airr_data/img/rna_nUMI_vs_pct_mito.png new file mode 100644 index 0000000..8897cac Binary files /dev/null and b/docs/ingesting_airr_data/img/rna_nUMI_vs_pct_mito.png differ diff --git 
a/docs/ingesting_airr_data/img/tcr_clonal_expansion_barplot.png b/docs/ingesting_airr_data/img/tcr_clonal_expansion_barplot.png new file mode 100644 index 0000000..f0913c6 Binary files /dev/null and b/docs/ingesting_airr_data/img/tcr_clonal_expansion_barplot.png differ diff --git a/docs/ingesting_airr_data/img/tcr_group_abundance_receptor_subtype.png b/docs/ingesting_airr_data/img/tcr_group_abundance_receptor_subtype.png new file mode 100644 index 0000000..bfa52eb Binary files /dev/null and b/docs/ingesting_airr_data/img/tcr_group_abundance_receptor_subtype.png differ diff --git a/docs/ingesting_airr_data/img/tcr_group_abundance_receptor_type.png b/docs/ingesting_airr_data/img/tcr_group_abundance_receptor_type.png new file mode 100644 index 0000000..1f74bf4 Binary files /dev/null and b/docs/ingesting_airr_data/img/tcr_group_abundance_receptor_type.png differ diff --git a/docs/ingesting_airr_data/img/tenx_estimated_cells.png b/docs/ingesting_airr_data/img/tenx_estimated_cells.png new file mode 100644 index 0000000..73edbe6 Binary files /dev/null and b/docs/ingesting_airr_data/img/tenx_estimated_cells.png differ diff --git a/docs/ingesting_airr_data/img/tenx_mean_reads_per_cell.png b/docs/ingesting_airr_data/img/tenx_mean_reads_per_cell.png new file mode 100644 index 0000000..3818aa4 Binary files /dev/null and b/docs/ingesting_airr_data/img/tenx_mean_reads_per_cell.png differ diff --git a/docs/ingesting_airr_data/img/tenx_productive_tra_trb.png b/docs/ingesting_airr_data/img/tenx_productive_tra_trb.png new file mode 100644 index 0000000..b86351a Binary files /dev/null and b/docs/ingesting_airr_data/img/tenx_productive_tra_trb.png differ diff --git a/docs/ingesting_airr_data/img/tenx_sequencing_saturation.png b/docs/ingesting_airr_data/img/tenx_sequencing_saturation.png new file mode 100644 index 0000000..e521e31 Binary files /dev/null and b/docs/ingesting_airr_data/img/tenx_sequencing_saturation.png differ diff --git 
a/docs/ingesting_airr_data/img/tenx_valid_barcodes.png b/docs/ingesting_airr_data/img/tenx_valid_barcodes.png new file mode 100644 index 0000000..b8f71a9 Binary files /dev/null and b/docs/ingesting_airr_data/img/tenx_valid_barcodes.png differ diff --git a/docs/ingesting_airr_data/img/tenx_valid_umis.png b/docs/ingesting_airr_data/img/tenx_valid_umis.png new file mode 100644 index 0000000..089e4f3 Binary files /dev/null and b/docs/ingesting_airr_data/img/tenx_valid_umis.png differ diff --git a/docs/ingesting_airr_data/ingesting_airr_data.md b/docs/ingesting_airr_data/ingesting_airr_data.md new file mode 100644 index 0000000..ee5ecf2 --- /dev/null +++ b/docs/ingesting_airr_data/ingesting_airr_data.md @@ -0,0 +1,360 @@ +Analyzing scRNA-seq and scTCR-seq Data with Panpipes +=================== + +## Introduction + +This tutorial guides you through the process of analyzing single-cell RNA sequencing (scRNA-seq) and single-cell T-cell receptor sequencing (scTCR-seq) data using Panpipes. +While similar to the multimodal CITE-seq tutorial, this guide differs in two major points – it's much easier to follow and it focuses on single-cell transcriptomics and immunomics (AIRR – Adaptive Immune Receptor Repertoire) analysis. + +## Before you start + +### Running pipelines + +- **Re-running analyses:** if you need to re-run the pipeline from scratch, ensure you remove any existing `1_ingest` directories to prevent conflicts. + +- **Error handling:** while Panpipes strives to provide clear error messages, some issues may still arise. If you encounter unclear errors, please report them by opening an issue on our GitHub repository. + +- **File organization:** for convenience, all necessary files are provided in the section below. Ensure that your file and folder structures mirror the setup described to avoid potential issues. + +## Setup + +### Install panpipes + +There are several ways you can install `panpipes`, and the best option depends on your existing environment.
+ +#### Python venv - if you have R already installed + +If you already have R installed on your machine, we highly recommend minimizing the use of heavy package managers such as conda and opting for more traditional Python methods. + +To create a reproducible environment you can create a Python virtual environment in your project folder, and install packages there. It allows you to fully isolate your local environment and not contaminate or break outside projects. + +To create a Python virtual environment, do the following: + +1. Open the console and navigate to your project folder. + +2. Run the code to create the environment `python3 -m venv --prompt=panpipes .venv/`. + +3. Activate the environment by running `source .venv/bin/activate`. + +4. Now install `panpipes` by running `pip install panpipes` in the console; because the environment is active, `pip` will install packages to your local environment `.venv`. + +You can even maintain multiple Python virtual environments side by side for different projects. + +To install R dependencies for `panpipes`, run `panpipes install_r_dependencies`. + +#### Other installation options + +For Apple Silicon instructions and a more detailed guide to installing with R and conda, follow the installation instructions in the [panpipes documentation](https://panpipes-pipelines.readthedocs.io). + +However, in our experience, separate installations of Python and R worked the best – you install a system-wide R, and follow the steps from the previous section on `panpipes` installation in a virtual environment. + +### Check the panpipes installation + +If everything is set up correctly, this command should work without any errors: + +```bash +panpipes --help +``` + +### Download the dataset + +Create a folder named `data` in your project folder `project`. Inside the data folder, create the following subfolders: + +``` +data +└── outs + ├── multi + │ ├── count + │ └── vdj_t + └── per_sample_outs + └── human_cmv + ├── count + └── vdj_t + +``` + +Yes, that’s quite a few folders, but don’t worry. We will provide ways to double-check whether the files are in the correct locations.
For example, if you run `ls data/outs/multi/count/`, you should see the `raw_feature_bc_matrix` folder, `feature_reference.csv`, and `raw_feature_bc_matrix.h5`. + +For this tutorial, we will use the 10x Genomics data on CMV and human T-cells from this [link](https://www.10xgenomics.com/datasets/integrated-gex-totalseqc-and-tcr-analysis-of-connect-generated-library-from-5k-cmv-t-cells-2-standard). + +Here is the list of files you need to download from the link above and where you should put them. + +1. From the tab "Output and supplemental files", choose the "Raw" subsection from the "Raw output" section. Download the `Gene Expression - Feature / cell matrix (raw)` file. Extract it to `data/outs/multi/count` and rename the extracted folder to `raw_feature_bc_matrix`. + +2. From the same page, download `Gene Expression - Feature / cell matrix HDF5 (raw)` and copy the file to `data/outs/multi/count/raw_feature_bc_matrix.h5`. Note that this is a path to the file, i.e., you don't need to create a folder named `raw_feature_bc_matrix.h5`. + +3. Download `VDJ TCR - All contig annotations (CSV)` and copy it to `data/outs/multi/vdj_t/all_contig_annotations.csv`. + +4. Go to the section "Per sample outputs", subsection "Single channel", and download the file `Gene Expression - Feature Reference`. You need to copy it to two places: `data/outs/multi/count/feature_reference.csv` and `data/outs/per_sample_outs/human_cmv/count/feature_reference.csv`. + +5. Download `Summary CSV` and copy it to `data/outs/per_sample_outs/metrics_summary.csv`. + +6. Download `Gene Expression - Feature / cell matrix (per-sample)` and extract the folder to `data/outs/per_sample_outs/human_cmv/count/` with the name `sample_filtered_feature_bc_matrix`. + +7. Download `Gene Expression - Feature / cell matrix HDF5 (per-sample)` and copy it to `data/outs/per_sample_outs/human_cmv/count/sample_filtered_feature_bc_matrix.h5`. + +8.
And finally, download `Gene Expression - Sample barcodes` and copy it to `data/outs/per_sample_outs/human_cmv/count/sample_filtered_barcodes.csv`, and download `VDJ TCR - Filtered contig annotations (CSV)` and copy it to `data/outs/per_sample_outs/human_cmv/vdj_t/filtered_contig_annotations.csv` (the `tcr_path` column of the submission file points to this file). + +This is what it should look like in the end: + +``` +data/ +└── outs/ + ├── multi/ + │ ├── count/ + │ │ ├── raw_feature_bc_matrix/ + │ │ ├── feature_reference.csv + │ │ └── raw_feature_bc_matrix.h5 + │ └── vdj_t/ + │ └── all_contig_annotations.csv + └── per_sample_outs/ + ├── human_cmv/ + │ ├── count/ + │ │ ├── sample_filtered_feature_bc_matrix/ + │ │ ├── feature_reference.csv + │ │ ├── sample_filtered_barcodes.csv + │ │ └── sample_filtered_feature_bc_matrix.h5 + │ └── vdj_t/ + │ └── filtered_contig_annotations.csv + └── metrics_summary.csv +``` + +### Submission, pipeline config, and gene files + +For your convenience, we prepared the necessary files for running `panpipes` with the dataset you just downloaded. + +#### Submission file + +Download link: [submission file](./files/short_vdj_submission.tsv) + +First, the submission file, which describes the dataset. You need to change the file paths in the file to your project file path. Important note: you must provide full file paths. +I.e., `~/Projects/project-name/data/outs/filename.csv` wouldn't work, only `/Users/yourname/Projects/project-name/data/outs/filename.csv` would work. + +Besides the first column, “sample_id”, the order in which the columns are provided is not fixed, but the column names are fixed! Failing to specify the column names will result in omission of the modality from the analysis and early stopping of the pipeline. + +We find it useful to generate the submission file with software such as Numbers or Excel and save the output as a tab-separated text file to ensure that the file is properly formatted. For more examples please check our documentation on sample submission files. + +Put the submission file at the top level of your project: + +``` +project-name/ +├── data/ +│ └── outs/...
+└── short_vdj_submission.tsv +``` + +### Pipeline configuration file + +Download link: [pipeline.yml](./files/pipeline.yml) + +Pipeline configuration files store all the input files and settings for analysis steps. We provide a ready-to-go pipeline configuration file. Download it and put it inside the `1_ingest` folder: + +``` +project-name/ +├── 1_ingest/ +│ └── pipeline.yml +├── data/ +│ └── outs/... +└── short_vdj_submission.tsv +``` + +### List of genes for quality control + +Download link: [qc_genelist_1.0.csv](./files/qc_genelist_1.0.csv) + +`Panpipes` already comes with this resource, but for the purposes of this tutorial we provide a separate link to this file. Download it and put it in the `1_ingest` folder: + +``` +project-name/ +├── 1_ingest/ +│ ├── pipeline.yml +│ └── qc_genelist_1.0.csv +├── data/ +│ └── outs/... +└── short_vdj_submission.tsv +``` + +### Backup + +In case of mistakes, we recommend creating a backup folder from which the downloaded files can be restored: + +``` +project-name/ +├── 1_ingest/ +│ ├── pipeline.yml +│ └── qc_genelist_1.0.csv +├── backup/ +│ ├── pipeline.yml +│ └── qc_genelist_1.0.csv +├── data/ +│ └── outs/... +└── short_vdj_submission.tsv +``` + +## Analysis + +### Run + +To run `panpipes`, you need to create a folder `1_ingest` at the top level of the project folder, if you haven't already: + +``` +project-name/ +├── 1_ingest/ +│ ├── pipeline.yml +│ └── qc_genelist_1.0.csv +├── data/ +│ └── outs/... +└── short_vdj_submission.tsv +``` + +Open the console and do the following: + +1. Navigate to the project folder. + +2. Activate the environment, e.g., execute `source .venv/bin/activate` if you went with the Python environment. + +3. Navigate to the `1_ingest` folder: `cd 1_ingest`. + +4. If you want to generate a fresh config file, either to edit it or to replace it with ours from the section above, execute `panpipes ingest config`.
But make sure to double-check that the relevant `pipeline.yml` and `qc_genelist_1.0.csv` are in the correct place. + +5. Now `panpipes` is ready for running the analysis. Execute `panpipes ingest make full --local` if you are working on a local machine; execute `panpipes ingest make full` if you are working on a cluster. + +6. The pipeline will write logs to the console output and to the log files in the `1_ingest` folder. If everything went smoothly, you will see something like this in the console: + +``` +2023-11-27 10:07:10,498 INFO main task - Completed Task = 'pipeline_ingest.full' +2023-11-27 10:07:10,499 INFO main experiment - job finished in 978 seconds at Mon Nov 27 10:07:10 2023 -- 27.26 12.18 0.14 0.52 -- 85d537bd-19b2-41cd-8134-5d615c1342e5 +``` + +7. If something went wrong, you will see error messages, which you can analyze yourself, ask ChatGPT or any other LLM assistant about, or report by creating an issue in the repository. Detailed logs are stored in `1_ingest/logs/*.log`. + +If everything is OK, you will see plots in `1_ingest/rna`, `1_ingest/rep`, and `1_ingest/tenx_metrics`. + +Let's go over several of them. + +### 10x Genomics Metrics + +Below are automatically generated summary plots from the 10x Genomics pipeline. They help assess overall data quality and ensure that the library prep and sequencing worked as expected: + +- **Estimated Number of Cells** + Shows how many cells were captured in the dataset. + *Why it matters:* If this number is much lower than your expected cell count, you might have lost cells or had poor capture efficiency. + + ![Estimated Number of Cells](img/tenx_estimated_cells.png) + +- **Mean Reads per Cell** + Indicates sequencing depth per cell. + *Why it matters:* Higher depth typically reveals more genes per cell. If too low, you might miss important transcripts. + + ![Mean Reads per Cell](img/tenx_mean_reads_per_cell.png) + +- **Sequencing Saturation** + Shows whether you’ve sufficiently captured most transcripts in each cell.
+ *Why it matters:* High saturation means further sequencing may not add much; low saturation suggests you might need deeper sequencing. + + ![Sequencing Saturation](img/tenx_sequencing_saturation.png) + +- **Valid Barcodes / Valid UMIs** + Proportion of barcodes/UMIs successfully identified. + *Why it matters:* Low percentages indicate potential technical issues with library prep or sequencing. + + ![Valid Barcodes](img/tenx_valid_barcodes.png) + ![Valid UMIs](img/tenx_valid_umis.png) + +- **VDJ T-Cells with Productive TRA/TRB Contigs** + The fraction or count of T cells that have productive α or β T-cell receptor chains. + *Why it matters:* A higher number means more cells can be used for TCR-specific analyses, such as clonal expansion or antigen specificity. + + ![VDJ T-Cells with Productive Contigs](img/tenx_productive_tra_trb.png) + +### RNA QC Plots + +These plots focus on standard single-cell RNA-seq quality control metrics, helping you decide which cells are high-quality for downstream analysis. + +- **nUMI vs. nGene** + Shows how many unique transcripts (UMIs) each cell has versus how many genes were detected. + *Why it matters:* Cells with extremely low gene counts might be empty droplets; extremely high counts might be doublets. + + ![nUMI vs nGene](img/rna_nUMI_vs_nGene.png) + +- **nUMI vs. % Mitochondrial** + Plots total UMI counts against the fraction of reads mapping to mitochondrial genes. + *Why it matters:* Cells with a high mitochondrial fraction (>10–15%) are often stressed or dying and may need to be excluded. + + ![nUMI vs % Mitochondrial](img/rna_nUMI_vs_pct_mito.png) + +### TCR Repertoire Plots + +These plots give insight into the diversity and clonal structure of T-cell receptors (TCRs), which is key for immunology-focused projects. + +- **Bar Plot of Clonal Expansion** + Displays TCR clone sizes (how many cells share the same TCR). + *Why it matters:* Large bars indicate highly expanded clones, potentially responding to a specific antigen.
+ + ![TCR Clonal Expansion](img/tcr_clonal_expansion_barplot.png) + +- **Group Abundance by Receptor Type / Subtype** + Shows how different T-cell receptor types (TRA/TRB) or subtypes are distributed across samples. + *Why it matters:* Highlights whether certain receptor chains or subtypes predominate in your dataset. + + ![Group Abundance Receptor Type](img/tcr_group_abundance_receptor_type.png) + + ![Group Abundance Receptor Subtype](img/tcr_group_abundance_receptor_subtype.png) + +### How to re-run the analysis + +If something went wrong, it is crucial that you re-run the whole analysis from scratch. You can't (!) continue from a failed step – you need to fix the step and start the analysis from the beginning. For that, please see the full code to execute in the following sections. + +#### Re-running the analysis without re-installation of panpipes + +Useful when you tweak something in your data. + +``` +# if you are in the 1_ingest folder; otherwise skip this step +cd .. + +rm -r 1_ingest +mkdir 1_ingest +cd 1_ingest +panpipes ingest config +cp ../backup/* ./ +panpipes ingest make full --local +``` + +#### Re-running the analysis with re-installation of panpipes + +Use it when you work on `panpipes` locally and change something in the source code. We assume you are in the `1_ingest` folder. + +``` +cd .. && pip install -e . +rm -r ./1_ingest +mkdir 1_ingest +cd 1_ingest +panpipes ingest config +cp ../backup/* ./ +panpipes ingest make full --local +``` + +Mind the `pip install -e .`. It installs `panpipes` locally in editable mode and doesn't require changing the version of the package. However, here we assume that you run the data analysis from the same folder where the source code of `panpipes` is stored, i.e.: + +``` +panpipes/ +├── 1_ingest/ +│ ├── pipeline.yml +│ └── qc_genelist_1.0.csv +├── data/ +│ └── outs/... +├── short_vdj_submission.tsv +├── docs # docs for panpipes +├── panpipes # source code +├── tests +└── ...
+``` + +## Contact and troubleshooting + +For `panpipes` issues please use the [panpipes issue tracker](https://github.com/DendrouLab/panpipes/issues). + +For issues with tutorials please use the [panpipes-tutorials issue tracker](https://github.com/DendrouLab/panpipes-tutorials/issues). + +This tutorial was prepared by [Vadim Nazarov](https://github.com/vadimnazarov). Please feel free to reach out with scTCR/BCR-seq questions, or if you run into issues with, well, GitHub issues. diff --git a/docs/ingesting_data/figures/.DS_Store b/docs/ingesting_data/figures/.DS_Store deleted file mode 100644 index 4093e55..0000000 Binary files a/docs/ingesting_data/figures/.DS_Store and /dev/null differ diff --git a/docs/uni_multi_integration/.DS_Store b/docs/uni_multi_integration/.DS_Store deleted file mode 100644 index 444a9cc..0000000 Binary files a/docs/uni_multi_integration/.DS_Store and /dev/null differ diff --git a/docs/uni_multi_integration/figures/.DS_Store b/docs/uni_multi_integration/figures/.DS_Store deleted file mode 100644 index c32f7ca..0000000 Binary files a/docs/uni_multi_integration/figures/.DS_Store and /dev/null differ