From c67397a88e7b63fbcc4aeffa9a2048e7f7a71fd7 Mon Sep 17 00:00:00 2001 From: dglemos Date: Fri, 3 Oct 2025 10:58:50 +0100 Subject: [PATCH 1/2] Update SpliceAI description --- SpliceAI.pm | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/SpliceAI.pm b/SpliceAI.pm index 797388e3..259fc020 100644 --- a/SpliceAI.pm +++ b/SpliceAI.pm @@ -37,6 +37,8 @@ limitations under the License. SpliceAI is a deep neural network, developed by Illumina, Inc that predicts splice junctions from an arbitrary pre-mRNA transcript sequence. By default, this plugin appends all scores from SpliceAI files. + The plugin works with any file output by the SpliceAI tool - Ensembl provides scores + for all SNVs overlapping MANE transcripts. For more details see: 1. Download the input files Delta score of a variant, defined as the maximum of (DS_AG, DS_AL, DS_DG, DS_DL), ranges from 0 to 1 and can be interpreted as the probability of the variant being @@ -89,23 +91,27 @@ limitations under the License. 1. Download the input files: - - The Illumina-generated files with the annotations for all possible substitutions (snv), 1 base insertions - and 1-4 base deletions (indel), within genes are available through basespace - (https://basespace.illumina.com/s/otSPW8hnhaZR). + - Ensembl-generated files (only available for GRCh38) + We calculated SpliceAI scores for all possible substitutions (SNV) within MANE Select transcripts. + The scores are available through the Ensembl FTP at https://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh38/variation_plugins/ + Note: Ensembl does not provide INDEL annotations; please use Illumina's files for INDEL scores. 
+ For more details on how the scores were calculated, please read: + https://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh38/variation_plugins/README + + - Illumina-generated files + Illumina-calculated scores for all possible substitutions (SNV), 1 base insertions + and 1-4 base deletions (INDEL), within genes are available through basespace + (https://basespace.illumina.com/s/otSPW8hnhaZR). - To download via Illumina's basespace: - 1. Log-in to your Illumina account or sign-up (for free) if you do not have one. - 2. Once you're in, a "Share Project" pop-up will appear - click "accept". + 1. Log-in to your Illumina account or sign-up (for free) if you do not have one. + 2. Once you're in, a "Share Project" pop-up will appear - click "accept". 3. A smaller pop-up in the bottom right will then read "Share Accepted". Click "Predicting splicing from primary sequence". 4. You will get a list of files. Select "genome_scores_v1.3". - 5. You will get an info/landing page. Under "Analysis: genome_scores_v1.3", select "FILES". - 6. Click the file icon next to "genome_scores_v1.3" and you will get a list of available files. - 7. Click filenames to download the relevant files - note that raw/masked, hg19/hg38 and snv/indel files are available. - - - The Ensembl-generated files with the annotations for all possible substitutions (snv), 1 base insertions, within genes are available through Ensembl - (https://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh38/variation_plugins/). Ensembl does not provide indel annotations, however, - Ensembl-generated files include annotations for Ensembl MANE select transcripts for v107 and v110 releases. + 5. You will get an info/landing page. Under "Analysis: genome_scores_v1.3", select "FILES". + 6. Click the file icon next to "genome_scores_v1.3" and you will get a list of available files. + 7. Click filenames to download the relevant files - note that raw/masked, hg19/hg38 and snv/indel files are available. - 2. 
Tabix the files (if derived from Illumina). .tbi files are provided for Ensembl-derived VCFs. + 2. For Illumina files: tabix the files (.tbi files are provided for Ensembl-derived VCFs). - GRCh37: tabix -p vcf spliceai_scores.raw.snv.hg19.vcf.gz @@ -122,8 +128,8 @@ limitations under the License. ./vep -i variations.vcf --plugin SpliceAI,snv=/path/to/spliceai_scores.raw.snv.hg38.vcf.gz,indel=/path/to/spliceai_scores.raw.indel.hg38.vcf.gz,cutoff=0.5 ./vep -i variations.vcf --plugin SpliceAI,snv=/path/to/spliceai_scores.raw.snv.hg38.vcf.gz,indel=/path/to/spliceai_scores.raw.indel.hg38.vcf.gz,split_output=1 - - Or with Ensembl files: - ./vep -i variations.vcf --plugin SpliceAI,snv=/path/to/spliceai_scores.masked.snv.ensembl_mane.grch38.110.vcf.gz,indel=/path/to/spliceai_scores.masked.indel.hg38.vcf.gz + - Or with Ensembl files (only for SNV): + ./vep -i variations.vcf --plugin SpliceAI,snv=/path/to/spliceai_scores.masked.snv.ensembl_mane_v1.4.grch38.vcf.gz,indel=/path/to/spliceai_scores.masked.indel.hg38.vcf.gz =cut From 5075bada86e0955280c14f7eb83988b27b454495 Mon Sep 17 00:00:00 2001 From: dglemos Date: Fri, 3 Oct 2025 14:35:11 +0100 Subject: [PATCH 2/2] Update plugin form text --- plugin_config.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/plugin_config.txt b/plugin_config.txt index ade6a0ae..8f03bda8 100644 --- a/plugin_config.txt +++ b/plugin_config.txt @@ -1117,11 +1117,12 @@ my $VEP_PLUGIN_CONFIG = { "form" => [ { "name" => "file_type", - "label" => "Select analysis mode", + "label" => "Select input files", "type" => "dropdown", + "helptip" => "Select the type of scores for annotation. 
SpliceAI's authors recommend using raw files for alternative splicing analysis and masked files for variant interpretation.", "values" => [ - { "value" => "snv", "caption" => "Masked scores (Ensembl/GENCODE v24 canonical transcripts)" }, - { "value" => "snv_ensembl", "caption" => "Raw scores (Ensembl/GENCODE v37 MANE transcripts, SNV only)" } + { "value" => "snv", "caption" => "Masked scores for SNVs and INDELs from Illumina's pre-calculated scores" }, + { "value" => "snv_ensembl", "caption" => "Raw scores for SNVs overlapping MANE Select transcripts; INDELs from Illumina's pre-calculated scores" } ], "value" => "snv", },