From 51f001f03a42b569f9250d6834d3a9194e797087 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Fri, 12 May 2023 18:01:30 +0200 Subject: [PATCH 1/7] Add pango clade definitions to profile --- rsv/profiles/pango/REFROOTA.gb | 511 +++++++++++++++++++ rsv/profiles/pango/REFROOTB.gb | 479 +++++++++++++++++ rsv/profiles/pango/amino-acid-genotypes.xlsx | Bin 0 -> 15040 bytes 3 files changed, 990 insertions(+) create mode 100644 rsv/profiles/pango/REFROOTA.gb create mode 100644 rsv/profiles/pango/REFROOTB.gb create mode 100644 rsv/profiles/pango/amino-acid-genotypes.xlsx diff --git a/rsv/profiles/pango/REFROOTA.gb b/rsv/profiles/pango/REFROOTA.gb new file mode 100644 index 0000000..416bc30 --- /dev/null +++ b/rsv/profiles/pango/REFROOTA.gb @@ -0,0 +1,511 @@ +LOCUS REFROOTA 15052 bp cRNA UNK 01-JAN-1980 +DEFINITION REFROOTA. +ACCESSION REFROOTA +VERSION REFROOTA +KEYWORDS . +SOURCE . + ORGANISM Human orthopneumovirus + Viruses; Riboviria; Orthornavirae; Negarnaviricota; Haploviricotina; + Monjiviricetes; Mononegavirales; Pneumoviridae; Orthopneumovirus. +FEATURES Location/Qualifiers + source 1..15052 + /organism="Human orthopneumovirus" + /mol_type="viral cRNA" + gene 1..484 + /gene="NS1" + /note="formerly called 1C" + /db_xref="GeneID:37607636" + mRNA 1..484 + /gene="NS1" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607636" + CDS 1..420 + /gene="NS1" + /codon_start=1 + /product="nonstructural protein 1" + /protein_id="YP_009518850.1" + /db_xref="GeneID:37607636" + /translation="MGSNSLSMIKVRLQNLFDNDEVALLKITCYTDKLIHLTNALAKAV + IHTIKLNGIVFVHVITSSDICPNNNIVVKSNFTTMPVLQNGGYIWEMMELTHCSQPNGL + IDDNCEIKFSKKLSDSTMTNYMNQLSELLGFDLNP*" + gene 504..1008 + /gene="NS2" + /note="Formerly called 1B" + /db_xref="GeneID:37607637" + mRNA 504..1008 + /gene="NS2" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607637" + CDS 536..910 + /gene="NS2" + /codon_start=1 + /product="nonstructural protein 2" + /protein_id="YP_009518851.1" + /db_xref="GeneID:37607637" + /translation="MDTTHNDTTPQRLMITDMRPLSLETIITSLTRDIITHRFIYLINH + ECIVRKLDERQATFTFLVNYEMKLLHKVGSTKYKKYTEYNTKYGTFPMPIFINHDGFLE + CIGIKPTKHTPIIYKYDLNP*" + gene 1035..2246 + /gene="N" + /db_xref="GeneID:37607638" + mRNA 1035..2246 + /gene="N" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607638" + CDS 1050..2225 + /gene="N" + /codon_start=1 + /product="nucleoprotein" + /protein_id="YP_009518852.1" + /db_xref="GeneID:37607638" + /translation="MALSKVKLNDTLNKDQLLSSSKYTIQRSTGDSIDTPNYDVQKHIN + KLCGMLLITEDANHKFTGLIGMLYAMSRLGREDTIKILRDAGYHVKANGVDVTTHRQDI + NGKEMKFEVLTLASLTTEIQINIEIESRKSYKKMLKEMGEVAPEYRHDSPDCGMIILCI + AALVITKLAAGDRSGLTAVIRRANNVLKNEMKRYKGLLPKDIANSFYEVFEKYPHFIDV + FVHFGIAQSSTRGGSRVEGIFAGLFMNAYGAGQVMLRWGVLAKSVKNIMLGHASVQAEM + EQVVEVYEYAQKLGGEAGFYHILNNPKASLLSLTQFPHFSSVVLGNAAGLGIMGEYRGT + PRNQDLYDAAKAYAEQLKENGVINYSVLDLTAEELEAIKHQLNPKDNDVEL*" + gene 2248..3188 + /gene="P" + /db_xref="GeneID:37607639" + mRNA 2248..3188 + /gene="P" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607639" + CDS 2265..2990 + /gene="P" + /codon_start=1 + /product="phosphoprotein" + /protein_id="YP_009518853.1" + /db_xref="GeneID:37607639" + /translation="MEKFAPEFHGEDANNRATKFLESIKGKFTSPKDPKKKDSIISVNS + IDIEVTKESPITSNSTIINPTNETDDTVGNKPNYQRKPLVSFKEDPTPSDNPFSKLYKE + TIETFDNNEEESSYSYEEINDQTNDNITARLDRIDEKLSEILGMLHTLVVASAGPTSAR + DGIRDAMVGLREEMIEKIRTEALMTNDRLEAMARLRNEESEKMAKDTSDEVSLNPTSEK + LNNLLEGNDSDNDLSLEDF*" + gene 3198..4161 + /gene="M" + /db_xref="GeneID:37607640" + mRNA 3198..4161 + /gene="M" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607640" + CDS 3207..3977 + /gene="M" + /codon_start=1 + /product="matrix protein" + /protein_id="YP_009518854.1" + /db_xref="GeneID:37607640" + /translation="METYVNKLHEGSTYTAAVQYNVLEKDDDPASLTIWVPMFQSSMPA + DLLIKELANVNILVKQISTPKGPSLRVMINSRSAVLAQMPSKFTICANVSLDERSKLAY + DVTTPCEIKACSLTCLKSKNMLTTVKDLTMKTLNPTHDIIALCEFENIVTSKKVIIPTY + LRSISVRNKDLNTLENITTTEFKNAITNAKIIPYSGLLLVITVTDNKGAFKYIKPQSQF + IVDLGAYLEKESIYYVTTNWKHTATRFAIKPMED*" + gene 4172..4589 + /gene="SH" + /db_xref="GeneID:37607641" + mRNA 4172..4589 + /gene="SH" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607641" + CDS 4256..4450 + /gene="SH" + /codon_start=1 + /product="small hydrophobic protein" + /protein_id="YP_009518855.1" + /db_xref="GeneID:37607641" + /translation="MENTSITIEFSSKFWPYFTLIHMITTIISLLIIISIMIAILNKLC + EYNVFHNKTFELPRARVNT*" + gene 4637..5639 + /gene="G" + /db_xref="GeneID:37607642" + mRNA 4637..5639 + /gene="G" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607642" + CDS 4652..5626 + /gene="G" + /codon_start=1 + /product="attachment glycoprotein" + /protein_id="YP_009518856.1" + /db_xref="GeneID:37607642" + /translation="MSKNKDQRTAKTLEKTWDTLNHLLFISSCLYKLNLKSIAQITLSI + LAMIISTSLIIAAIIFIASANHKVTLTTAIIQDATSQIKNTTPTYLTQNPQLGISFSNL + SEITSQTTTILASTTPRVKSTLQSTTVKTKNTTTTQIQPSKPTTKQRQNKPPNKPNNDF + HFEVFNFVPCSICSNNPTCWAICKRIPXXNKKPGKKTTTKPTKKPTIKTTKKDLKPQTT + KPKEVPTTKPTEKPTINTTKTNIITTLLTNNTTGNPEHTSQXXXXXXXXXXXXXXXXXX + XXXXXXKETFHSTSSEGNPSPSQVYTTSEYLSQPSSPSNTTNQ*" + gene 5693..7600 + /gene="F" + /db_xref="GeneID:37607643" + mRNA 5693..7600 + /gene="F" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607643" + CDS 5706..7430 + /gene="F" + /codon_start=1 + /product="fusion glycoprotein" + /protein_id="YP_009518857.1" + /db_xref="GeneID:37607643" + /translation="MELPILKTNAITTILAAVTFCFASSQNITEEFYQSTCSAVSKGYL + SALRTGWYTSVITIELSNIKENKCNGTDAKVKLIKQELDKYKNAVTELQLLMQSTPAAN + NRARRELPRFMNYTLNNTKKTNVTLSKKRKRRFLGFLLGVGSAIASGIAVSKVLHLEGE + VNKIKSALLSTNKAVVSLSNGVSVLTSKVLDLKNYIDKQLLPIVNKQSCSISNIETVIE + FQQKNNRLLEITREFSVNAGVTTPVSTYMLTNSELLSLINDMPITNDQKKLMSNNVQIV + RQQSYSIMSIIKEEVLAYVVQLPLYGVIDTPCWKLHTSPLCTTNTKEGSNICLTRTDRG + WYCDNAGSVSFFPQAETCKVQSNRVFCDTMNSLTLPSEVNLCNIDIFNPKYDCKIMTSK + TDVSSSVITSLGAIVSCYGKTKCTASNKNRGIIKTFSNGCDYVSNKGVDTVSVGNTLYY + VNKQEGKSLYVKGEPIINFYDPLVFPSDEFDASISQVNEKINQSLAFIRKSDELLHNVN + AGKSTTNIMITTIIIVIIVILLSLIAVGLLLYCKARSTPVTLSKDQLSGINNIAFSN*" + gene 7654..8614 + /gene="M2" + /db_xref="GeneID:37607644" + mRNA 7654..8614 + /product="22K/M2 protein gene" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + CDS 7663..8247 + /gene="M2-1" + /note="ORF 1, matrix protein 2" + /codon_start=1 + /product="M2-1 protein" + /protein_id="YP_009518858.1" + /db_xref="GeneID:37607644" + /translation="MSRRNPCKFEIRGHCLNGKRCHFSHNYFEWPPHALLVRQNFMLNR + ILKSMDKSIDTLSEISGAAELDRTEEYALGVVGVLESYIGSINNITKQSACVAMSKLLT + ELNSDDIKKLRDNEELNSPKIRVYNTVISYIESNRKNNKQTIHLLKRLPADVLKKTIKN + TLDIHKSITINNPKELTVSDTNDHAKNNDTT*" + CDS 8216..8488 + /gene="M2-2" + /note="ORF 2, RNA processivity factor" + /codon_start=1 + /product="M2-2 protein" + /protein_id="YP_009518859.1" + /db_xref="GeneID:37607644" + /translation="MTMPKIMILPDKYPCSINSILITSRCRVTMYNQKNTLYFNQNNQN + NHIYSPNQTFNEIHWTSQDLIDTIQNFLQHLGITDDIYTIYILVS*" + gene 8546..15052 + /gene="L" + /db_xref="GeneID:37607645" + mRNA 8546..15052 + /gene="L" + /experiment="experimental evidence, no additional details + recorded" + /citation=[2] + /function="viral polymerase" + /db_xref="GeneID:37607645" + CDS 8555..15052 + /gene="L" + /note="RNA dependant RNA polymerase; RdRp" + /codon_start=1 + /product="polymerase protein" + /protein_id="YP_009518860.1" + /db_xref="GeneID:37607645" + /translation="MDPIINGNSANVYLTDSYLKGVISFSECNALGSYIFNGPYLKNDY + TNLISRQNPLIEHINLKKLNITQSLISKYHKGEIKIEEPTYFQSLLMTYKSMTSSEQIT + TTNLLKKIIRRAIEISDVKVYAILNKLGLKEKDKIKSNNGQDEDNSVITTIIKDDILLA + VKDNQSHLKADKNHSTKQKDTIKTTLLKKLMCSMQHPPSWLIHWFNLYTKLNNILTQYR + SNEVKNHGFILIDNQTLNGFQFILNQYGCIVYHKELKRITVTTYNQFLTWKDISLSRLN + VCLITWISNCLNTLNKSLGLRCGFNNVILTQLFLYGDCILKLFHNEGFYIIKEVEGFIM + SLILNITEEDQFRKRFYNSMLNNITDAANKAQKNLLSRVCHTLLDKTVSDNIINGRWII + LLSKFLKLIKLAGDNNLNNLSELYFLFRIFGHPMVDERQAMDAVKVNCNETKFYLLSSL + SMLRGAFIYRIIKGFVNNYNRWPTLRNAIVLPLRWLTYYKLNTYPSLLELTERDLIVLS + GLRFYREFRLPKKVDLEMIINDKAISPPKNLIWTSFPRNYMPSHIQNYIEHEKLKFSES + DKSRRVLEYYLRDNKFNECDLYNCVVNQSYLNNPNHVVSLTGKERELSVGRMFAMQPGM + FRQVQILAEKMIAENILQFFPESLTRYGDLELQKILELKAGISNKSNRYNDNYNNYISK + CSIITDLSKFNQAFRYETSCICSDVLDELHGVQSLFSWLHLTIPHVTIICTYRHAPPYI + RDHIVDLNNVDEQSGLYRYHMGGIEGWCQKLWTIEAISLLDLISLKGKFSITALINGDN + QSIDISKPVRLMEGQTHAQADYLLALNSLKLLYKEYAGIGHKLKGTETYISRDMQFMSK + TIQHNGVYYPASIKKVLRVGPWINTILDDFKVSLESIGSLTQELEYRGESLLCSLIFRN + VWLYNQIALQLKNHALCNNKLYLDILKVLKHLKTFFNLDNIDTALTLYMNLPMLFGGGD + PNLLYRSFYRRTPDFLTEAIVHSVFILSYYTNHDLKDKLQDLSDDRLNKFLTCIITFDK + NPNAEFVTLMRDPQALGSERQAKITSEINRLAVTEVLSTAPNKIFSKSAQHYTTTEIDL + NDIMQNIEPTYPHGLRVVYESLPFYKAEKIVNLISGTKSITNILEKTSAIDLTDIDRAT + EMMRKNITLLIRIFPLDCNRDKREILSMENLSITELSKYVRERSWSLSNIVGVTSPSIM + YTMDIKYTTSTIASGIIIEKYNVNSLTRGERGPTKPWVGSSTQEKKTMPVYNRQVLTKK + QRDQIDLLAKLDWVYASIDNKDEFMEELSIGTLGLTYEKAKKLFPQYLSVNYLHRLTVS + SRPCEFPASIPAYRTTNYHFDTSPINRILTEKYGDEDIDIVFQNCISFGLSLMSVVEQF + TNVCPNRIILIPKLNEIHLMKPPIFTGDVDIHKLKQVIQKQHMFLPDKISLTQYVELFL + SNKTLKSGSHVNSNLILAHKISDYFHNTYILSTNLAGHWILIIQLMKDSKGIFEKDWGE + GYITDHMFINLKVFFNAYKTYLLCFHKGYGKAKLECDMNTSDLLCVLELIDSSYWKSMS + KVFLEQKVIKYILSQDASLHRVKGCHSFKLWFLKRLNVAEFTVCPWVVNIDYHPTHMKA + ILTYIDLVRMGLINIDRIYIKNKHKFNDEFYTSNLFYINYNFSDNTHLLTKHIRIANSE + LENNYNKLYHPTPETLENILTNPIKSNDKKTLNDYCIGKNVDSIMLPLLSNKKLIKSST + MIRTNYSKQDLYNLFPTVVIDKIIDHSGNTAKSNQLYTTTSHQISLVHNSTSLYCMLPW + HHINRFNFVFSSTGCKISIEYILKDLKIKDPNCIAFIGEGAGNLLLRTVVELHPDIRYI + YRSLKDCNDHSLPIEFLRLYNGHINIDYGENLTIPATDATNNIHWSYLHIKFAEPISLF + VCDAELPVTVNWSKIIIEWSKHVRKCKYCSSVNKCTLIVKYHAQDDIDFKLDNITILKT + YVCLGSKLKGSEVYLVLTIGPANVFPVFNVVQNAKLILSRTKNFIMPKKADKESIDANI + KSLIPFLCYPITKKGINTALSKLKSVVSGDILSYSIAGRNEVFSNKLINHKHMNILKWF + NHVLNFRSTELNYNHLYMVESTYPYLSELLNSLTTNELKKLIKITGSLLYNFHNE*" +ORIGIN + 1 atgggcagca attcattgag tatgataaaa gttagattac aaaatttgtt tgacaatgat + 61 gaagtagcat tgttaaaaat aacatgctat actgacaaat taatacattt aactaatgct + 121 ttggctaagg cagtgataca tacaatcaaa ttgaatggca ttgtatttgt gcatgttatt + 181 acaagtagtg atatttgccc taataataat attgtagtaa aatccaattt cacaacaatg + 241 ccagtgttac aaaatggagg ttatatatgg gaaatgatgg aattaacaca ttgctctcaa + 301 cctaatggtc taatagatga caattgtgaa attaaattct ccaaaaaact aagtgattca + 361 acaatgacca attatatgaa tcaattatct gaattacttg gatttgatct taatccataa + 421 attataataa atatcaacta gcaaatcaat gtcactaaca ccattagtta atatnnnaaa + 481 annncttaac agaagataaa aatggggcaa ataaatcaac tcagccaacc caaccatgga + 541 cacaacacac aatgatacca caccacaaag actgatgatc acagacatga gaccattgtc + 601 acttgagact ataataacat cactaaccag agacatcata acacacagat ttatatactt + 661 gataaatcat gaatgcatag tgagaaaact tgatgaaaga caggccacat ttacattcct + 721 ggtcaactat gaaatgaaac tattgcacaa agtaggaagc actaaatata aaaaatatac + 781 tgaatacaac acaaaatatg gcactttccc tatgccaata tttatcaatc atgatgggtt + 841 cttagaatgc attggcatta agcctacaaa gcatactccc ataatataca agtatgatct + 901 caatccatga atttcaacac aagattcaca caatccaaaa taacaacttt atgcataact + 961 acactccata gtccaaatgg agcctgaaaa ttatagtaat ttnnaaaatt aaggagagac + 1021 ataagataaa agatggggca aatacaaaaa tggctcttag caaagtcaag ttgaatgata + 1081 cactcaacaa agatcaactt ctgtcatcta gcaaatacac catccaacgg agcacaggag + 1141 atagtattga tactcctaat tatgatgtgc agaaacacat caataagtta tgtggcatgt + 1201 tattaatcac agaagatgct aatcataaat tcactgggtt aataggtatg ttatatgcta + 1261 tgtctagatt aggaagagaa gacaccataa aaatactcag agatgcggga tatcatgtaa + 1321 aagcaaatgg agtagatgta acaacacatc gtcaagacat taatgggaaa gaaatgaaat + 1381 ttgaagtgtt aacattggca agcttaacaa ctgaaattca aatcaacatt gagatagaat + 1441 ctagaaaatc ctacaaaaaa atgctaaaag aaatgggaga ggtagctcca gaatacaggc + 1501 atgactctcc tgattgtggg atgataatat tatgtatagc agcattagta ataaccaaat + 1561 tagcagcagg ggatagatct ggtcttacag ctgtgattag gagagctaat aatgtcctaa + 1621 aaaatgaaat gaaacgttat aaaggcttac tacccaagga tatagccaac agcttctatg + 1681 aagtgtttga aaaatatcct cactttatag atgtttttgt tcattttggt atagcacaat + 1741 cttctaccag aggtggcagt agagttgaag ggatttttgc aggattgttt atgaatgcct + 1801 atggtgcagg gcaagtgatg ttacggtggg gagtcttagc aaaatcagtt aaaaatatta + 1861 tgttaggaca tgctagtgtg caagcagaaa tggaacaagt tgttgaggtt tatgaatatg + 1921 cccaaaaatt gggtggagaa gcaggattct accatatatt gaacaaccca aaagcatcat + 1981 tattatcttt gactcaattt cctcacttct ccagtgtagt attaggcaat gctgctggcc + 2041 taggcataat gggagaatac agaggtacac caaggaatca agatctatat gatgcagcaa + 2101 aggcatatgc tgaacaactc aaagaaaatg gtgtgattaa ctacagtgta ttagacttga + 2161 cagcagaaga actagaggct atcaaacatc agcttaatcc aaaagataat gatgtagagc + 2221 tttgagttaa tnnannnnna aaaaantggg gcaaataaat catcatggaa aagtttgctc + 2281 ctgaattcca tggagaagat gcaaacaaca gagctactaa attcctagaa tcaataaagg + 2341 gcaaattcac atcacctaaa gatcccaaga aaaaagatag tatcatatct gtcaactcaa + 2401 tagatataga agtaaccaaa gaaagcccta taacatcaaa ttcaaccatt ataaacccaa + 2461 caaatgagac agatgatact gtagggaaca agcccaatta tcaaagaaaa cctctagtaa + 2521 gtttcaaaga agaccctaca ccaagtgata atcccttttc aaaactatac aaagaaacca + 2581 tagaaacatt tgataacaat gaagaagaat ctagctattc atatgaagaa ataaatgatc + 2641 agacaaacga taatataaca gcaagattag ataggattga tgaaaaatta agtgaaatac + 2701 taggaatgct tcacacatta gtagtagcaa gtgcaggacc tacatctgct cgggatggta + 2761 taagagatgc catggttggt ttaagagaag aaatgataga aaaaatcaga actgaagcat + 2821 taatgaccaa tgacagatta gaagctatgg caagactcag gaatgaggaa agtgaaaaga + 2881 tggcaaaaga cacatcagat gaagtgtctc tcaatccaac atcagagaaa ttgaacaacc + 2941 tgttggaagg gaatgatagt gacaatgatc tatcacttga agatttctga ttagttacca + 3001 atctgcacat caacacacaa caccaacaga agaccaacaa acaaaccaac tcactcatcc + 3061 aaccaaacat ccatctgcca atcagcnnnn nnnnnnnnnn nncaaacagc cnnaaaaann + 3121 naacaaccag ccaatccaaa actagccacc cnggnaaaaa atcgacaata tagttacaaa + 3181 annnnnaaga aaaagatggg gcaaatatgg aaacatacgt gaacaaactt cacgaaggct + 3241 ccacatacac agctgctgtt caatacaatg tcctagaaaa agacgatgac cctgcatcac + 3301 ttacaatatg ggtgcccatg ttccaatcat ctatgccagc agatttactt ataaaagaac + 3361 tagctaatgt caacatacta gtgaaacaaa tatccacacc caagggacct tcattaagag + 3421 tcatgataaa ctcaagaagt gcagtgctag cacaaatgcc cagcaaattt accatatgtg + 3481 ctaatgtgtc cttggatgaa agaagcaaac tggcatatga tgtaaccaca ccctgtgaaa + 3541 tcaaggcatg tagtctaaca tgcctaaaat caaaaaatat gttaactaca gttaaagatc + 3601 tcactatgaa aacactcaac ccaacacatg atattattgc tttatgtgaa tttgaaaata + 3661 tagtaacatc aaaaaaagtc ataataccaa catacctaag atccatcagt gtcagaaata + 3721 aagatctgaa cacacttgaa aatataacaa ccactgaatt caaaaatgcc atcacaaatg + 3781 caaaaatcat cccttactca ggattactat tagtcatcac agtgactgac aacaaaggag + 3841 cattcaaata cataaagcca caaagtcaat tcatagtaga tcttggagct tacctagaaa + 3901 aagaaagtat atattatgtt acaacaaatt ggaagcacac agctacacga tttgcaatca + 3961 aacccatgga agattaaccn tttttcctct acatcagtga gttaattcat acaaactttc + 4021 tacctacatt cttcacttca caatcataat cacaaacact ctgtggttca accaatcnnn + 4081 nnaaacaaaa cttatctgaa gtctcagatc atcccaagtc attgttcatc agatctagta + 4141 ctcaaataag ttaatnaaaa atatnacaca tggggcaaat aatcatcgga ggaaatccaa + 4201 ctaatcacaa tatctgtcaa catagacaag tcaacacacc agacaaaatc aaccaatgga + 4261 aaatacatcc ataacaatag aattctcaag caaattctgg ccttacttta cactaataca + 4321 catgatcaca acaataatct ctttgctaat cataatctcc atcatgattg caatactaaa + 4381 caaactctgt gaatataacg tattccataa caaaaccttt gagttaccaa gagctcgagt + 4441 caatacatag cattcatcaa tctaatagct caaaacagta accttgcatt taaaagtgaa + 4501 caaccctcac ctctttacaa aaccacatca acatctcacc atgcaaacca tcatccatac + 4561 tataaagtag ttaattnnnn aaaannnnat agtcataaca atgaactnna agatattaag + 4621 actnaacaat aacgttgggg caaatgcaaa catgtccaaa aacaaggacc aacgcaccgc + 4681 taagacacta gaaaagacct gggacactct caatcattta ttattcatat catcgtgctt + 4741 atataagtta aatcttaaat ctatagcaca aatcacatta tccattctgg caatgataat + 4801 ctcaacttca cttataattg cagccatcat attcatagcc tcggcaaacc acaaagtcac + 4861 actaacaact gcaatcatac aagatgcaac aagccagatc aagaacacaa ccccaacata + 4921 cctcacccag aatcctcagc ttggaatcag cttctccaat ctgtctgaaa ttacatcaca + 4981 aaccaccacc atactagctt caacaacacc aagagtcaag tcaaccctgc aatccacaac + 5041 agtcaagacc aaaaacacaa caacaaccca aatacaaccc agcaagccca ccacaaaaca + 5101 acgccaaaac aaaccaccaa acaaacccaa taatgatttt cactttgaag tgttcaactt + 5161 tgtaccctgc agcatatgca gcaacaatcc aacctgctgg gctatctgca aaagaatacc + 5221 nnnnnnaaac aaaaaaccag gaaagaaaac caccaccaag cccacaaaaa aaccaaccat + 5281 caagacaacc aaaaaagatc tcaaacctca aaccacaaaa ccaaaggaag tacctaccac + 5341 caagcccaca gaaaagccaa ccatcaacac caccaaaaca aacatcataa ctacactgct + 5401 caccaacaat accacaggaa atccagaaca cacaagtcaa nnnnnnnnnn nnnnnnnnnn + 5461 nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnaaggaaac + 5521 cttccactca acctcctccg aaggcaatcc aagcccttca caagtctata caacatccga + 5581 gtacctatca caaccttcat ctccatccaa cacaacaaac cagtagttat tnnaaaaaac + 5641 atattattac naaaaagcca tgaccaaatc aaacagaatc aaaataaact ctggggcaaa + 5701 taacaatgga gttgccaatc ctcaaaacaa atgcaattac cacaatcctt gctgcagtca + 5761 cattttgttt tgcttctagt caaaacatca ctgaagaatt ttatcaatca acatgcagtg + 5821 cagttagcaa aggctatctt agtgctctaa gaactggttg gtatactagt gttataacta + 5881 tagaattaag taatatcaag gaaaataagt gtaatggaac agatgctaag gtaaaattga + 5941 taaaacaaga attagataaa tataaaaatg ctgtaacaga attgcagttg ctcatgcaaa + 6001 gcacaccagc agcaaacaat cgagccagaa gagaactacc aaggtttatg aattatacac + 6061 tcaacaatac caaaaaaacc aatgtaacat taagcaagaa aaggaaaaga agatttcttg + 6121 gttttttgtt aggtgttgga tctgcaatcg ccagtggcat tgctgtatct aaggtcctgc + 6181 acctagaagg ggaagtgaac aaaatcaaaa gtgctctact atccacaaac aaggctgtag + 6241 tcagcttatc aaatggagtt agtgtcttaa ccagcaaagt gttagacctc aaaaactata + 6301 tagataaaca attgttacct attgtgaaca agcaaagctg cagcatatca aatatagaaa + 6361 ctgtgataga gttccaacaa aagaacaaca gactactaga gattaccagg gaatttagtg + 6421 ttaatgcagg tgtaactaca cctgtaagca cttatatgtt aactaatagt gaattattat + 6481 cattaatcaa tgatatgcct ataacaaatg atcagaaaaa gttaatgtcc aacaatgttc + 6541 aaatagttag acagcaaagt tactctatca tgtccataat aaaagaggaa gtcttagcat + 6601 atgtagtaca attaccacta tatggtgtaa tagatacacc ttgttggaaa ttacacacat + 6661 cccctctatg tacaaccaac acaaaagaag ggtccaacat ctgtttaaca agaaccgaca + 6721 gaggatggta ctgtgacaat gcaggatcag tatctttctt cccacaagct gaaacatgta + 6781 aagttcaatc gaatcgagta ttttgtgaca caatgaacag tttaacatta ccaagtgaag + 6841 taaatctctg caatattgac atattcaatc ccaaatatga ttgtaaaatt atgacttcaa + 6901 aaacagatgt aagcagctcc gttatcacat ctctaggagc cattgtgtca tgctatggca + 6961 aaactaaatg tacagcatcc aataaaaatc gtggaatcat aaagacattt tctaacgggt + 7021 gtgattatgt atcaaataaa ggggtggaca ctgtgtctgt aggtaataca ttatattatg + 7081 taaataagca agaaggcaaa agtctctatg taaaaggtga accaataata aatttctatg + 7141 acccattagt attcccctct gatgaatttg atgcatcaat atctcaagtc aatgagaaga + 7201 ttaaccagag cctagcattt attcgtaaat ccgatgaatt attacataat gtaaatgctg + 7261 gtaaatccac cacaaatatc atgataacta ctataattat agtgattata gtaatattgt + 7321 tatcattaat tgcagttgga ctgctcctat actgtaaggc cagaagcaca ccagtcacac + 7381 taagcaagga tcaactgagt ggtataaata atattgcatt tagtaactga ataaaaatag + 7441 cacctaatca tgttcttaca atggtttact atctgatcat agacaaccca tctatcattg + 7501 gattttcttn naaaatctga acttcatcga aactctcatc tataaaccat ctcacttaca + 7561 ctatttaagt agattcctat tttatagtta tatnnnaaaa cacaattgaa taccagatta + 7621 acttactatt tgnnnnnntn aaaaatgaga actggggcaa atatgtcacg aaggaatcct + 7681 tgcaaatttg aaattcgagg tcattgcttg aatggtaaga ggtgtcattt tagtcataat + 7741 tattttgaat ggccacccca tgcactgctt gtaagacaaa actttatgtt aaacagaata + 7801 cttaagtcta tggataaaag tatagatact ttatcagaaa taagtggagc tgcagagttg + 7861 gacagaacag aagagtatgc tcttggtgta gttggagtgc tagagagtta tataggatca + 7921 ataaataata taactaaaca atcagcatgt gttgccatga gcaaactcct cactgaactc + 7981 aatagtgatg atatcaaaaa actaagggac aatgaagagc taaattcacc caagataaga + 8041 gtgtacaata ctgtcatatc atatattgaa agcaacagga aaaacaataa acaaactatc + 8101 catctgttaa aaagattgcc agcagacgta ttgaagaaaa ccatcaaaaa cacattggat + 8161 atccacaaga gcataaccat caataaccca aaagaattaa ctgttagtga tacaaatgac + 8221 catgccaaaa ataatgatac tacctgacaa atatccttgt agtataaatt ccatactaat + 8281 aacaagtaga tgtagagtta ctatgtataa tcaaaagaac acactatatt tcaatcaaaa + 8341 caaccaaaat aaccatatat actcaccgaa tcaaacattc aatgaaatcc attggacctc + 8401 tcaagacttg attgatacaa ttcaaaattt tctacaacat ctaggtatta ctgatgatat + 8461 atatacaata tatatattag tgtcataaca ctcaatccta atacttacca catcattaaa + 8521 ttattaactc aaacaattca agctatggga caaaatggat cccattatta atggaaattc + 8581 tgctaatgtt tatctaaccg atagttattt aaaaggtgtt atttctttct cagaatgtaa + 8641 tgctttagga agttacatat tcaatggtcc ttatctcaaa aatgattata ccaacttaat + 8701 tagtagacaa aatccattaa tagaacacat aaatctaaag aaactaaata taacacagtc + 8761 cttaatatct aagtatcata aaggtgaaat aaaaatagaa gaacctactt attttcagtc + 8821 attacttatg acatacaaga gtatgacctc gtcagaacag attactacca ctaatttact + 8881 taaaaagata ataagaagag ctatagaaat aagtgatgtc aaagtctatg ctatattgaa + 8941 taaactaggg cttaaagaaa aagacaagat taaatccaac aatggacaag atgaagacaa + 9001 ctcagttatt acaaccataa tcaaagatga tatactttta gctgttaagg ataatcaatc + 9061 tcatcttaaa gcagacaaaa atcactctac aaaacaaaaa gatacaatca aaacaacact + 9121 cttgaagaaa ttaatgtgtt caatgcaaca tcctccatca tggttaatac attggtttaa + 9181 tttatacaca aaattaaaca acatattaac acagtatcga tcaaatgagg taaaaaacca + 9241 tggttttata ttgatagata atcaaactct taatggattt caatttattt tgaatcaata + 9301 tggttgtata gtttatcata aggaactcaa aagaattact gtgacaacct ataatcaatt + 9361 cttgacatgg aaagatatta gccttagtag attaaatgtt tgtttaatta catggattag + 9421 taactgtttg aacacattaa ataaaagctt aggcttaaga tgcggattca ataatgttat + 9481 cttgacacaa ctattccttt atggagattg tatactaaaa ctatttcaca atgaggggtt + 9541 ctacataata aaagaggtag agggatttat tatgtctcta attttaaata taacagaaga + 9601 agatcaattc agaaaacgat tttataatag tatgctcaac aacatcacag atgctgctaa + 9661 taaagctcag aaaaatctgc tatcaagagt atgtcataca ttattagata agacagtatc + 9721 cgataatata ataaatggca gatggataat tctattaagt aagttcctta aattaattaa + 9781 gcttgcaggt gacaataacc ttaacaatct gagtgaatta tattttttgt tcagaatatt + 9841 tggacaccca atggtagatg aaagacaagc catggatgct gttaaagtta attgcaatga + 9901 gaccaaattt tacttgttaa gcagtttgag tatgttaaga ggtgccttta tatatagaat + 9961 tataaaaggg tttgtaaata attacaacag atggcctact ttaagaaatg ctattgtttt + 10021 acccttaaga tggttaactt actataaact aaacacttat ccttctttgt tggaacttac + 10081 agaaagagat ttgattgttt tatcaggact acgtttctat cgtgagtttc ggttgcctaa + 10141 aaaagtggat cttgaaatga ttataaatga taaagctata tcacctccta aaaatttgat + 10201 atggactagt ttccctagaa attatatgcc gtcacacata caaaattata tagaacatga + 10261 aaaattaaaa ttttccgaga gtgataaatc aagaagagta ttagagtatt atttaagaga + 10321 taacaaattc aatgaatgtg atttatacaa ctgtgtagtt aatcaaagtt atcttaacaa + 10381 ccctaatcat gtggtatcat tgacaggcaa agaaagagaa ctcagtgtag gtagaatgtt + 10441 tgcaatgcaa ccaggaatgt tcagacaagt tcaaatattg gcagagaaaa tgatagctga + 10501 aaacatttta caattctttc ctgaaagtct tacaagatat ggtgatctag aactacaaaa + 10561 aatattagaa ttgaaagcag gaataagtaa caaatcaaat cgttacaatg ataattacaa + 10621 caattacatt agtaagtgct ctatcatcac agatctcagc aaattcaatc aagcatttcg + 10681 atatgaaaca tcatgtattt gtagtgatgt actggatgaa ctgcatggtg tacaatctct + 10741 attttcctgg ttacatttaa ctattcctca tgtcacaata atatgcacat ataggcatgc + 10801 acccccctat ataagagatc atattgtaga tcttaacaat gtagatgaac aaagtggatt + 10861 atatagatat catatgggtg gtatcgaagg gtggtgtcaa aaactatgga ccatagaagc + 10921 tatatcacta ttggatctaa tatctctcaa agggaaattc tcaattactg ctttaattaa + 10981 tggtgacaat caatcaatag atataagtaa accagtcaga ctcatggaag gtcaaactca + 11041 tgctcaagca gattatttgc tagcattaaa tagtcttaaa ttactgtata aagagtatgc + 11101 aggcataggc cacaaattaa aaggaactga gacttatata tcaagagata tgcaatttat + 11161 gagtaaaaca attcaacata acggtgtata ttacccagct agtataaaga aagtcctaag + 11221 agtgggaccg tggataaaca ctatacttga tgatttcaaa gtgagtctag aatctatagg + 11281 tagtttgaca caagaattag aatatagagg tgaaagtcta ttatgcagtt taatatttag + 11341 aaatgtatgg ttatataatc aaattgcttt acaattaaaa aatcatgcat tatgtaacaa + 11401 taaattatat ttggacatat taaaggttct gaaacactta aaaacctttt ttaatcttga + 11461 taatattgat acagcattaa cattgtatat gaatttaccc atgttatttg gtggtggtga + 11521 tcccaacttg ttatatcgaa gtttctatag aagaactcct gatttcctca cagaggctat + 11581 agttcactct gtgttcatac ttagttatta tacaaaccat gatttaaaag ataaacttca + 11641 agatctgtca gatgatagat tgaataagtt cttaacatgc ataatcacgt ttgacaaaaa + 11701 ccctaatgct gaattcgtaa cattgatgag agatcctcaa gctttagggt ctgagagaca + 11761 agctaaaatt actagcgaaa tcaatagact ggcagttaca gaggttttga gtacagctcc + 11821 aaacaaaata ttctccaaaa gtgcacaaca ttataccact acagagatag atctaaatga + 11881 tattatgcaa aatatagaac ctacatatcc tcacgggcta agagttgttt atgaaagttt + 11941 acccttttat aaagcagaga aaatagtaaa tcttatatca ggtacaaaat ctataactaa + 12001 catactggaa aagacttctg ccatagactt aacagatatt gatagagcca ctgagatgat + 12061 gaggaaaaac ataactttgc ttataaggat atttccatta gattgtaaca gagataaaag + 12121 agaaatattg agtatggaaa acctaagtat tactgaatta agcaaatatg ttagagaaag + 12181 atcttggtct ttatccaata tagttggtgt tacatcaccc agtatcatgt atacaatgga + 12241 catcaaatat acaacaagca ctatagctag tggcataatc atagagaaat ataatgttaa + 12301 cagtttaaca cgtggtgaga gaggacccac taaaccatgg gttggttcat ctacacaaga + 12361 gaaaaaaaca atgccagttt ataatagaca agttttaacc aaaaaacaga gagatcaaat + 12421 agatctatta gcaaaattgg attgggtgta tgcatctata gataacaagg atgaattcat + 12481 ggaagaactt agcataggaa ctcttgggtt aacatatgaa aaggccaaaa aattatttcc + 12541 acaatattta agtgttaact atttgcatcg ccttacagtc agtagtagac catgtgaatt + 12601 ccctgcatca ataccagctt atagaacaac aaattatcac tttgatacta gccctattaa + 12661 tcgcatatta acagaaaagt atggtgatga agatattgat atagtattcc aaaactgtat + 12721 aagctttggc cttagcttaa tgtcagtagt agaacaattt actaatgtat gtcctaacag + 12781 aattattctc atacctaagc ttaatgagat acatttgatg aaacctccca tattcacagg + 12841 tgatgttgat attcacaagt taaaacaagt gatacaaaaa cagcatatgt ttttaccaga + 12901 caaaataagt ttgactcaat atgtggaatt attcttaagt aataaaacac tcaaatctgg + 12961 atctcatgtt aattctaatt taatattggc acataaaata tctgactatt ttcataatac + 13021 ttacatttta agtactaatt tagctggaca ttggattctg attatacaac ttatgaaaga + 13081 ttctaagggt atttttgaaa aagattgggg agagggatat ataactgatc atatgtttat + 13141 taatttgaaa gttttcttca atgcttataa gacctatctc ttgtgttttc ataaaggtta + 13201 tggcaaagca aagctggagt gtgatatgaa tacttcagat ctcctatgtg tattggaatt + 13261 aatagacagt agttattgga agtctatgtc taaggtattt ttagaacaaa aagttatcaa + 13321 atacattctt agccaagatg caagtttaca tagagtaaaa ggatgtcata gcttcaaatt + 13381 atggtttctt aaacgtctta atgtagcaga attcacagtt tgcccttggg ttgttaacat + 13441 agattatcat ccaacacata tgaaagcaat attaacttat atagatcttg ttagaatggg + 13501 attgataaat atagatagaa tatacattaa aaataaacac aaattcaatg atgaatttta + 13561 tacttctaat ctcttttaca ttaattataa cttctcagat aatactcatc tattaactaa + 13621 acatataagg attgctaatt ctgaattaga aaataattac aacaaattat atcatcctac + 13681 accagaaacc ctagaaaata tactaaccaa tccgattaaa agtaatgaca aaaagacact + 13741 gaatgactat tgtataggta aaaatgttga ctcaataatg ttaccattgt tatctaataa + 13801 gaagcttatt aaatcgtcta caatgattag aaccaattac agcaaacaag atttgtataa + 13861 tttatttcct acggttgtga ttgataaaat tatagatcat tcaggtaata cagccaaatc + 13921 taaccaactt tacactacta cttctcatca aatatcttta gtgcacaata gcacatcact + 13981 ttattgcatg cttccttggc atcatattaa tagattcaat tttgtattta gttctacagg + 14041 ttgtaaaatt agtatagagt atattttaaa agaccttaaa attaaagatc ctaattgtat + 14101 agcattcata ggtgaaggag cagggaattt attattgcgt acagtagtgg aacttcatcc + 14161 tgacataaga tatatttaca gaagtctgaa agattgcaat gatcatagtt tacctattga + 14221 gtttttaagg ctgtacaatg gacatatcaa cattgattat ggtgaaaatt tgaccattcc + 14281 tgctacagat gcaaccaaca acattcattg gtcttattta catataaagt ttgctgaacc + 14341 tatcagtctt tttgtctgtg atgctgaatt gcctgtaaca gtcaactgga gtaaaattat + 14401 aatagaatgg agcaagcatg taagaaaatg caagtactgt tcctcagtta ataaatgtac + 14461 gttaatagta aaatatcatg ctcaagatga tattgatttc aaattagaca atataactat + 14521 attaaaaact tatgtatgct taggcagtaa gttaaaggga tctgaagttt acttagtcct + 14581 tacaataggt cctgcaaatg tattcccagt atttaatgta gtacaaaatg ctaaattgat + 14641 actatcaaga accaaaaatt tcatcatgcc taagaaagct gataaagagt ctattgatgc + 14701 aaatattaaa agtttgatac cctttctttg ttaccctata acaaaaaaag gaattaatac + 14761 tgcattgtca aaactaaaga gtgttgttag tggagatata ctatcatatt ctatagctgg + 14821 acgtaatgaa gttttcagca ataaacttat aaatcataag catatgaaca tcttaaagtg + 14881 gttcaatcat gttttaaatt tcagatcaac agaactaaac tataatcatt tatatatggt + 14941 agaatctaca tatccttatc taagtgaatt gttaaacagc ttgacaacta atgaacttaa + 15001 aaaactgatt aaaatcacag gtagtttgtt atacaacttt cataatgaat aa +// diff --git a/rsv/profiles/pango/REFROOTB.gb b/rsv/profiles/pango/REFROOTB.gb new file mode 100644 index 0000000..ccaaa97 --- /dev/null +++ b/rsv/profiles/pango/REFROOTB.gb @@ -0,0 +1,479 @@ +LOCUS REFROOTB 15033 bp RNA UNK 01-JAN-1980 +DEFINITION REFROOTB. +ACCESSION REFROOTB +VERSION REFROOTB +KEYWORDS . +SOURCE . + ORGANISM Human orthopneumovirus + Viruses; Riboviria; Orthornavirae; Negarnaviricota; Haploviricotina; + Monjiviricetes; Mononegavirales; Pneumoviridae; Orthopneumovirus. +FEATURES Location/Qualifiers + source 1..15033 + /organism="Human orthopneumovirus" + /mol_type="genomic RNA" + gene 1..479 + /gene="NS1" + /db_xref="GeneID:1489818" + mRNA 1..479 + /gene="NS1" + /db_xref="GeneID:1489818" + CDS 1..420 + /gene="NS1" + /codon_start=1 + /product="nonstructural protein 1" + /protein_id="NP_056856.1" + /db_xref="GeneID:1489818" + /translation="MGCNSLSMIKVRLQNLFDNDEVALLKITCYTDKLILLTNALAKAA + IHTIKLNGIVFIHVITSSEVCPDNNIVVKSNFTTMPILQNGGYIWELIELTHCSQLNGL + MDDNCEIKFSKRLSDSVMTDYMNQISDLLGLDLNS*" + gene 496..1002 + /gene="NS2" + /db_xref="GeneID:1489819" + mRNA 496..1002 + /gene="NS2" + /db_xref="GeneID:1489819" + CDS 528..902 + /gene="NS2" + /codon_start=1 + /product="nonstructural protein 2" + /protein_id="NP_056857.1" + /db_xref="GeneID:1489819" + /translation="MSTTNDNTTMQRLMITDMRPLSMESIITSLTKEIITHKFIYLINN + ECIVRKLDERQATFTFLVNYEMKLLHKVGSTKYKKYTEYNTKYGTFPMPIFINHGGFLE + CIGIKPTKHTPIIYKYDLNP*" + gene 1029..2233 + /gene="N" + /db_xref="GeneID:1489820" + mRNA 1029..2233 + /gene="N" + /db_xref="GeneID:1489820" + CDS 1044..2219 + /gene="N" + /codon_start=1 + /product="nucleoprotein" + /protein_id="NP_056858.1" + /db_xref="GeneID:1489820" + /translation="MALSKVKLNDTLNKDQLLSSSKYTIQRSTGDNIDTPNYDVQKHLN + KLCGMLLITEDANHKFTGLIGMLYAMSRLGREDTIKILKDAGYHVKANGVDITTYRQDI + NGKEMKFEVLTLSSLTSEIQVNIEIESRKSYKKMLKEMGEVAPEYRHDSPDCGMIILCI + AALVITKLAAGDRSGLTAVIRRANNVLKNEIKRYKGLIPKDIANSFYEVFEKHPHLIDV + FVHFGIAQSSTRGGSRVEGIFAGLFMNAYGSGQVMLRWGVLAKSVKNIMLGHASVQAEM + EQVVEVYEYAQKLGGEAGFYHILNNPKASLLSLTQFPNFSSVVLGNAAGLGIMGEYRGT + PRNQDLYDAAKAYAEQLKENGVINYSVLDLTAEELEAIKHQLNPKEDDVEL*" + gene 2237..3176 + /gene="P" + /db_xref="GeneID:1489821" + mRNA 2237..3176 + /gene="P" + /db_xref="GeneID:1489821" + CDS 2254..2979 + /gene="P" + /codon_start=1 + /product="phosphoprotein" + /protein_id="NP_056859.1" + /db_xref="GeneID:1489821" + /translation="MEKFAPEFHGEDANNKATKFLESIKGKFASSKDPKKKDSIISVNS + IDIEVTKESPITSGTNIINPISEADSTPETKANYPRKPLVSFKEDLTPSDNPFSKLYKE + TIETFDNNEEESSYSYEEINDQTNDNITARLDRIDEKLSEILGMLHTLVVASAGPTSAR + DGIRDAMVGLREEMIEKIRAEALMTNDRLEAMARLRNEESEKMAKDTSDEVSLNPTSKK + LSDLLEDNDSDNDLSLDDF*" + gene 3186..4140 + /gene="M" + /db_xref="GeneID:1489822" + mRNA 3186..4140 + /gene="M" + /db_xref="GeneID:1489822" + CDS 3195..3965 + /gene="M" + /codon_start=1 + /product="matrix protein" + /protein_id="NP_056860.1" + /db_xref="GeneID:1489822" + /translation="METYVNKLHEGSTYTAAVQYNVLEKDDDPASLTIWVPMFQSSVPA + DLLIKELASINILVKQISTPKGPSLRVTINSRSAVLAQMPSNFIISANVSLDERSKLAY + DVTTPCEIKACSLTCLKVKSMLTTVKDLTMKTFNPTHEIIALCEFENIMTSKRVIIPTY + LRSISVKNKDLNSLENIATTEFKNAITNAKIIPYAGLVLVITVTDNKGAFKYIKPQSQF + IVDLGAYLEKESIYYVTTNWKHTATRFSIKPLED*" + gene 4150..4570 + /gene="SH" + /db_xref="GeneID:1489823" + mRNA 4150..4570 + /gene="SH" + /db_xref="GeneID:1489823" + CDS 4235..4432 + /gene="SH" + /codon_start=1 + /product="small hydrophobic protein" + /protein_id="NP_056861.1" + /db_xref="GeneID:1489823" + /translation="MGNTSITIEFTSKFWPYFTLIHMILTLISLLIIITIMIAILNKLS + EHKTFCNKTLELGQMYQINT*" + gene 4621..5612 + /gene="G" + /db_xref="GeneID:1489824" + mRNA 4621..5612 + /gene="G" + /db_xref="GeneID:1489824" + CDS 4636..5601 + /gene="G" + /codon_start=1 + /product="attachment glycoprotein" + /protein_id="NP_056862.1" + /db_xref="GeneID:1489824" + /translation="MSKHKNQRTARTLEKTWDTLNHLIVISSCLYRLNLKSIAQIALSV + LAMIISTSLIIAAIIFIISANHKVTLTTVTVQTIKNHTEKNITTYLTQVSPERVSSSKQ + PTTTSPIHTNSATISPNTKSETHHTTAQTKGRITTSTQTNKPSTKSRSKNPPKKPKDDY + HFEVFNFVPCSICGNNQLCKSICKTIPSNKPKKKPTIKPTNKPTIKTTNKRDPKTPAKM + PKKEXXXTTNPTKKPTLKTTERDXXXXXXXXXXXXXXXXXXXXXSTSQSTVLDTTTSKH + TIQQQYLHSTTSENTPNSTQIPTASEPSTSNST*KT*SHT*" + gene 5665..7570 + /gene="F" + /db_xref="GeneID:1489825" + mRNA 5665..7570 + /gene="F" + /db_xref="GeneID:1489825" + CDS 5678..7402 + /gene="F" + /codon_start=1 + /product="fusion glycoprotein" + /protein_id="NP_056863.1" + /db_xref="GeneID:1489825" + /translation="MELLIHRSSAIFLTLAINALYLTSSQNITEEFYQSTCSAVSRGYF + SALRTGWYTSVITIELSNIKETKCNGTDTKVKLIKQELDKYKNAVTELQLLMQNTPAVN + NRARREAPQYMNYTINTTKNLNVSISKKRKRRFLGFLLGVGSAIASGIAVSKVLHLEGE + VNKIKNALLSTNKAVVSLSNGVSVLTSKVLDLKNYINNQLLPIVNKQSCRISNIETVIE + FQQKNSRLLEITREFSVNAGVTTPLSTYMLTNSELLSLINDMPITNDQKKLMSSNVQIV + RQQSYSIMSIIKEEVLAYVVQLPIYGVIDTHCWKLHTSPLCTTNIKEGSNICLTRTDRG + WYCDNAGSVSFFPQADTCKVQSNRVFCDTMNSLTLPSEVSLCNTDIFNSKYDCKIMTSK + TDISSSVITSLGAIVSCYGKTKCTASNKNRGIIKTFSNGCDYVSNKGVDTVSVGNTLYY + VNKLEGKNLYVKGEPIINYYDPLVFPSDEFDASISQVNEKINQSLAFIRRSDELLHNVN + TGKSTTNIMITTIIIVIIVVLLSLIAIGLLLYCKAKNTPVTLSKDQLSGINNIAFSK*" + gene 7627..8586 + /gene="M2" + /db_xref="GeneID:1489826" + mRNA 7627..8586 + /gene="M2" + /db_xref="GeneID:1489826" + CDS 7636..8223 + /gene="M2-1" + /note="ORF 1, matrix protein 2" + /codon_start=1 + /product="M2-1 protein" + /protein_id="NP_056864.1" + /db_xref="GeneID:1489826" + /translation="MSRRNPCKFEIRGHCLNGRRCHYSHNYFEWPPHALLVRQNFMLNK + ILKSMDKSIDTLSEISGAAELDRTEEYALGIVGVLESYIGSINNITKQSACVAMSKLLI + EINSDDIKKLRDNEEPNSPKIRVYNTVISYIESNRKNNKQTIHLLKRLPADVLKKTIKN + TLDIHKSITISNPKESTVNDQNDQTKNNDITG*" + CDS 8189..8461 + /gene="M2-2" + /note="ORF 2, RNA processivity factor" + /codon_start=1 + /product="M2-2 protein" + /protein_id="NP_056865.3" + /db_xref="GeneID:1489826" + /translation="MTKPKIMILPDKYPCSISSILISSESMVATFNHKNILQFNYNHLD + NHQCLLNHIFDEIHWTPKNLLDTTQQFLQHLNIPEDIYTVYILVS*" + gene 8519..15033 + /gene="L" + /db_xref="GeneID:1489827" + mRNA 8519..15033 + /gene="L" + /db_xref="GeneID:1489827" + CDS 8527..15033 + /gene="L" + /codon_start=1 + /product="polymerase protein" + /protein_id="NP_056866.1" + /db_xref="GeneID:1489827" + /translation="MDPIINGNSANVYLTDSYLKGVISFSECNALGSYLFNGPYLKNDY + TNLISRQSPLLEHMNLKKLTITQSLISRYHKGELKLEEPTYFQSLLMTYKSMSSSEQIA + TTNLLKKIIRRAIEISDVKVYAILNKLGLKEKDRVKPNNNSGDENSVLTTIIKDDILSA + VENNQSYTNSDKNHSVNQNITIKTTLLKKLMCSMQHPPSWLIHWFNLYTKLNNILTQYR + SNEVKSHGFILIDNQTLSGFQFILNQYGCIVYHKGLKKITTTTYNQFLTWKDISLSRLN + VCLITWISNCLNTLNKSLGLRCGFNNIVLSQLFLYGDCILKLFHNEGFYIIKEVEGFIM + SLILNITEEDQFRKRFYNSMLNNITDAAIKAQKNLLSRVCHTLLDKTVSDNIINGKWII + LLSKFLKLIKLAGDNNLNNLSELYFLFRIFGHPMVDERQAMDAVRINCNETKFYLLSSL + STLRGAFIYRIIKGFVNTYNRWPTLRNAIVLPLRWLNYYKLNTYPSLLEITENDLIILS + GLRFYREFHLPKKVDLEMIINDKAISPPKDLIWTSFPRNYMPSHIQNYIEHEKLKFSES + DRSRRVLEYYLRDNKFNECDLYNCVVNQSYLNNSNHVVSLTGKERELSVGRMFAMQPGM + FRQIQILAEKMIAENILQFFPESLTRYGDLELQKILELKAGISNKSNRYNDNYNNYISK + CSIITDLSKFNQAFRYETSCICSDVLDELHGVQSLFSWLHLTIPLVTIICTYRHAPPFI + KDHVVNLNEVDEQSGLYRYHMGGIEGWCQKLWTIEAISLLDLISLKGKFSITALINGDN + QSIDISKPVRLIEGQTHAQADYLLALNSLKLLYKEYAGIGHKLKGTETYISRDMQFMSK + TIQHNGVYYPASIKKVLRVGPWINTILDDFKVSLESIGSLTQELEYRGESLLCSLIFRN + IWLYNQIALQLRNHALCNNKLYLDILKVLKHLKTFFNLDSIDMALSLYMNLPMLFGGGD + PNLLYRSFYRRTPDFLTEAIVHSVFVLSYYTGHDLQDKLQDLPDDRLNKFLTCIITFDK + NPNAEFVTLMRDPQALGSERQAKITSEINRLAVTEVLSIAPNKIFSKSAQHYTTTEIDL + NDIMQNIEPTYPHGLRVVYESLPFYKAEKIVNLISGTKSITNILEKTSAIDTTDINRAT + DMMRKNITLLIRILPLDCNKDKRELLSLENLSITELSKYVRERSWSLSNIVGVTSPSIM + FTMDIKYTTSTIASGIIIEKYNVNSLTRGERGPTKPWVGSSTQEKKTMPVYNRQVLTKK + QRDQIDLLAKLDWVYASIDNKDEFMEELSTGTLGLSYEKAKKLFPQYLSVNYLHRLTVS + SRPCEFPASIPAYRTTNYHFDTSPINHVLTEKYGDEDIDIVFQNCISFGLSLMSVVEQF + TNICPNRIILIPKLNEIHLMKPPIFTGDVDIIKLKQVIQKQHMFLPDKISLTQYVELFL + SNKALKSGSHINSNLILVHKMSDYFHNAYILSTNLAGHWILIIQLMKDSKGIFEKDWGE + GYITDHMFINLNVFFNAYKTYLLCFHKGYGKAKLECDMNTSDLLCVLELIDSSYWKSMS + KVFLEQKVIKYIVNQDTSLHRIKGCHSFKLWFLKRLNNAKFTVCPWVVNIDYHPTHMKA + ILSYIDLVRMGLINVDKLTXXXKNKNKFNDEFYTSNLFYISYNFSDNTHLLTKQIRIAN + SELENNYNKLYHPTPETLENMSLIPVKSNNSNKPKFCISGNTESMMTSTFSNKMHIKSS + TVTTRFNYSKQDLYNLFPIVVIDRIIDHSGNTAKSNQLYTTTSHQTSLVRNSASLYCML + PWHHVNRFNFVFSSTGCKISIEYILKDLKIKDPSCIAFIGEGAGNLLLRTVVELHPDIR + YIYRSLKDCNDHSLPIEFLRLYNGHINIDYGENLTIPATDATNNIHWSYLHIKFAEPIS + IFVCDAELPVTANWSKIIIEWSKHVRKCKYCSSVNRCILIAKYHAQDDIDFKLDNITIL + KTYVCLGSKLKGSEVYLVLTIGPANILPVFNVVQNAKLILSRTKNFIMPKKTDKESIDA + NIKSLIPFLCYPITKKGIKTSLSKLKSVVNGDILSYSIAGRNEVFSNKLINHKHMNILK + WLDHVLNFRSAELNYNHLYMIESTYPYLSELLNSLTTNELKKLIKITGSVLYNLPNEQ* + " +ORIGIN + 1 atggggtgca attcactgag catgataaag gttagattac aaaatttatt tgacaatgat + 61 gaagtagcat tgttaaaaat aacatgttat actgacaaat taattcttct gactaatgca + 121 ttagccaaag cagcaataca tacaattaaa ttaaacggca tagtttttat acatgttata + 181 acaagcagtg aagtgtgccc tgataacaat attgtagtga aatctaactt tacaacaatg + 241 ccaatattac aaaatggagg atacatatgg gaattgattg agttgacaca ctgctctcaa + 301 ttaaatggtc taatggatga taattgtgaa atcaaatttt ctaaaagact aagtgactca + 361 gtaatgactg attatatgaa tcaaatatct gatttacttg ggcttgatct caattcatga + 421 attatgttta gtctaattca atagacatat gtttattacc attttagtta atataaaaac + 481 tcatcaaagg gaaatggggc aaataaactc acctaatcaa tcaaaccatg agcactacaa + 541 atgacaacac tactatgcaa agattgatga tcacagacat gagacccctg tcgatggaat + 601 caataataac atctctcacc aaagaaatca taacacacaa attcatatac ttgataaaca + 661 atgaatgtat tgtaagaaaa cttgatgaaa gacaagctac atttacattc ttagtcaatt + 721 atgagatgaa gctactgcac aaagtaggga gtaccaaata caaaaaatac actgaatata + 781 atacaaaata tggcactttc cccatgccta tatttatcaa tcatggcggg tttctagaat + 841 gtattggcat taagcctaca aaacacactc ctataatata caaatatgac ctcaacccgt + 901 aaattccaac nnaaaaaaan ctaaccaatc caaactaagc tattccttaa acaacagtga + 961 tcaacagtta agaaggagct aatccatttt agtaattaaa aataaaggta aagccaataa + 1021 cataaattgg ggcaaataca aagatggctc ttagcaaagt caagttaaat gatacattaa + 1081 ataaggatca gctgctgtca tccagcaaat acactattca acgtagtaca ggagataata + 1141 ttgacactcc caattatgat gtgcaaaaac acctaaacaa actatgtggt atgctattaa + 1201 tcactgaaga tgcaaatcat aaattcacag gattaatagg tatgttatat gctatgtcca + 1261 ggttaggaag ggaagacact ataaagatac ttaaagatgc tggatatcat gttaaagcta + 1321 atggagtaga tataacaaca tatcgtcaag atataaatgg aaaggaaatg aaattcgaag + 1381 tattaacatt atcaagcttg acatcagaaa tacaagtcaa tattgagata gaatctagaa + 1441 agtcctacaa aaaaatgcta aaagagatgg gagaagtggc tccagaatat aggcatgatt + 1501 ctccagactg tgggatgata atactgtgta tagctgcact tgtaataacc aaattagcag + 1561 caggagatag atcaggtctt acagcagtaa ttaggagggc aaacaatgtc ttaaaaaacg + 1621 aaataaaacg ctacaagggc ctcataccaa aggatatagc taacagtttt tatgaagtgt + 1681 ttgaaaaaca ccctcatctt atagatgttt ttgtgcactt tggcattgca caatcatcca + 1741 caagaggggg tagtagagtt gaaggaatct ttgcaggatt atttatgaat gcctatggtt + 1801 cagggcaagt aatgctaaga tggggagttt tagccaaatc tgtaaaaaat atcatgctag + 1861 gacatgctag tgtccaggca gaaatggagc aagttgtgga agtctatgaa tatgcacaga + 1921 agttgggagg agaagctgga ttctaccata tattgaacaa tccaaaagca tcattgctgt + 1981 cattaactca atttcctaac ttctcaagtg tggtcctagg caatgcagca ggtctaggca + 2041 taatgggaga gtatagaggt acaccaagaa accaggatct ttatgatgca gccaaagcat + 2101 atgcagagca actcaaagaa aatggagtaa taaactacag tgtattagac ttaacagcag + 2161 aagaattgga agccataaag catcaactca accccaaaga agatgatgta gagctttaag + 2221 ttaacnnaaa aaatacgggg caaataagtc aacatggaga agtttgcacc tgaatttcat + 2281 ggagaagatg caaataacaa agctaccaaa ttcctagaat caataaaggg caagttcgca + 2341 tcatccaaag atcctaagaa gaaagatagc ataatatctg ttaactcaat agatatagaa + 2401 gtaactaaag agagcccgat aacatctggc accaacatca tcaatccaat aagtgaagct + 2461 gatagtaccc cagaaaccaa agccaactac ccaagaaaac ccctagtaag cttcaaagaa + 2521 gatctcaccc caagtgacaa ccctttttct aaattgtaca aagaaacaat agaaacattt + 2581 gataacaatg aagaagaatc tagctactca tatgaagaaa taaatgatca aacaaatgac + 2641 aacattacag caagactaga tagaattgat gaaaaattaa gtgaaatatt aggaatgctc + 2701 catacattag tagttgcaag tgcaggaccc acttcagctc gcgatggaat aagagatgct + 2761 atggttggtc taagagaaga aatgatagaa aaaataagag cggaagcatt aatgaccaat + 2821 gataggttag aggctatggc aagacttagg aatgaggaaa gcgaaaaaat ggcaaaagac + 2881 acctcagatg aagtgtctct taatccaact tccaaaaaat tgagtgactt gttggaagac + 2941 aacgatagtg acaatgatct atcacttgat gatttttgat cagtgatcaa ctcactcagc + 3001 aatcaacaac atcaataaaa cagacatcaa tccattgaat caactgccag accanaacan + 3061 naacaaacgt ccatcagcag aaccaccaac caatcaatca accaattgat caatcagcaa + 3121 cctaacaaaa ttaacaatat agtaacnnnn nnnnaaannn nnaannnnan nangnnnaac + 3181 aagatggggc aaatatggaa acatacgtga acaagcttca cgaaggctcc acatacacag + 3241 cagctgttca gtacaatgtt ctagaaaaag atgatgatcc tgcatcacta acaatatggg + 3301 tgcctatgtt ccagtcatct gtgccagcag acttgctcat aaaagaactt gcaagcatca + 3361 acatactagt gaagcagatc tctacgccca aaggaccttc actacgagtc acgattaact + 3421 caagaagtgc tgtgctggca caaatgccta gtaattttat cataagcgca aatgtatcat + 3481 tagatgaaag aagcaaatta gcatatgatg taactacacc ttgtgaaatc aaagcatgca + 3541 gtctaacatg cttaaaagta aaaagtatgt taactacagt caaagatctt accatgaaaa + 3601 cattcaaccc cactcatgag attattgctc tatgtgaatt tgaaaatatt atgacatcaa + 3661 aaagagtaat aataccaacc tatctaagat caattagtgt caaaaacaag gatctgaact + 3721 cactagaaaa tatagcaacc accgaattca aaaatgctat caccaatgcg aaaattattc + 3781 cttatgcagg attagtatta gttatcacag ttactgacaa taaaggagca ttcaaatata + 3841 tcaagccaca gagtcaattt atagtagatc ttggagccta cctagaaaaa gagagcatat + 3901 attatgtgac tacaaattgg aagcatacag ctacacgttt ttcaatcaaa ccactagaag + 3961 attaaactta attatcaaca ctaaatgaca ggtccacata tatcctcaaa ctacacacta + 4021 tatccaaaca tcatgaacat atacactaca cacttcatca cacaaaccaa tcccactcaa + 4081 aatccaaaat cacttccagc cactatctgc tagacctaga gtgcgaatag gtaaataaaa + 4141 ccaaaatatg gggtaaatag acattagtta gagttcaatc aatctcaaca accatttata + 4201 ctgctaattc aatacatata ctataaattt caaaatggga aatacatcca tcacaataga + 4261 attcacaagc aaattttggc cttattttac actaatacat atgatcttaa ctctaatctc + 4321 tttactaatt ataatcacta ttatgattgc aatactaaat aagctaagtg aacataaaac + 4381 attctgtaac aaaactcttg aactaggaca gatgtatcaa atcaacacat agtgttctac + 4441 cattatgctg tgtcaaatta taatcctgta tatataaaca aacaaatcta atcttctcac + 4501 agagtcatgg tggtgcaaaa ccatgccaac tatcatggta gcatagagta gttnnnnnnn + 4561 natttnnaaa aannnttaac ataatgatga attattagta tgggatcaaa aacanaaatt + 4621 ggggcaaatg caaccatgtc caaacacaag aatcaacgca ctgccaggac tctagaaaag + 4681 acctgggata ctcttaatca tctaattgta atatcctctt gtttatacag attaaattta + 4741 aaatctatag cacaaatagc actatcagtt ttggcaatga taatctcaac ctctctcata + 4801 attgcagcca taatattcat catctctgcc aatcacaaag ttacactaac aacagttaca + 4861 gttcaaacaa taaaaaacca cactgaaaaa aacatcacca cctaccttac tcaagtctca + 4921 ccagaaaggg ttagctcatc caaacaacct acaaccacat caccaatcca cacaaattca + 4981 gctacaatat caccaaatac aaaatcagaa acacaccata caacagcaca aaccaaaggc + 5041 agaatcacca cttcaacaca gaccaacaag ccaagcacaa aatcacgttc aaaaaatcca + 5101 ccaaaaaaac caaaagatga ttaccatttt gaagtgttca attttgttcc atgtagtata + 5161 tgtggcaaca atcaactttg caaatccatc tgcaaaacaa taccaagcaa caaaccaaag + 5221 aaaaaaccaa ccatcaaacc cacaaacaaa ccaaccatca aaaccacaaa caaaagagac + 5281 ccaaaaacac cagccaaaat gccgaaaaaa gaaannnnnn ccaccaccaa cccaacaaaa + 5341 aaaccaaccc tcaagaccac agaaagagac annnnnnnnn nnnnnnnnnn nnnnnnnnnn + 5401 nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nccagcacct cacaatctac tgtgctcgac + 5461 acaaccacat caaaacacac aatccaacag caatacctcc actcaaccac ctctgaaaac + 5521 acacccaact ccacacaaat acccacagca tccgagccct ccacatcaaa ttctacttaa + 5581 aaaacctagt cacatactta gttattcaaa aactacatct tagcagagaa ccgtgatcta + 5641 tcaagcaaga acgaaattaa acctggggca aataaccatg gagttgctga tccacaggtc + 5701 aagtgcaatc ttcctaactc ttgctattaa tgcattgtac ctcacctcaa gtcagaacat + 5761 aactgaggag ttttaccaat cgacatgtag tgcagttagc agaggttatt ttagtgcttt + 5821 aagaacaggt tggtatacca gtgtcataac aatagaatta agtaatataa aagaaaccaa + 5881 atgcaatgga actgacacta aagtaaaact tataaaacaa gaattagata agtataagaa + 5941 tgcagtaaca gaattacagc tacttatgca aaacacacca gctgtcaaca accgggccag + 6001 aagagaagca ccacagtata tgaactacac aatcaatacc actaaaaacc taaatgtatc + 6061 aataagcaag aagaggaaac gaagatttct gggcttcttg ttaggtgtag gatctgcaat + 6121 agcaagtggt atagctgtat ccaaagttct acaccttgaa ggagaagtga acaaaatcaa + 6181 aaatgctttg ttgtctacaa acaaagctgt agtcagtcta tcaaatgggg ttagtgtttt + 6241 aaccagcaaa gtgttagatc tcaagaatta cataaataac caattattac ccatagtaaa + 6301 taaacagagc tgtcgcatct ccaacattga aacagttata gaattccaac agaagaatag + 6361 cagattgttg gaaatcacca gagaatttag tgtcaatgca ggtgtaacaa cacctttaag + 6421 cacttacatg ttaacaaaca gtgagttact atcattgatc aatgatatgc ctataacaaa + 6481 tgatcagaaa aaattaatgt caagcaatgt tcagatagta aggcaacaaa gttattctat + 6541 catgtctata ataaaggaag aagtccttgc atatgttgta cagctaccta tctatggtgt + 6601 aatagataca cattgctgga aattacacac atcacctcta tgcaccacca acatcaaaga + 6661 aggatcaaat atttgtttaa caaggactga tagaggatgg tattgtgata atgcaggatc + 6721 agtatccttc tttccacagg ctgacacttg taaagtacag tccaatcgag tattttgtga + 6781 cactatgaac agtttgacat taccaagtga agtcagcctt tgtaacactg acatattcaa + 6841 ttccaagtat gactgcaaaa ttatgacatc aaaaacagac ataagtagct cagtaattac + 6901 ttctcttgga gctatagtgt catgttatgg taaaactaaa tgcactgcat ccaataaaaa + 6961 tcgtgggatt ataaagacat tttctaatgg ttgtgactat gtgtcaaaca aaggagtaga + 7021 tactgtgtca gtgggcaaca ctttatacta tgtaaacaag ctggaaggca agaaccttta + 7081 tgtaaaaggg gaacctataa taaattacta tgatcctcta gtgtttcctt ctgatgagtt + 7141 tgatgcatca atatctcaag tcaatgaaaa aatcaatcaa agtttagctt ttattcgtag + 7201 atctgatgaa ttactacata atgtaaatac tggcaaatct actacaaata ttatgataac + 7261 tacaattatt atagtaatca ttgtagtatt gttatcatta atagctattg gtttactgtt + 7321 gtattgcaaa gccaaaaaca caccagttac actaagcaaa gaccaactaa gtggaatcaa + 7381 taatattgca ttcagcaaat agaanaaaaa actacttgat catgtttcaa caacaatctg + 7441 ctgaccacca atcccaaatc aacttaacaa taaatatttc aacatcatag cacaggctga + 7501 atcatttcct cacatcatgc tacctacaca actaagctag atccttaact catagttaca + 7561 tnnnnaaaaa cctcaagtat cacaatcaac cactaaatca acacatcatt cacaaaatta + 7621 acaactgggg caaatatgtc gcgaagaaat ccttgtaaat ttgagattag aggtcattgc + 7681 ttgaatggta gaagatgtca ctacagtcat aattattttg aatggcctcc tcatgcatta + 7741 ctagtgaggc aaaacttcat gttaaacaag atacttaagt caatggacaa aagcatagac + 7801 actttgtcag aaataagtgg agctgctgaa ctggatagaa cagaagaata tgctcttggt + 7861 atagttggag tgctagagag ttacatagga tctataaaca acataacaaa acaatcagca + 7921 tgtgttgcta tgagtaaact tcttattgag attaatagtg atgacattaa aaaactgaga + 7981 gataatgaag aacccaattc acctaagata agagtgtaca atactgttat atcatacatt + 8041 gagagcaata gaaaaaacaa caagcaaacc atccatctgc ttaaaagact accagcagac + 8101 gtgctgaaga agacaataaa gaacacatta gatatccaca aaagcataac cataagcaac + 8161 ccaaaagagt caactgtgaa tgatcaaaat gaccaaacca aaaataatga tattaccgga + 8221 taaatatcct tgtagtatat catccatatt gatttcaagt gaaagcatgg ttgctacatt + 8281 caatcataaa aacatattac aatttaacta taaccatttg gataaccacc agtgtttatt + 8341 aaatcatata tttgatgaaa ttcattggac acctaaaaac ttattagata ccactcaaca + 8401 atttctccaa catcttaaca tccctgaaga tatatataca gtatatatat tagtgtcata + 8461 atgcttgacc ataacgattt tatatcatcc aaccataaaa ctatcataat aaggttatgg + 8521 gacaaaatgg atcccattat taatggaaac tctgctaatg tgtatctaac tgatagttat + 8581 ctaaaaggtg ttatctcttt ttcagaatgt aatgctttag ggagttacct ttttaacggc + 8641 ccttatctta aaaatgatta caccaactta attagtagac aaagcccact actagagcat + 8701 atgaatctaa aaaaactaac tataacacag tcattaatat ctagatatca taaaggtgaa + 8761 ctgaaattag aagaaccaac ttatttccag tcattactta tgacatataa aagtatgtcc + 8821 tcgtctgaac aaattgctac aactaactta cttaaaaaaa taatacgaag agctatagaa + 8881 ataagtgatg taaaggtata cgccatcttg aataaactag gactaaagga aaaggacaga + 8941 gttaagccca acaataattc aggtgatgaa aactcagtac ttacaaccat aattaaagat + 9001 gatatacttt cggctgtgga aaacaatcaa tcatatacaa attcagacaa aaatcactca + 9061 gtaaatcaaa atatcactat caaaacaaca ctcttgaaaa aattgatgtg ttcaatgcaa + 9121 catcctccat catggttaat acactggttc aatttatata caaaattaaa taacatatta + 9181 acacaatatc gatcaaatga ggtaaaaagt catgggttta tattaataga taatcaaact + 9241 ttaagtggtt ttcagtttat tttaaatcaa tatggttgta tcgtttatca taaaggactc + 9301 aaaaaaatca caactactac ttacaatcaa tttttgacat ggaaagacat cagccttagc + 9361 agattaaatg tttgcttaat tacttggata agtaattgtt taaatacatt aaataaaagc + 9421 ttagggctga gatgtggatt caataatatt gtgttatcac aattatttct ttatggagat + 9481 tgtatactga aattatttca taatgaaggc ttctacataa taaaagaagt agagggattt + 9541 attatgtctt taattctaaa cataacagaa gaagatcaat ttaggaaacg attttataat + 9601 agcatgctaa ataacataac agatgcagct attaaggctc aaaaaaacct actatcaaga + 9661 gtatgtcaca ctttattaga caagacagtg tctgataata tcataaatgg taaatggata + 9721 atcctattaa gtaaatttct taaattgatt aagcttgcag gtgataataa tctcaataac + 9781 ttgagtgagc tatattttct cttcagaatc tttggacatc caatggtcga tgaaagacaa + 9841 gcaatggatg ctgtaagaat taactgtaat gaaactaagt tctacttatt aagtagtcta + 9901 agtacgttaa gaggtgcttt tatttataga atcataaaag ggtttgtaaa tacctataac + 9961 agatggccca ctttaaggaa tgctattgtt ctacctctaa gatggttaaa ctattataaa + 10021 cttaatactt atccatctct acttgaaatc acagaaaatg atttgattat tttatcagga + 10081 ttgcggttct atcgtgagtt tcatctgcct aaaaaagtgg atcttgaaat gataataaat + 10141 gacaaagcca tttcacctcc aaaagatcta atatggacta gttttcctag aaattacatg + 10201 ccatcacata tacaaaatta tatagaacat gaaaagttga agttctctga aagcgacaga + 10261 tcaagaagag tactagagta ttacttgaga gataataaat tcaatgaatg cgatctatac + 10321 aattgtgtag tcaatcaaag ctatctcaac aactctaatc acgtggtatc actaactggt + 10381 aaagaaagag agcttagtgt aggtagaatg tttgctatgc aaccaggtat gtttaggcaa + 10441 attcaaatct tagcagagaa aatgatagcc gaaaatattt tacaattctt ccctgagagt + 10501 ttgacaagat atggtgatct agagcttcaa aagatattag aattaaaagc aggaataagc + 10561 aacaagtcaa atcgttataa tgataactac aacaattata tcagtaaatg ttctatcatt + 10621 acagatctta gcaaattcaa tcaagcattt agatatgaaa catcatgtat ctgcagtgat + 10681 gtattagatg aactgcatgg agtacaatct ctgttctctt ggttgcattt aacaatacct + 10741 cttgtcacaa taatatgtac atatagacat gcacctcctt tcataaagga tcatgttgtt + 10801 aatcttaatg aagttgatga acaaagtgga ttatacagat atcatatggg tggtattgag + 10861 ggctggtgtc aaaaactgtg gaccattgaa gctatatcat tattagatct aatatctctt + 10921 aaagggaaat tctctatcac agctctaata aatggtgata atcagtcaat tgatataagt + 10981 aaaccagtta gacttataga gggtcagact catgctcaag cagattattt gttagcatta + 11041 aatagcctta aattgctata taaagagtat gcaggcatag gccataagct taagggaaca + 11101 gagacctata tatcccgaga tatgcaattc atgagcaaaa caatccagca caatggagtg + 11161 tattatccag ccagtatcaa aaaagtcctg agagtaggtc catggataaa tacaatactt + 11221 gatgatttta aagttagttt agaatctata ggcagcttaa cacaggagtt agaatacaga + 11281 ggagaaagct tattatgcag tttaatattt aggaacattt ggttatacaa tcaaattgct + 11341 ttgcaacttc gaaatcatgc attatgtaac aataagctat atttagatat attgaaagta + 11401 ttaaaacact taaaaacttt ttttaatctt gatagtattg atatggcgtt atcattgtat + 11461 atgaatttgc ctatgctgtt tggtggtggt gatcctaatt tgttatatcg aagcttttat + 11521 aggagaactc cagacttcct tacagaagct atagtacatt cagtgtttgt gttgagctat + 11581 tatactggtc acgatttaca agataagctc caggatcttc cagatgatag actgaacaaa + 11641 ttcttgacat gtatcatcac atttgataaa aatcccaatg ccgagtttgt aacattgatg + 11701 agggatccac aggctttagg gtctgaaagg caagctaaaa ttactagtga gattaataga + 11761 ttagcagtaa cagaagtctt aagtatagct ccaaacaaaa tattttctaa aagtgcacaa + 11821 cattatacta ccactgagat tgatctaaat gatattatgc aaaatataga accaacttac + 11881 cctcatggat taagagttgt ttatgaaagt ttaccttttt ataaagcaga aaaaatagtt + 11941 aatcttatat caggaacaaa atccataact aatatacttg aaaaaacatc agcaatagat + 12001 acaactgata ttaatagggc tactgatatg atgaggaaaa atataacttt acttataagg + 12061 atacttccac tagattgtaa caaagacaaa agagagttat taagtttaga aaatcttagt + 12121 ataactgaat taagcaagta tgtaagagaa agatcttggt cattatccaa tatagtagga + 12181 gtaacatcgc caagtattat gttcacaatg gacattaaat atacaactag cactatagcc + 12241 agtggtataa ttatagaaaa atataatgtt aatagtttaa ctcgtggtga aagaggacct + 12301 actaagccat gggtaggttc atctacgcaa gagaaaaaaa caatgccagt gtacaataga + 12361 caagttttaa ccaaaaaaca aagagaccaa atagatttat tagcaaaatt agactgggta + 12421 tatgcatcca tagacaacaa agatgaattc atggaagaac tgagtactgg aacacttgga + 12481 ctgtcatatg aaaaagccaa aaaattgttt ccacaatatc taagtgtcaa ttatttacac + 12541 cgtttaacag tcagtagtag accatgtgaa ttccctgcat caataccagc ttatagaaca + 12601 acaaattatc attttgatac tagtcctatc aatcatgtat taacagaaaa gtatggagat + 12661 gaagatatcg acattgtgtt tcaaaattgc ataagttttg gtcttagcct aatgtcggtt + 12721 gtggaacaat tcacaaacat atgtcctaat agaattattc tcataccgaa gctgaatgag + 12781 atacatttga tgaaacctcc tatatttaca ggagatgttg atatcatcaa gttgaagcaa + 12841 gtgatacaaa aacagcacat gttcctacca gataaaataa gtttaaccca atatgtagaa + 12901 ttattcctaa gtaacaaagc acttaaatct ggatctcaca tcaactctaa tttaatatta + 12961 gtacataaaa tgtctgatta ttttcataat gcttatattt taagtactaa tttagctgga + 13021 cattggattc tgattattca acttatgaaa gattcaaaag gtatttttga aaaagattgg + 13081 ggagaggggt atataactga tcatatgttc attaatttga atgttttctt taatgcttat + 13141 aagacttatt tgctatgttt tcataaaggt tatggtaaag caaaattaga atgtgatatg + 13201 aacacttcag atcttctttg tgttttggag ttaatagaca gtagctactg gaaatctatg + 13261 tctaaagttt tcctagaaca aaaagtcata aaatacatag tcaatcaaga cacaagtttg + 13321 catagaataa aaggttgtca tagttttaag ttgtggtttt taaaacgcct taataatgct + 13381 aaatttaccg tatgcccttg ggttgttaac atagattatc acccaacaca catgaaagct + 13441 atattatctt acatagattt agttagaatg gggttaataa atgtagataa attaaccatn + 13501 nnnnntaaaa ataaaaacaa attcaatgat gaattttaca catcaaatct cttttatatt + 13561 agttataact tttcagacaa cactcatttg ctaacaaaac aaataagaat tgctaattca + 13621 gaattagaaa ataattataa caaactatat cacccaaccc cagaaacttt agaaaatatg + 13681 tcattaattc ctgttaaaag taataatagt aacaaaccta aattttgtat aagtggaaat + 13741 accgaatcta tgatgacgtc aacattctct aataaaatgc atattaaatc ttccactgtt + 13801 accacaagat tcaattatag caaacaagac ttgtacaatt tatttccaat tgttgtgata + 13861 gacaggatta tagatcattc aggtaataca gcaaaatcta accaacttta cactaccact + 13921 tcacatcaga catctttagt aaggaatagt gcatcacttt attgcatgct tccttggcat + 13981 catgtcaata gatttaactt tgtatttagt tccacaggat gcaagattag tatagagtat + 14041 attttaaaag atcttaagat taaggacccc agttgtatag cattcatagg tgaaggagct + 14101 ggtaacttat tattacgtac ggtagtagaa cttcatccag acataagata catttacaga + 14161 agtttaaaag attgcaatga tcatagttta cctattgaat ttctaaggtt atacaacggg + 14221 catataaaca tagattatgg tgagaattta accattcctg ctacagatgc aactaataac + 14281 attcattggt cttatttaca tataaaattt gcagaaccta ttagcatctt tgtctgcgat + 14341 gctgaattac ctgttacagc caattggagt aaaattataa ttgaatggag taagcatgta + 14401 agaaagtgta agtactgttc ttctgtaaat agatgcattt taattgcaaa atatcatgct + 14461 caagatgata ttgatttcaa attagataac attactatat taaaaactta cgtgtgccta + 14521 ggtagcaagt taaaaggatc tgaagtttac ttagtcctta caataggccc tgcaaatata + 14581 cttcctgttt ttaatgttgt gcaaaatgct aaattgattc tttcaagaac taaaaatttc + 14641 attatgccta aaaaaactga caaagaatct atcgatgcaa atattaaaag cttaatacct + 14701 ttcctttgtt accctataac aaaaaaagga attaagactt cattgtcaaa attgaagagt + 14761 gtagttaatg gagatatatt atcatattct atagctggac gtaatgaagt attcagcaac + 14821 aagcttataa accacaagca tatgaatatc ctaaaatggc tagatcatgt tttaaacttt + 14881 agatcagctg aacttaatta caatcattta tatatgatag agtccacata tccttactta + 14941 agtgaattgt taaatagttt aacaaccaat gagctcaaga agctgattaa aataacaggt + 15001 agtgtactat acaaccttcc caacgaacag taa +// diff --git a/rsv/profiles/pango/amino-acid-genotypes.xlsx b/rsv/profiles/pango/amino-acid-genotypes.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c886b1338f50aa03c3f988a890db478c366a12f9 GIT binary patch literal 15040 zcmeHubz2`zvNjqB?(Xic!QI{6-G8{dLvVN3;1b;3-5ml!gFC!D*>ldbo89*loHKv) zTs<>Y(=$_d*L2lg@=~B+C_tZpAc25@2!W6=HaCocfq*c;fq;;KAVIW*>};J(Y@PI! z-R(^rb?Dq|tO@eLKqzv7Kt8VjzwLkV8|Y7*vh8Jn8N5iigHx$e!ro?(K@RSF!(lJ1 zg&4*XBAz9QfDLTU!PZYNh700fEmFXKe)e%Nf*(2=G^GwRZ>JXNItqu;K4^5fR&1Bc zx#o%^q%29gg_EoWM%kOWU9fNc?1yQ_MI$1Ea$eM59wKV61In?P_P9*3yTp+qpaRVb z!FhCRYjS|=Uy!K@%ZSC>D@{vlB4_p1P8zPSU<|+3y;y-l1!hGhB2A?x`wkd!g#$M$ z#;BC(#vVkdo6FmHT+>gvA1w+O6=G9}(^c-n0N5Oq#6-ehpbr?t!AbIEq(L=QC&Q}c zb|VN_R05aQifBfPVm2T0Zs!Cj6c_m##NK&HJP3d7;d4OP;)?z0?n4x;zmL%Rab{{!X!VkG|4 zqgTeu$n`P6gj|Tfgbv>?Y{Vi6O1TP%w-GA)_(`n8H%1qbU~hDh;~*$w1%itCcKE!E zu5EBf9}g4W?XpxxBB5{*H@SWbNqKT`hM*z?B#Sy!?(`$NEZi>KrHM(pQ@C`*P*r>{ z&6670A{L#y6s|>>qEo~Aj9i2hg2t2PuQ@2Av1#~F1-u}rboMQzrkOqWIAJQocPY8} zCoEqWr}XJ;8tSm4q1j56*RVC=?IV_ok~ybEjbS$63z55?v32*ga7GvMs~4?o=CC3W z3(^DAl-Lky?yaw81Iy)DmU}<@=e~;JUn2pL_@$d4e(K*rB3V{!@bvLU#6W?7uz(VffH9dfR&-0oz-7H?7uMs{NczxwEXYh+T$lI`x#(@E`mD)XF43>-{eQXcv1#; zG_64})FOq-SR0k`xj&~Tp0m{yrc4)*uv{~an?7^i?BWtTZ&Fg!;efTSW=u9h>E6q! z-4ld0GPmBtp+^PTvryhA^)Kjv7gcR128W3-h=kQ?pfX>;mEh^pG;|mm+2@dQ!)FDwr7W z-25f0)FF<@~eFRI2FMRd679>sh>mMm|Z} zAPYfS0>FVAl2+SP(wo}k;y`6jR5(T zV`j7{TH)uBO}LMwUwceYFKaB+8ABCcA+lPHXe{n$R@Cvc4boAim5$Q}(989ds!_*X zmCCNgt64%5x%Z$<+W~5AJsOyJz^ja}%5=0WbgF<2DWURox#Z_4l-{pu%SpA+5bi=a z21R?9g7JG8mOIFozW`Jslow?>6c2%-n+jug$uSVG2*ESbA%o5)~y`@wMgJyTaruqC%0;<#|c!@1^Uqu%z18XY@rO_ z^d3UNSE~9{QEH?Z28fYm?f{)}qJwIcwDUMY>wSTFCtt0_v;r6c1RXRO2TGia_(R*i z`=~nN-mz2*>fP)E_+uGF;{P_b)jM9DZU@&7tfxPmHA8VjKP7GKm# zL5qFMI-9$4#zWC=X;~($m7lSKKgZFUF#t{b96${h(G0QNgMw?Gj*C9+E6*zBT#m4B zi9L1q9^)QZQ@BNaFDe20rYEXq@6mjUIf;ala3=YR1UIJDK9GoDh9RuF>#jr{R%!#T zbgIG8WFa^*off~3EljAx_f4bXDvOup$8sS-NWnL&rk}2mcUo~B&%S+aF-19iFt3~J zxMi|W9ozI1ZSgnr%iLj96?*)9P0%y#=Ow~&daV->ab;fCt3oN~OFoCYj1YtU&NoEL zQKoEcTlQfPBn~#zE*93oiHg+ryZ)@bY_NiM7%Op0jDiq}t=Z7?ttAT1&PZU;2%l9N`+j_DgIMdZdooJ0@o-V(+pk*SyTYHGoO#`2oWbqG+$@XR?JPeQINkPIa z7f;jO6iDTW)ok1%btk>9Aw~oe;weo3?5-_kucM8Dv$m2X=5h&fKneYt z{H;^+^?gt9CsxST)=av|WMR5gGOs{&>ZJ-KdpPYtw;KwX1 zbMXYo(&R$%q*e<>E{kfo)c$f`f;}hWxwrc)SQ6!LO%0yF&)=V4HoRTQT5Q4ZvrBDL z<0+x&LG~3iO5;atB072Y-(cLM3F}C-CDQXjZKj>?E`?@OK}+trB%>h^>Lg|+Q$740 zlukBcKBvzIQdK^%|lxQFmN`=(Jc5x5{ZuUCwph+zamqQ#( z;ozX%^VQdFi)M}c{4KF%#djKal*M!_LWMGK2RH*zHCfA<(ETD+RB8h{3%Q0Ir6Z7$ z2)f`YqZ|-;#yS=*gwyq9a#EAvO(cp6E!qXoihvpYOKE#{70U6MnsX{^uSes5l-vt* zs1pU%7s_i}O!+yY)>SxR{Ij^$AE8B7v7KAETHEYYz4b^q374nFg-4Mc z%MXx%db4$gN69BYjixDRsAmI&wwbv#nTRW|PN;279N3x2bBKjzYW8p5;@Qks;_iQXWvM%AL^zZlu&>kCm=9Ix$PQuF7Ym}ysZg~h6?PbDxR;y(J5vWiDx zbBpV&(@($d?$V2nmL{$&J3B{+ebH#l-uB}M-?(+FUft?6tCk+Q!vYpAX24(Zm+GH$ z_>F=SMEZ7+mb~k{BkswyB1AL3c2*Eh<2mMt&21~;eDd+Z406WZ^XO)fnwvQyP$7_4iz*y&rWfBGi1ct%Kl%0_A=HD8&?)-? zpFx7ZLJYs%tdqHkjS2nlmhmqjooY-(VzZ-kp+E7#IJ&;D?!}O9ZckY!Y>*gdCE_$T zohqm@GsU-KBZH7~-YQZQ7NrQ<@g&ajgTi)vV3?%gC;kli#4_=>1Ck7DNlh2nNU^t$ zl9KyuACGIN%h`@p53y9}-uPs_PWdaI%&6s5lDTAGD-H@4FFEnVC`6ObwvW&>FH&nc zJ^)0U5y+us&_`kGykvX{&NV8u0mzWJ@7<;x{##^GL*l)}IcP&TxPhCgXw*fD-!O>d zA9Mo@m?94?bxK+2+18wMgrGsn0lG{p_)vC4dan^eoZ?E!7^n zJ$cpaYMLzpHfIS)DsEc-a*|{lYXB?%w2;@PcA+wcwmGe*Q{RGH9uvHjtL$47f)CTW zEdpIFK*_7!7n)^SVs^5Wi=wC1we|h$aT6!XzF2aCL0OJ2%7r5$UEE>f8l(0{Uzo&$nMcuS2_dd~c2( z`}FEs8mXjgAxM>!&KF|z-d|tQH}$&SejQ)3|C}ek?CtM*c^pgWdcXW~TeG{4O5f>y zb@ej=earjyP%Z{{bqxnM^%r4y?XRjGqYJV~1An;ZOJKgNeg?t*cz(nqm(ugGVGvy{ zg5zH2nzPqb#77_MKo%hShx{j$%N-blD^32zIZ zjop+=TD_Xsw5jeZwOZiuGA1_o7zcw-SlPkgm^q!osUoK^BP(ua zr13rzz_&+?tcspQ*dv1S`pJkUNb1Lz&|55lE-08ixMw__eztfiz|m|=BnERFkIQnj zJh$Vjbo_9?4;dVxNSte+1iDwu3YRdC5RQBu<9hSwrruXmqVM4Qmg$M)VNWKLxV7z3 zX&w0JP60KMCac^{%Z(G^%v@it%Rc^MF_fI51~ELFc^%cK4VcFcTV~XV^pkK;B`c|i zCsl4U^?^;|%dHkE0V$BwCij<#HZ^8;4gqdx6ybWB38HhIoE!7#lnF$wOP?A2{w;;6gu@cvwGI;YE9Z8B+J8RD69vIgx~9-d-eL(nN%WA;no6^tNKCuuK*mST zv>G!FqbX#v(VZC~eXH!oOqOd@80t)onlt_FS@zm3CVEcRtfjIwy$l*W{?Uz6J6b43 zviL}o=VJ5Q{E5$zva3eQCyJFh3ZIOEbRtI-W zXN&R~i>LIoW*}zc9mc{qqQL3_kD<9ax ztt45&>FvOej_CmXa>$NYsIE>=4e>SHFES8tJi+LnQG*(H@aVd$KWe_z9x=X~cWOE4U5DHTJO09`<0p$BX2kr!C96Xm*j@gskX!AXOlIA5eN5cLLp8wl%%~$M{GbVjZyIM|cq%^CQat5BK}mD%a86#Kg&w{`c;;56(=G zidkhq3BFHy!l$`Nhz&9m#z4fFUr|;miD}$hM~LK*wYg6H`n2U~M8J%M&4RQo#<9h@ z)S1SYX)FVm8!ay*DdfxlnME5lf2H{5FeaM4s9${%rBW5P@CF>lTixr8g^#L(8P$(e z++13c+G>pi5~iA;X|Dne%S(}pHf^A2^HcFcHxzliGXm_VJ$|N{X=cKidj@3aFXHsBXdm(=?AQf-cX@Y2?>L8 zL0b=&dv|u3h1;5#Dw_BsJ)GAzkM;U!44VnICtc9UaGj`2xAyxc377pDU)fKFOwdF$*dE2uuLZYZT~!ca zO(atc>LNl})W>x|UyiM31sp~v-(aEH4 zJh7Lfz{#z3NS#b|poXgnzoQ&a-uYfEF$q(t{Hlf0B&Z~< z$Ukz|_*(TZA@*911i$jk_3YsFrL7}+-*Wdl1RdPLTiTQ@e2FP{X$ zh&|s(q4$1}%H8?2?hZiHJL1qjOpLNlIjH4ih(o{Y_G8F_C9DsUczyK~-HY$<|F_>w zqWX;O8Uw;}5&?dVaQqpI2$sH!FUc^-X&H8U?#7$XOi?oF72I+MDxG1f~1b4CG7W4+Nd(bgi=4d~Bq37hTZsMLZHeZ2sO?P z=9NBc_nt(V{`k{o54Dj$I@~i1qV5<)KZVi1EVpPRI9cQ;H)mwwR7)#6a91!~k! zc)SX928O;z`qWSci|OTy0#e|ZJ&3zgaimfiKKZ#ssAMqiPWRO(2-Nci=klx3F3xbOW(g20VEC(svWDk>Dt6vB5*I;R| zEM*->kU>Cf8&~?!tZ6W6VYy?hP?PC5nXZHQcN6&GxsGc;#=`@bu9)AtYhLEANZq8ifNtYB)FEo+HDKn>1R1X@~0C$aPB}e<`b(n23ssqQQ4PPgUf= zy><9HUSB;O&*X7V!olCEO;zP|wR>D1{lahiso?Hr@961%`*KxsQt?{Qmvhe^y2bN$ zqKCJ+zK*xuti~P#=iA=lt&^3x&i(B0eth-g;pz4bYU{=R^~b^i?PjOD1K|2;;_d2j zBIfz^tRhF&I{ARSnods_7mvHm$J06T(8Jg5*WK|!*XFrgn_OMr5WJl(uaDcaTRjh- zTWwcs=j-F`+s??@UMTqsw|BGmd*-C4$K&+*yW0j-O}^;=HV*NJC*nToiP6NouW zvr1f^R267sHVjL(Y$J6v(?|p52i%adk@57L^l`^^Oe4$J!IiX4&pu8pWT-c5OPQwq zEiI*A7fXvCKAX$>oRikWk2P8KF0c9;3yUkbgDtRmJ#OdTrHpLuKL#<_MSd~+=xTDi zYDCNIjDyj;6jQ8A(i`GE?yj4XeZ{LRp9{mSJdOLd*h7QQqNTJz2JK~15lxN0?#bEP zwOn_Y%|3{AUd*;>zr^aoctH0J8ac`_!%7WWE19~z%U)x0gC?to&U9fhZMfE9(dK(a zF6sg)-c*sPPmqHJU>V4>o_CLfwYa}w?6`M7FSSGllSaA?np2OaW&)7lGM5TS(amzl z32{;Pq!F(kG+}d}Rz_dkzlJzDZnrP!WM1xM?uOQP$8n=DhoWiP2hM4OfrxAB55&OM zV{@HaA4%Q|5VexF9Xvxem{VZ5Dsb1eUkx9*+NT(tQD_=OaYEotzOL3uwov<|m(3k2%YK(`v$cNWb4b5s7(jAhb6JBR%h&7 zJ+}K>-IsbOBSmpx^H1jQherU7=u zr^zzlKzq5q;)3`UFB~HS`lUz7?Dc*fmWX3#41~DyqE;0oZX^pQtJEM6Zd6t4(}>Gt z7`YSMGNNk@Ei8|*ji3)%j5I}aBjdDM4RC5=r~XYtTUZXZ-ge5>g0wzRiL-}4kc&aK z3%hfiC*#7p!JBjA81Q;Jnw3aY6A)ai3|y99CPo&++Gte2Zq*MMF5d_<5GBBtp=6d= zKcJD4N8wWL=)tG^%o^B=9LHeYck3J*@pONvR}W{G{r=@gwlQB9AzkSGh7x4ajk9Sx z>1v#{y|r}|Iwh_~&DS8T^8oYfiM+0zGyEX)eeck0b%-}%UNjKq)3^x%9F_>4J@3hv zK!tgDBTl&s5#g`az)-^%(6x3^<){o11_tvl7=0p7T!^UcY7_po#+`l3lWL~pgJMl! zQ?urC7n0jun~^g+o<#~#`K1^F%;)uYB4L_`%dNvS@3i_|h=qK5Cb9R_pud(a_<6=b z*g$<{!D%_n8m|bZ_Qyp!&y~ESlWzO)!DD+VCKi&9DpwA}@CP@*-1xQCF%D0f5^o<} z+}_?S*ACXdDVj8goxyYqF&c!=N*M-(6KeVBZ|eRW3)TRCL+b8t1rhQ4q0vAz5IQl9 ztfJ1wX#qmP&p)eBnfpDIU*N8j1@5|9ffX_**BfIICqsWW41}6a|J4y$-)Ek^6rs=B z9={K~z1+aa$%TSMGM|wJ8U>t+*cIFfF&P>z*h7(7U6xh~UA2-bzsTpRwgfHcn`f~`ymde&I5aUqU27T|Y;FL0t{7)?HYkU4mQlntPXvmdZVwX$; znEl?~!Qi(M+B2M4_sK~p9bGrW9+0&yB8x3*EAeTY&@DYW_mWuQ%$Ybz8Ntgn<;i$y zmO2ZZFw-ihSva>EF*xz?rS_SPRG8O4M8@56XqkG|MxX_m)h8~JU`Q`JpVR$yz<6xN**Fv?T} zvpZa!e^#D1}t=No0RtPF38i7ft^MevM1c`3?RLv!oKo@YPyjShr z2@2UDd_YE6=iEF9?e$0BK?Z&ysIz@WD{=8Ey{THTl{yEQ4AV)SBClJ;AQ=yg*>&ZQVxl<-S zd_NaR`*^Bhtw=EbHh6?tUuR$r6($&{TF8EsYUFfD)EprwOtKncZSuT=$5L++hi_0( zq&_`N16eZ4c7`A~44@?!?q{QwtRJMfEW_|{SG7naudi1MLCp&hwv>CY&p?B2X$ydm zWA~Y2hWW{$SDp5#$VvhrJkYT2}ZcyiEx}oPAVL3YG`7*Kw$s zE6XPfyB~Wm=nQf6(n2?<6%A^Aa^LWA5E^xVIQdRWtC1JXZpS&?MB$JF*=1zHeAkmidJH9Goh=V0NtXc8UKVKNZ% z9JGTenb$lrS^bLr#`-{s^F+jceLuB7B5whdUWC%)15-wk*}x%bG;;rd6O1+vED|mZ z_?nu#|3z>D>}^(=SgqTZ`-8S1OBHNm$)xGx6UM7+?gUmFDFQL5x^9>tec{aX(2iPg zRf#&F3@7k1JECtrKznBxub%v)4Sg*2j3T*lcO^?bx&PaINs0QPvjUBjp;{gpyr=2< zYKJ#0Bn~sV>r%IT|6+8#Hcqa5#;X5t4bj>Pnq{T`1yX!gr*DL=nI;UjzgMXp`r+C@ zu&O(;LL%epL_GY@j?*`k2!bgS=+eXQePnC*aeJ63JvW^UKc9O6+(TrX`N>yL>qVwl z>nl+iHuH4W+Q8%ki$-}q@K~9I#K43<#h7s^=S{(eR8$fkX951=PPzhWITc^NV1u)wW;4{A&>TALiehNw2cXUz3`_6_&s zLb*rAE?Ui}fsyX}PS|_rZD~btqqZ%5Y+EN^!@~-B$rD6Q>o$ zmp2n%^J_&t(47xFdj4IX8z^9q&>oYjt^4Bh4o7De`a}jcbTG8H5lW!8do7;~vGjPEv8>zpgJT2){G*f4N-)b}*taaOzy^P5OX=RHE-#BJvs zp9lWX#H81#XwpLbK?_&}3u@k^tM7?<8Aux|(e{s^D|!_H*MYhtiIMXZ%?s-Vaz*j* z!9+tMX%v#KU%CJzpgPR_!MTu_aHzLg-s*Hdqfelj9{ye#g(YEOi>AMT!e0POa&oK0 zbu?PgQs$z7!E-Kpi6VEacBjIe><+Q_;_iN!7U&hk+t?a7$%uFz8zrp*u1b^Xvnu-U z>3HOUE}Si;L%iNa;MEuGwjlb!@*L2 zaNCfpiv zDnTjC>#`D95Q+y7P1Dd@PUKQgM<^j+5bA0PI3m=zA?U!U{;V|8-^UIZ^+ zweh4~8bKh3TPB=&g(Bg<4!$u0K`McIY*{>gC;D++717}Pi%fak)#t7 zx_k|VB@RdZ`3BkT>t4*I*SMR$tF?MuP}C}vDa1;OK{ynd&=#&A5F3COCb>p)yFG!h zqLG9wFDKt{5*kb?RyZR;A}jC71T~Q%`Z!C>__AG9US61$v@q-rSglJ}EbSNAK=&Kff<4eZt^zOYwVR zHpPzZ*oh>ii}PLF^>|bZ{)TYGjJ70IcZ7qkkkapaU_HGI$6Hv-YNz+=`2J)2DKzj5 z?;Uyfc;zkYeat@K-Tv~S>pw1*ttXl0dOm`fs{j8WCevSu#LNVln2(Hn$i-hl%&i`N zKT-(EIJ~4 z4EbPR3@le{sGjzosVFEMvRzr#iUT9EyJv)7*!o(x?K9S?fjRF;#FzF^D`~zxID-b@9;t_uqgE{`elUs-;( z(eAfdn%8vjAqzZ(D~saaSOh6&^1U{l2Co_gH-R$lgi^>| zy--3$?B+pUamWN3Hi`#YWDdxUS;wKQ+|XxDS%#BymlC0+NBBgH4HV{D^J2r^DE0Pq zU?%j$SQ$0-^)fGpBkb_|V`K_&gz(Vtmqqx9TAbfQ4iJK@Bt5uzr-s`O+i3iY{;`Z9;o6R>|9TER(zQ>8qaL(1fjF{Iv67sUvcenFxQZ3{u$8)C%M zjkUlyq_mY29l^@f8}SzpU*@#CDLS9NL=8|Bg&af|5BTg#wpSK4fEne%7b5iTJZ?kUz@5jfBSC_6$UcBnH zA7abda!d11gB>puS?@mCF++3b?!0GJjt&%&D4sgFD$ZqR%l*3~9GUjj92PwiOfTGX zz%&#x&FB8)zqaPKbuWPqx%x+hfZ&UYZ5DY>aq+&pUQQ#88}PY@d16eLDjO>3Lh9PDrhB#5?!uBJ=C0sufP-JB#)}eI}~t9un5ia>m7`oXmv@74CRpC90TS@#Jhu^$+}lE$LohQ8Y!zu%aW# zS!M2*GsH#O6W8XNZj;fPW^u>ia#Zy-)#6zo4IY^mNh0Osf@R}_ljWsl{V8oLpZBvqsqd95jWw{ySBmwEU7RHzM|E7-@NG7E_a`>KKhN4)oCq3eb{ZGwXy>ywc()*+C5vW zM!$z5Tlx2>)H?0S{@n-HtM^{J-x&GbSV#B31u!58`xdRhBZvz}QIcp&bGuz`cg)hV zB-`4gQ25fP9*wNE8=GdI4qJ^Sw*$l?3JjY%(SIV&%ul0*HVrpF*R2W=b2+BrKxKn2 z=9X+*mta(n%B1oirEvbl0w54-MkF6%K@DT2y{fP4lYOrWlIpy z44%P9GGn{7*|1@0!VBp>!gKfrI43>W%}w8j&P)+iqnLt}Xlp%lX+L<@EovVZ2P4sO z!SWR3Q!Gg)ku9u7H{XJ$$P>Q96vHn# zj zM>-JL&ZZZf392>%JSp^e@Pp5zPebqpvh~KV?A%*#Mx=i=lhgmr000J|{m3f)=PJMd z{(S$v{|_~P@>2h*;9o0|{#)>`ebxs?{;4kMPr-k!&iH4+-cP@CUjLt3jX(AL$<_Z` z)5}L8;~&iZKZXA!@%}AL@Zom;O1%9KrT0(KKgo-Kiy9&P7X6dT_@{zD=@)-1*hT-J zf&UvB<4-MrCJX=8;(_~*C-^gU_@{8KWyZ$(l= Date: Fri, 12 May 2023 18:01:41 +0200 Subject: [PATCH 2/7] Add Excel to clade script --- rsv/scripts/excel-to-clades.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 rsv/scripts/excel-to-clades.py diff --git a/rsv/scripts/excel-to-clades.py b/rsv/scripts/excel-to-clades.py new file mode 100644 index 0000000..35eaf5c --- /dev/null +++ b/rsv/scripts/excel-to-clades.py @@ -0,0 +1,31 @@ +from typing import Annotated + +import polars as pl +import typer + + +def main( + excel: Annotated[str, typer.Option(help="Excel input")] = "/Users/corneliusromer/code/nextclade_data_workflows/rsv/profiles/pango/amino-acid-genotypes.xlsx", + sheet: Annotated[str, typer.Option(help="Sheet name")] = "AA_RSVA", + outfile: Annotated[str, typer.Option(help="Clades.tsv output")] = "/Users/corneliusromer/code/nextclade_data_workflows/rsv/data/a/EPI_ISL_412866/clades_pango_raw.tsv", +): + df = pl.read_excel(excel, sheet_name=sheet) + df = df.with_columns([ + pl.col("Signature Mutations").str.split(",").alias("mutations"), + ]) + + # Output into clades.tsv + with open(outfile, "w") as f: + f.write("clade\tgene\tsite\talt\n") + for row in df.rows(named=True): + if row["mutations"] is None: + continue + print(row) + for mut in row["mutations"]: + gene, rest = mut.split(":") + site = rest[1:-1] + alt = rest[-1] + f.write(f"{row['RSV genotype']}\t{gene}\t{site}\t{alt}\n") + +if __name__ == "__main__": + typer.run(main) \ No newline at end of file From 6a2a1346489be346cb30f3038100b3c6b28b8bd1 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Fri, 12 May 2023 18:20:45 +0200 Subject: [PATCH 3/7] wip: add to snakemake workflow --- rsv/Snakefile | 6 ++++-- rsv/profiles/auspice_config.json | 5 +++++ rsv/profiles/builds.yaml | 16 ++++++++++++++++ rsv/profiles/pango/amino-acid-genotypes.xlsx | Bin 15040 -> 11752 bytes 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/rsv/Snakefile b/rsv/Snakefile index 14b708f..ed027a0 100644 --- a/rsv/Snakefile +++ b/rsv/Snakefile @@ -68,9 +68,11 @@ rule lift_clades_to_reference: input: clade_file="data/{a_or_b}/{reference}/clades_{clade_type}_raw.tsv", reference="references/{a_or_b}/{reference}/reference.gbk", - orig_reference="data/{a_or_b}/{reference}/clade_reference.gbk", + orig_reference=lambda w: config["builds"][w.a_or_b][w.reference]["clades"][ + w.clade_type + ]["ref_path"], output: - clade_file="data/{a_or_b}/{reference}/clades_{clade_type}.tsv", + clade_file="data/{a_or_b}/{reference}/clades_{clade_type,[A-Za-z0-9]+}.tsv", shell: """ python3 ../common_scripts/clade_translator.py \ diff --git a/rsv/profiles/auspice_config.json b/rsv/profiles/auspice_config.json index 360d766..974c217 100644 --- a/rsv/profiles/auspice_config.json +++ b/rsv/profiles/auspice_config.json @@ -26,6 +26,11 @@ "title": "G Clades (Goya et al)", "type": "categorical" }, + { + "key": "pango", + "title": "Proposed lineages", + "type": "categorical" + }, { "key": "date", "title": "Sample Date", diff --git a/rsv/profiles/builds.yaml b/rsv/profiles/builds.yaml index 2aa010c..8afd6bc 100644 --- a/rsv/profiles/builds.yaml +++ b/rsv/profiles/builds.yaml @@ -16,10 +16,18 @@ builds: key: clade_membership label_key: clade_annotation def: "references/a/EPI_ISL_412866/clades_genome.tsv" + ref_path: "data/a/EPI_ISL_412866/clade_reference.gbk" G: key: G_clade label_key: G_clade_label def: "references/a/EPI_ISL_412866/clades_G.tsv" + ref_path: "data/a/EPI_ISL_412866/clade_reference.gbk" + pango: + key: pango + label_key: pango + excel_path: "profiles/pango/amino-acid-genotypes.xlsx" + excel_sheet: "AA_RSVA" + ref_path: "profiles/pango/REFROOTA.gb" b: EPI_ISL_1653999: filter: "--min-date 1965 --probabilistic-sampling --group-by year --subsample-max-sequences 1500 --query 'genome_coverage>0.95'" @@ -30,10 +38,18 @@ builds: key: clade_membership label_key: clade_annotation def: "references/b/EPI_ISL_1653999/clades_genome.tsv" + ref_path: "data/b/EPI_ISL_1653999/clade_reference.gbk" G: key: G_clade label_key: G_clade_label def: "references/b/EPI_ISL_1653999/clades_G.tsv" + ref_path: "data/b/EPI_ISL_1653999/clade_reference.gbk" + pango: + key: pango_lineage + label_key: pango_lineage + excel_path: "profiles/pango/amino-acid-genotypes.xlsx" + excel_sheet: "AA_RSVB" + ref_path: "profiles/pango/REFROOTB.gb" unused_builds: diff --git a/rsv/profiles/pango/amino-acid-genotypes.xlsx b/rsv/profiles/pango/amino-acid-genotypes.xlsx index c886b1338f50aa03c3f988a890db478c366a12f9..cb843e88822e9af1f8602fdc77282a9b3ef05433 100644 GIT binary patch literal 11752 zcmaKS1yo$i(lrDL5Zv7%z~HV4?gV#tcXxLS?(R--g1ZyQ;O>Or1lJ#ucW>^?ec#_} zab`GYS9N#QboHs-B_ja_4g>Pil4Hu1et!AS1@ie*+s@F!o=)aJ7^t5ZvyRsOo#$ub zuRuWF{EeY~K~}_eiTBbY25FOxdU7qJ2^!S8Bk5WbUx4--EYMgo5n0+R9 z2N|?K-St>p{a{X2{?MGHqpLZs!_I!{`Hh;J<*E#aUX{`Xo2)tz#dz+xA(}fjF~;LS zpEVGvCNR&eRcQ2R5M^7OIM@y-o+>)R;D|W{H^pG}{T3VO3`w0*m^1ZB?PLQ#wl@M* zU(rM-Reo#LKy2|c>bSQ}68Bt^^sF40`%aM>I=zl=d9+Ji^tCs{bzDqbuebbX4T85y zXZh!1Ja6loDVU?L$xU6*u^#V&6Y1JDW(a7T=6sjA{xZkEO$PmJ4g+g_89Qs6-&XrK z4D1=^V)3_G{|$K2^9*n>F|;)N8}`fk&4K)+G6#M88pWlbPEWOOjG>_kTBu;1k%-Y%OZ2!O>yBBG4`tn^?v#!qV3EJD&@ zzKjJ&=SWS87R5d4BKUbGU3CApzRX1)f|Qh^n+VZbntY1PB>p2cI5J6dXp2&|LaXWG zR|w9;Bv(n|f;EDo_cqVhpn z{N(9}nCo~x&glIZ6zb}lSl;=iGio8n_6*>>sT$=b0aM)MCq~^-OV{7echc-P{KEB6rEF0tt)1AF3WA69h)hCn)*|OU zYeU`BYb|LVoSpouyE9a`%!i#(=>0X#ux&}lRz)^ z(Xq=Ty_|0f0!N=*O$0A*>M+*mXR6s4c7+c_>|i8N^E@jZ>`hS>%6A8Yxz|ismWO(8 zhHq)f(3-VqKXOeW80G4S_3yZ)*Fux0p5O$8w{|3PI*R527g+Nq8drB4+90B7oPvzn z{O;a2etVZGbO%0j7YzT9BhV3z=T?yTRNJmmr>DMZUZmGO9C;PM=v2HDu@rBLlC)ON z2Cl~`nv!^T&+fCRp$WbFCVhpZYx=DML@;O+pA^sZ+K5ISo$M%CI&Hc5{2o$*-H*t{8L!4IdOP?5idk2b1h2Aow5ofEP)BS=4^!UPx!;!f&3$8f7&cHWpzgf4gS z36`Z}2{Uk>AuSXPcBLT- z0~o*C7q79?RbD**ih|C42q8R#eW<2U>?42@2KF5u;s8NTQ->LOi=sY>%(^rVpftn+ zOdT3@OD8A>`)+I{Z~+CQ`2oW3;{%%RMeC0gAzJWlPb1UHh}!*RTqIWUu-c4k>l~d) z>NDQ_rs@3o!tdC3Jn(o52&Gbi%&W4=Xtt|goC`|QYd z6Teu3Z~F>O<#T;e*Px^XG*^*ON=pSFv{EHDFeQ&n+R1WhonAZYwhR%MXM|N}RvwP^ ze80p35hjRdVaZYbfXv)2Zpg~ws!8(|ON@U@ z_-qEdd?H+DenWkGAzdo$QtEe-IOA_G_=>a@qW6vCHuisa%PmJoVyNqij#QScXLHMOyjn#Txw}$*Im=PPV6}5xY1bUrhLO#x@2xBsznBld)zYMHC%~E9X%UaU z^%rzKoj7_U+D;p;Bx)P*c`F&-<+;@du*Hj?*(2xz9r71FMl9pPoF{YbJhF85%Pm#ly%BJp#){ ztg!ZOM&93Am%M9%HfVRIx%!^}MTiNxKs7x!HYLanrAtcbd8 zj=a$SS_}~~TKlNdstCCERdvC|9aTF9| zK&F3f0j}u7N^b1L#EBUh#^z}GM8n~v=68}>WwNJ4luHU1C!r=s1d*$P(^`Mx-7#Ql zsKLt9+UYdnt`=j`L1ufK6F1U3Z0OeldNW5V-?%ifLc>#%Fox<3wiyR+*&Tti(6aru zwkBoWO2?;Wasiwb?r<5Ixv_?0V7U`&EZ7`nfw|6E0ulN!^N8pOD(IWm?%NSyHt({Y zAT^&sHWVx}5m=eM!F2W-I+>mbv7hRzO)+7VSZ`qIe?C?M2R=WJv!8VMIGk`+HIF!td5n|p^p%QRW6j9p}aI8 zj{m@D8eRpphg>{+ny5OK-A@Y+xg<@QOG{QVjly5+t*fD2a*OKEZbH|&pgFhD7H(2d zL(&v~A}}G$2Lb5-e?R@o0~a9%%Qds538bota%@ywd&2=hm?=u02Apf)gqhs)SZg=Y zdg?-T%jZnIh1N(qN!JqrMz5)ZY~wmLJ|R`FsjZ}vF8TExg!Iu{A!AarHuo@6HbI1| zwx2S!*7|qp<>4toRbZpOKz#b>EngrDx;cH%bA(OAtHLal^7CWaKwD}|{ndAnXHgu?@`zJ2i>;+K zHd3WIE7!iGDhXr0-BE`edl#B?KN08Aqx5X1t*9)=&f(9;NUgCa;CY5EF) zT3kK5wFzgIbJxvj`Fr}>1AQ{~$Gz{~D?ezTodr`M3}|J*#Bi>xi(<5V!I(rdYf~JJ z5D}0gDzox%_-@B;IpwxZuxUT*4gaQaJXNB3Kl(e*yYN`{tsfM+n(p2-7RRl9rsu?BF#GrmPhIU zk#N$x2uQdgZn)&E1cxW7#lU5%1Y!ZC`J|W874%>vqaf08AfvoJ5nXoHArHGs$vH?y zAw^=KM#Z|2HY}%)_Ny>VjNK6@%s!;y?cJo8ja+Q{B%_$pC?KOM16v?*bMNcAB_rrj zxZ%oWa+RUX zKT6tHZI~SWxxk$&f{+?bSCy7Yk>LP)C(miGA#7q;4k_`15e2=cUH9Ghq@!}EiWdky zlPQA*-cDZ7-pU44Vu+3t#HP{C#vB*yW6hVQN?I4$Y9bP9I+@X>zE7^02k!mz%BzqD z0h(P+>)WVK2>Rdr#AlU5bV{IF{A`)LZ7m2O`?5|$JLzc>bZ6zD-ytn(ule)#^V>7* zGcyJE>pjli!PUah{wFb_v?{$wkNBue-GNc$%c#a81Rz63P9yKiZ~Gh?r* z@YG(LQCE`v3KDo({ch^==BVYEb{8o1WmYcR-N>v`(Jxp8Nm$a5bGKCTWOmWPCP>z@ zWVmD^jLfCS%;R0!j1$S+SQtY%2=6qsma4iGR7ovbT;02`X!17lCL|?+d6Z(BSi(OR zu*CQ_C^*St?E=BN4S2Y7=1FjenDWdu=kg>aa>MZcWAKfdiKdPZ{To9OII|ePta#`B{v_{gob47;b!aXdMjsA5{KtqaiU^}gh z3z>4WJK>EcJ}*sYMn=2QD6t3O(L&O?nGaw$26t@OOex}{gT9MXvG)s zCdF|cL7a}7<0Tmc0ARt=R(&fF=f*IAm6 zyFbk9kUJ73x`#jRKZ2#uhkd# z7jDs5g5F*aY~S~|*rxRR*P3{-vU3>ab-6HW!wSE}_|ZphdYh*dW|;(fy4EER)IacNj*PPBfw!YycJ8xeYxr7H%E_o|d! zK1>0!BHzZw?7{e`nwIo#OTVGIsEI#aUlV31Y%6^Kts>=0(wCcz!H@B5!w?7sb6{aN z7N3_yIwS0P5_8GJuHjM&TKc-+24>1yx{Y47+8 zSfgr!A6pA(1H&d&D?%)XD%eEwZ`gEWIvp8Q7DL!@32Tb*YR|Xn!}v3^ZPMqCIF1wO z$W_6-YtT!F&FJ~8u|H7KlgmJwgP3VdjtS+JACJ*0kw zeQEQ#J=o*+=0pZ3 z>7c$Ia|~W_Y6dN)&^%;jt(}Sbybp(1dI`cp&E2e#_J5n6*#M<1^4av<&vY;1v+4g% z=5=jsUhJMBKVtP9jqcHWcl#Y8iJ@S_eD|}(e9&3U26)ib%?v-~i25xRJ9V_jgHzV{ zYq>3E;K3Rbpf(OH7{CP;XN=9?&8HihBmnAowhm0M_Pz%h#&n|Vr1zy~(ATI_vUa(` z<}(WfGs9Vk$ znfigEvL{Ff-IUJQ`v`WENOCqGw(??7v(%SN88JH&aiGo-29|*sUhZ6F)6v$84`ya3 zxtFJrH(mXGJ=&e!pkPUQhh_+uq{m<`8bzTUBls1m(C<)tjV-e_<5ruw2yqi_;KM8- zK*Q@AZN*3k@#WOJ>h2vOE`9nsUDZvh3|GCOQXU1I$pmPx_t!@{{&L;FnS=ju@&D^2 zM2&k<{QI+Kg`Pe9{0jX-+5dvQ&8Bd(aN?o0&D|ER{=@)=?%e zqPeldF&-6W%$mzwhh7y2578n7a_G9KuOa*wIXO$)iTE2I+GXZ<@TowC2F7oGTZO)*6_%ZD?eF_UJ)I1zf^1lz_SRDJ zu3*YPdP_EA3n+;&TjK(iCufM>T;tkz;P>c9FDgq;sdlJO+s9_7&y2k`$Q_46cn@dC z@EC0Ummz;;7NCT^Aq> z5Xz3xEnbH?B5@OjY8algn**YJRW|Fl`gT^W@uVVgzv2woR0d|H3c!_>?cbM{W_kR` zVtcRk;Qn-UefxNFj*V|Tt9;iJewS2*P?f3ml$Y@yW>$FsxV*l-9DNo10Nl^I9N4$c zpF&#N9S|ip}Qwa4@ttUpTMTwm@(s z{l0DaeKQA-yPd%CcH`|sn)lJF=CZf<^ug(VPx!;>?$g&+FUKaXHt)uc(v_^E4=$rC z9Uga>UXNxB8lQ=t&bA>)4N2T{WvVR|Tu6P@?BEI=QMY zKx53?%Yn=)q2wu++$*VWW~z=anhAtvEbnJBEX^QH1lu5W={k|-7$d5|k12gkf-J~S6<#VM8IIGc&7GQHc|;D_;*# zLo0=zqB2>T-Z9}azcH#!fqET>vEVUofD|9KDpaa>hax$HF|z&73oC;$iajgd4EYZA z$OAgUO-SIL%6l4o#s@r8%}t_hYlO2FkK2j=A z=DkjI;WRMsLeL6?dzYQYp&eani=SKUu4fVMHLHg9R&Zj97#*>SX`ZUNuE~zxpvOe1 zpNZ#Wd=d<1Jh`?GOk5pS-H}|&e2OvE!SvzwssDKT<8$uG{N^iJjok>>e6cD|m}Uod zBhijzs}LWY+#xU{uS%Fxci*xn5kwX|&(}dyHD5IfBmXQ2%cw^^r+d-<;K54pfRYUj zx<6;_D+1jvV$^qYDbgZ!KNv2B#icpMlwE_6#3KT*O=zqIZ2ksarwCenOk=PGevq(W z*K>7F7)%M{A%APtK-PdW6jru}u{SbqzF9NLUf7A5$D@>TDk^)DY#5)@e4uRs8ab!I z2_ZF|I$a17dCs|F@+WEm0)Dc+7u31r=33kdSfBP|=h%>)e5D*O5h7nOhI zT-bPHhjO>1LorQe95EV)XU%JGWP)}`Ynu5=@2JZP;=w27p6>u5un3#Ds6G9MGtAst z-2UOjr=-DA`>n4Ba}N44)ASdhgl9yK-0iifUd91T=&>rA9Fi3n0E$tcg>_}jJUGJ= zG^0?m%E-&i3QR(Mi$E3qp6dRt+wKf99QvIze_vL3h|u&rxPrCjZ{eHUuj(v&lN9d< zk=%|(vdTnblk*6Xz@FC(Mxh10c!5o9UI6712Y?dBcO;Qnn~Mo=rFeHnRellfH0JhLg z*D;5?mz~_W7&3~n7ZMFLP}HK-vYRr(8fIViNi#7m+Kb5)7CeBXI97TeFT=@5BbeN1 z7+pIRbnwsSTbhi|GD|g~frp zx}wyVA;N_f^C5M01gxXsMBtBaTmMw~XBuyE;6v93_@QLSHz%*IzK znRL-ZO{EA(%Q$85r;*wujiu(Bx_-6`wa-bH?aM67f^tKgty=DNTOvtZR)K#yS-fgCHMRD5yy^ zd#YeajM8Y4lz(VFov9W8@55&nY6(su)UE<}h0rP?(}N0_v3TJVu|O%IiysN-=vqm@ z^Wor8ST^{I<8s%`Gma`%uRZ#Jg2H_q!@4o$LfK0Cr~_rd&6UA0Mt{uE|D=IqzH1DF#vZHH^0}gNlyJC9J47I zel}v5fS-%LXcEu(73^Y4sbru)|IjL7Ll4GM=8a%o4sb`(V%Wix`XQZ)pzjW2NvQvL z4rL;s$yu<0c=Cg75rc9NjawygyxtqDJ~i?YIwTC!85*~g!1#5ve6=$0go@811ab39 zAJ1E5S=-Dki)_E+!BIF3eo;rh@qZxFJD0z7KcY$`q+M8JC`hotgs)IQpTTvEND(Uo z+GT+rM&HJT@GBzpO9k;eM{3Ph^S3qEK~R_T`uRlxYl(Z&#ccrtxI~O5azMwid3tcU z^HH}n85{jt(xipbl5D|@_b#^rq{IR{BBTv7B=VRR68g=`u=vP3iw7-87MRxPZM zoX{+oEVKPAnIS)%K}t|7HobJh>>|S&62O`mFLvIYT$B0fi#DX%(r0&P5KQ>iOL=xT z?Vq_vj3ymgg3fB{f>4RQx07(sXE+6%fyd? z)U!;U8iya0-a2?-C*2a(>SuimWTu59+?>qb;xY~0H2Z1>eB#O>xWDZVb7E!8l6OeE zE$i`eoa-p;xdebI0GHGl@*`faD@^2y4xq8%UK9JM!^L8oAZ{`k8jaS;g?_l0LVgIo zw+cGV#uV!)PMgJlPJ>3YoH0)z{|p94jjcp??Y>)3 zdyw)WCKZ(cqX#4oP>|ukRUh~0Jm}EZuWGmN5%{6&H1xY7K8K1Avs?BZ6PlM^^D0xG zPYvdhOOwx+zG7A9sjC738iwIQEQ|f-=~f6h`5Ur46In=0fT^)^NA><19O-$g>svCa zS+pz3V*ROVHkI}ipyH|tL<2h;B7P7yHOI8MS%K&4I`Rx?tb{$9+a1_856)XnOzdQ0 zr&}vrHlrdHQhi2V40WpdmTxtV!uP%-a8_RdrcE3`aUD66l<6jBuZJ!c*!C#m!{RRs zcEz5&wiX$m4Ll-=j7U2m^WHWZ(t8dI(wqreR&*^z;nVh6boojrIqf7<$oHRc8b+X( zesJ~YzQ|?@dYcl30mH6EOM2>rN4Q5dJTqxCD{hf0q+45A!LsMZ6HZ6W#Wkm==s{To zim|(!O*nAW%CfcVl;N=Yj^A^wUy?I1f;3^V0Mu&)>toe~NBjzi*9g(ca@IHlMRt)n zz-;Ig8uJyR+8$>a&bDDba99WcX6saR@6vx!VSE}dBkj?$#?E4{z>_;gH|1rJm1i<4=3c1QyNwc_ffk? zfrMThq!Fz269XhQcR4p$nVXeO_t^dFgBfUACk}a5|&;`y0q*At8lYD5gTD> zA?fjeOlVDs`5Kfgd6BjxSpG#m!!{9Zh(92{?W82)j;S}NnF}mA5NX>#OcB)v9p%qt zDTAILb=s&JWXD#JBl346lsOEXjVKTV-vRR;5ifRJxOwjWs5g_a0C5d|W-(qr=eP*J zGPr+cw;8gNAD#;m9ugK#sFr1*5LuZ}i9=Ij|Mer7e*)~U=40w1f_6@2~VWElh2VCi#2yhq2KdMn)1THl( zBaK(k+yx{v2m7TKQt^F$pYj!=9{xKELjmNlB)rZWJ~h4xB8Y8d0<_y(I&7*D7Z_=( zWmLjLSwa8o85(OlEo@u_Mp_-)*4KMd97{G`#7ix2j_8p~U+IfPsM}~PBAvs0t??)b zY_WSw&q2U34S%iImBVPfB2ag?6Vp{C}m8uZn=84ve|fTe@VD$ z5x&*Z>;`?uVf%9{UGoVixRQ6wk$q!HX7xl{m@)b27ed!LPO-$}TA>XxuewIs1@+m( z9)Eqy9>PuJWa)M645LIYqVOHN(Ljg?K^je2uy=FuVXB9SYu9zdQY5AU5B>TixF6qq z@&*LgpPl4$-xf$#O4|VU6!I)cdh-CQTj9Co=r{%S8`YR_Un z>m⁡QwZ2ehaw&68NiL@-MMp^ZK)5@;@Ctf8_pC?05a-?}7eO@Ml%dZw213|1rP6 zYIJ^!|9jXkWhlSe%RI-Re$Mm1ic#sIpIZJrTKTP|3;Cy(-v=#!ivGz${}!D@`=6Zjp9=ouQhzIu#Q28` z{==;Pmj1T~{`&{|8tdoC|M!CUlivH?DeXVS{uvJcSRcR4|0fCdTaU~$Yxr#be^Fw; pC-6(dpF#My24srA9rzdFC?f&!Qo8+IK!yYY`+U0JPxldbo89*loHKv) zTs<>Y(=$_d*L2lg@=~B+C_tZpAc25@2!W6=HaCocfq*c;fq;;KAVIW*>};J(Y@PI! z-R(^rb?Dq|tO@eLKqzv7Kt8VjzwLkV8|Y7*vh8Jn8N5iigHx$e!ro?(K@RSF!(lJ1 zg&4*XBAz9QfDLTU!PZYNh700fEmFXKe)e%Nf*(2=G^GwRZ>JXNItqu;K4^5fR&1Bc zx#o%^q%29gg_EoWM%kOWU9fNc?1yQ_MI$1Ea$eM59wKV61In?P_P9*3yTp+qpaRVb z!FhCRYjS|=Uy!K@%ZSC>D@{vlB4_p1P8zPSU<|+3y;y-l1!hGhB2A?x`wkd!g#$M$ z#;BC(#vVkdo6FmHT+>gvA1w+O6=G9}(^c-n0N5Oq#6-ehpbr?t!AbIEq(L=QC&Q}c zb|VN_R05aQifBfPVm2T0Zs!Cj6c_m##NK&HJP3d7;d4OP;)?z0?n4x;zmL%Rab{{!X!VkG|4 zqgTeu$n`P6gj|Tfgbv>?Y{Vi6O1TP%w-GA)_(`n8H%1qbU~hDh;~*$w1%itCcKE!E zu5EBf9}g4W?XpxxBB5{*H@SWbNqKT`hM*z?B#Sy!?(`$NEZi>KrHM(pQ@C`*P*r>{ z&6670A{L#y6s|>>qEo~Aj9i2hg2t2PuQ@2Av1#~F1-u}rboMQzrkOqWIAJQocPY8} zCoEqWr}XJ;8tSm4q1j56*RVC=?IV_ok~ybEjbS$63z55?v32*ga7GvMs~4?o=CC3W z3(^DAl-Lky?yaw81Iy)DmU}<@=e~;JUn2pL_@$d4e(K*rB3V{!@bvLU#6W?7uz(VffH9dfR&-0oz-7H?7uMs{NczxwEXYh+T$lI`x#(@E`mD)XF43>-{eQXcv1#; zG_64})FOq-SR0k`xj&~Tp0m{yrc4)*uv{~an?7^i?BWtTZ&Fg!;efTSW=u9h>E6q! z-4ld0GPmBtp+^PTvryhA^)Kjv7gcR128W3-h=kQ?pfX>;mEh^pG;|mm+2@dQ!)FDwr7W z-25f0)FF<@~eFRI2FMRd679>sh>mMm|Z} zAPYfS0>FVAl2+SP(wo}k;y`6jR5(T zV`j7{TH)uBO}LMwUwceYFKaB+8ABCcA+lPHXe{n$R@Cvc4boAim5$Q}(989ds!_*X zmCCNgt64%5x%Z$<+W~5AJsOyJz^ja}%5=0WbgF<2DWURox#Z_4l-{pu%SpA+5bi=a z21R?9g7JG8mOIFozW`Jslow?>6c2%-n+jug$uSVG2*ESbA%o5)~y`@wMgJyTaruqC%0;<#|c!@1^Uqu%z18XY@rO_ z^d3UNSE~9{QEH?Z28fYm?f{)}qJwIcwDUMY>wSTFCtt0_v;r6c1RXRO2TGia_(R*i z`=~nN-mz2*>fP)E_+uGF;{P_b)jM9DZU@&7tfxPmHA8VjKP7GKm# zL5qFMI-9$4#zWC=X;~($m7lSKKgZFUF#t{b96${h(G0QNgMw?Gj*C9+E6*zBT#m4B zi9L1q9^)QZQ@BNaFDe20rYEXq@6mjUIf;ala3=YR1UIJDK9GoDh9RuF>#jr{R%!#T zbgIG8WFa^*off~3EljAx_f4bXDvOup$8sS-NWnL&rk}2mcUo~B&%S+aF-19iFt3~J zxMi|W9ozI1ZSgnr%iLj96?*)9P0%y#=Ow~&daV->ab;fCt3oN~OFoCYj1YtU&NoEL zQKoEcTlQfPBn~#zE*93oiHg+ryZ)@bY_NiM7%Op0jDiq}t=Z7?ttAT1&PZU;2%l9N`+j_DgIMdZdooJ0@o-V(+pk*SyTYHGoO#`2oWbqG+$@XR?JPeQINkPIa z7f;jO6iDTW)ok1%btk>9Aw~oe;weo3?5-_kucM8Dv$m2X=5h&fKneYt z{H;^+^?gt9CsxST)=av|WMR5gGOs{&>ZJ-KdpPYtw;KwX1 zbMXYo(&R$%q*e<>E{kfo)c$f`f;}hWxwrc)SQ6!LO%0yF&)=V4HoRTQT5Q4ZvrBDL z<0+x&LG~3iO5;atB072Y-(cLM3F}C-CDQXjZKj>?E`?@OK}+trB%>h^>Lg|+Q$740 zlukBcKBvzIQdK^%|lxQFmN`=(Jc5x5{ZuUCwph+zamqQ#( z;ozX%^VQdFi)M}c{4KF%#djKal*M!_LWMGK2RH*zHCfA<(ETD+RB8h{3%Q0Ir6Z7$ z2)f`YqZ|-;#yS=*gwyq9a#EAvO(cp6E!qXoihvpYOKE#{70U6MnsX{^uSes5l-vt* zs1pU%7s_i}O!+yY)>SxR{Ij^$AE8B7v7KAETHEYYz4b^q374nFg-4Mc z%MXx%db4$gN69BYjixDRsAmI&wwbv#nTRW|PN;279N3x2bBKjzYW8p5;@Qks;_iQXWvM%AL^zZlu&>kCm=9Ix$PQuF7Ym}ysZg~h6?PbDxR;y(J5vWiDx zbBpV&(@($d?$V2nmL{$&J3B{+ebH#l-uB}M-?(+FUft?6tCk+Q!vYpAX24(Zm+GH$ z_>F=SMEZ7+mb~k{BkswyB1AL3c2*Eh<2mMt&21~;eDd+Z406WZ^XO)fnwvQyP$7_4iz*y&rWfBGi1ct%Kl%0_A=HD8&?)-? zpFx7ZLJYs%tdqHkjS2nlmhmqjooY-(VzZ-kp+E7#IJ&;D?!}O9ZckY!Y>*gdCE_$T zohqm@GsU-KBZH7~-YQZQ7NrQ<@g&ajgTi)vV3?%gC;kli#4_=>1Ck7DNlh2nNU^t$ zl9KyuACGIN%h`@p53y9}-uPs_PWdaI%&6s5lDTAGD-H@4FFEnVC`6ObwvW&>FH&nc zJ^)0U5y+us&_`kGykvX{&NV8u0mzWJ@7<;x{##^GL*l)}IcP&TxPhCgXw*fD-!O>d zA9Mo@m?94?bxK+2+18wMgrGsn0lG{p_)vC4dan^eoZ?E!7^n zJ$cpaYMLzpHfIS)DsEc-a*|{lYXB?%w2;@PcA+wcwmGe*Q{RGH9uvHjtL$47f)CTW zEdpIFK*_7!7n)^SVs^5Wi=wC1we|h$aT6!XzF2aCL0OJ2%7r5$UEE>f8l(0{Uzo&$nMcuS2_dd~c2( z`}FEs8mXjgAxM>!&KF|z-d|tQH}$&SejQ)3|C}ek?CtM*c^pgWdcXW~TeG{4O5f>y zb@ej=earjyP%Z{{bqxnM^%r4y?XRjGqYJV~1An;ZOJKgNeg?t*cz(nqm(ugGVGvy{ zg5zH2nzPqb#77_MKo%hShx{j$%N-blD^32zIZ zjop+=TD_Xsw5jeZwOZiuGA1_o7zcw-SlPkgm^q!osUoK^BP(ua zr13rzz_&+?tcspQ*dv1S`pJkUNb1Lz&|55lE-08ixMw__eztfiz|m|=BnERFkIQnj zJh$Vjbo_9?4;dVxNSte+1iDwu3YRdC5RQBu<9hSwrruXmqVM4Qmg$M)VNWKLxV7z3 zX&w0JP60KMCac^{%Z(G^%v@it%Rc^MF_fI51~ELFc^%cK4VcFcTV~XV^pkK;B`c|i zCsl4U^?^;|%dHkE0V$BwCij<#HZ^8;4gqdx6ybWB38HhIoE!7#lnF$wOP?A2{w;;6gu@cvwGI;YE9Z8B+J8RD69vIgx~9-d-eL(nN%WA;no6^tNKCuuK*mST zv>G!FqbX#v(VZC~eXH!oOqOd@80t)onlt_FS@zm3CVEcRtfjIwy$l*W{?Uz6J6b43 zviL}o=VJ5Q{E5$zva3eQCyJFh3ZIOEbRtI-W zXN&R~i>LIoW*}zc9mc{qqQL3_kD<9ax ztt45&>FvOej_CmXa>$NYsIE>=4e>SHFES8tJi+LnQG*(H@aVd$KWe_z9x=X~cWOE4U5DHTJO09`<0p$BX2kr!C96Xm*j@gskX!AXOlIA5eN5cLLp8wl%%~$M{GbVjZyIM|cq%^CQat5BK}mD%a86#Kg&w{`c;;56(=G zidkhq3BFHy!l$`Nhz&9m#z4fFUr|;miD}$hM~LK*wYg6H`n2U~M8J%M&4RQo#<9h@ z)S1SYX)FVm8!ay*DdfxlnME5lf2H{5FeaM4s9${%rBW5P@CF>lTixr8g^#L(8P$(e z++13c+G>pi5~iA;X|Dne%S(}pHf^A2^HcFcHxzliGXm_VJ$|N{X=cKidj@3aFXHsBXdm(=?AQf-cX@Y2?>L8 zL0b=&dv|u3h1;5#Dw_BsJ)GAzkM;U!44VnICtc9UaGj`2xAyxc377pDU)fKFOwdF$*dE2uuLZYZT~!ca zO(atc>LNl})W>x|UyiM31sp~v-(aEH4 zJh7Lfz{#z3NS#b|poXgnzoQ&a-uYfEF$q(t{Hlf0B&Z~< z$Ukz|_*(TZA@*911i$jk_3YsFrL7}+-*Wdl1RdPLTiTQ@e2FP{X$ zh&|s(q4$1}%H8?2?hZiHJL1qjOpLNlIjH4ih(o{Y_G8F_C9DsUczyK~-HY$<|F_>w zqWX;O8Uw;}5&?dVaQqpI2$sH!FUc^-X&H8U?#7$XOi?oF72I+MDxG1f~1b4CG7W4+Nd(bgi=4d~Bq37hTZsMLZHeZ2sO?P z=9NBc_nt(V{`k{o54Dj$I@~i1qV5<)KZVi1EVpPRI9cQ;H)mwwR7)#6a91!~k! zc)SX928O;z`qWSci|OTy0#e|ZJ&3zgaimfiKKZ#ssAMqiPWRO(2-Nci=klx3F3xbOW(g20VEC(svWDk>Dt6vB5*I;R| zEM*->kU>Cf8&~?!tZ6W6VYy?hP?PC5nXZHQcN6&GxsGc;#=`@bu9)AtYhLEANZq8ifNtYB)FEo+HDKn>1R1X@~0C$aPB}e<`b(n23ssqQQ4PPgUf= zy><9HUSB;O&*X7V!olCEO;zP|wR>D1{lahiso?Hr@961%`*KxsQt?{Qmvhe^y2bN$ zqKCJ+zK*xuti~P#=iA=lt&^3x&i(B0eth-g;pz4bYU{=R^~b^i?PjOD1K|2;;_d2j zBIfz^tRhF&I{ARSnods_7mvHm$J06T(8Jg5*WK|!*XFrgn_OMr5WJl(uaDcaTRjh- zTWwcs=j-F`+s??@UMTqsw|BGmd*-C4$K&+*yW0j-O}^;=HV*NJC*nToiP6NouW zvr1f^R267sHVjL(Y$J6v(?|p52i%adk@57L^l`^^Oe4$J!IiX4&pu8pWT-c5OPQwq zEiI*A7fXvCKAX$>oRikWk2P8KF0c9;3yUkbgDtRmJ#OdTrHpLuKL#<_MSd~+=xTDi zYDCNIjDyj;6jQ8A(i`GE?yj4XeZ{LRp9{mSJdOLd*h7QQqNTJz2JK~15lxN0?#bEP zwOn_Y%|3{AUd*;>zr^aoctH0J8ac`_!%7WWE19~z%U)x0gC?to&U9fhZMfE9(dK(a zF6sg)-c*sPPmqHJU>V4>o_CLfwYa}w?6`M7FSSGllSaA?np2OaW&)7lGM5TS(amzl z32{;Pq!F(kG+}d}Rz_dkzlJzDZnrP!WM1xM?uOQP$8n=DhoWiP2hM4OfrxAB55&OM zV{@HaA4%Q|5VexF9Xvxem{VZ5Dsb1eUkx9*+NT(tQD_=OaYEotzOL3uwov<|m(3k2%YK(`v$cNWb4b5s7(jAhb6JBR%h&7 zJ+}K>-IsbOBSmpx^H1jQherU7=u zr^zzlKzq5q;)3`UFB~HS`lUz7?Dc*fmWX3#41~DyqE;0oZX^pQtJEM6Zd6t4(}>Gt z7`YSMGNNk@Ei8|*ji3)%j5I}aBjdDM4RC5=r~XYtTUZXZ-ge5>g0wzRiL-}4kc&aK z3%hfiC*#7p!JBjA81Q;Jnw3aY6A)ai3|y99CPo&++Gte2Zq*MMF5d_<5GBBtp=6d= zKcJD4N8wWL=)tG^%o^B=9LHeYck3J*@pONvR}W{G{r=@gwlQB9AzkSGh7x4ajk9Sx z>1v#{y|r}|Iwh_~&DS8T^8oYfiM+0zGyEX)eeck0b%-}%UNjKq)3^x%9F_>4J@3hv zK!tgDBTl&s5#g`az)-^%(6x3^<){o11_tvl7=0p7T!^UcY7_po#+`l3lWL~pgJMl! zQ?urC7n0jun~^g+o<#~#`K1^F%;)uYB4L_`%dNvS@3i_|h=qK5Cb9R_pud(a_<6=b z*g$<{!D%_n8m|bZ_Qyp!&y~ESlWzO)!DD+VCKi&9DpwA}@CP@*-1xQCF%D0f5^o<} z+}_?S*ACXdDVj8goxyYqF&c!=N*M-(6KeVBZ|eRW3)TRCL+b8t1rhQ4q0vAz5IQl9 ztfJ1wX#qmP&p)eBnfpDIU*N8j1@5|9ffX_**BfIICqsWW41}6a|J4y$-)Ek^6rs=B z9={K~z1+aa$%TSMGM|wJ8U>t+*cIFfF&P>z*h7(7U6xh~UA2-bzsTpRwgfHcn`f~`ymde&I5aUqU27T|Y;FL0t{7)?HYkU4mQlntPXvmdZVwX$; znEl?~!Qi(M+B2M4_sK~p9bGrW9+0&yB8x3*EAeTY&@DYW_mWuQ%$Ybz8Ntgn<;i$y zmO2ZZFw-ihSva>EF*xz?rS_SPRG8O4M8@56XqkG|MxX_m)h8~JU`Q`JpVR$yz<6xN**Fv?T} zvpZa!e^#D1}t=No0RtPF38i7ft^MevM1c`3?RLv!oKo@YPyjShr z2@2UDd_YE6=iEF9?e$0BK?Z&ysIz@WD{=8Ey{THTl{yEQ4AV)SBClJ;AQ=yg*>&ZQVxl<-S zd_NaR`*^Bhtw=EbHh6?tUuR$r6($&{TF8EsYUFfD)EprwOtKncZSuT=$5L++hi_0( zq&_`N16eZ4c7`A~44@?!?q{QwtRJMfEW_|{SG7naudi1MLCp&hwv>CY&p?B2X$ydm zWA~Y2hWW{$SDp5#$VvhrJkYT2}ZcyiEx}oPAVL3YG`7*Kw$s zE6XPfyB~Wm=nQf6(n2?<6%A^Aa^LWA5E^xVIQdRWtC1JXZpS&?MB$JF*=1zHeAkmidJH9Goh=V0NtXc8UKVKNZ% z9JGTenb$lrS^bLr#`-{s^F+jceLuB7B5whdUWC%)15-wk*}x%bG;;rd6O1+vED|mZ z_?nu#|3z>D>}^(=SgqTZ`-8S1OBHNm$)xGx6UM7+?gUmFDFQL5x^9>tec{aX(2iPg zRf#&F3@7k1JECtrKznBxub%v)4Sg*2j3T*lcO^?bx&PaINs0QPvjUBjp;{gpyr=2< zYKJ#0Bn~sV>r%IT|6+8#Hcqa5#;X5t4bj>Pnq{T`1yX!gr*DL=nI;UjzgMXp`r+C@ zu&O(;LL%epL_GY@j?*`k2!bgS=+eXQePnC*aeJ63JvW^UKc9O6+(TrX`N>yL>qVwl z>nl+iHuH4W+Q8%ki$-}q@K~9I#K43<#h7s^=S{(eR8$fkX951=PPzhWITc^NV1u)wW;4{A&>TALiehNw2cXUz3`_6_&s zLb*rAE?Ui}fsyX}PS|_rZD~btqqZ%5Y+EN^!@~-B$rD6Q>o$ zmp2n%^J_&t(47xFdj4IX8z^9q&>oYjt^4Bh4o7De`a}jcbTG8H5lW!8do7;~vGjPEv8>zpgJT2){G*f4N-)b}*taaOzy^P5OX=RHE-#BJvs zp9lWX#H81#XwpLbK?_&}3u@k^tM7?<8Aux|(e{s^D|!_H*MYhtiIMXZ%?s-Vaz*j* z!9+tMX%v#KU%CJzpgPR_!MTu_aHzLg-s*Hdqfelj9{ye#g(YEOi>AMT!e0POa&oK0 zbu?PgQs$z7!E-Kpi6VEacBjIe><+Q_;_iN!7U&hk+t?a7$%uFz8zrp*u1b^Xvnu-U z>3HOUE}Si;L%iNa;MEuGwjlb!@*L2 zaNCfpiv zDnTjC>#`D95Q+y7P1Dd@PUKQgM<^j+5bA0PI3m=zA?U!U{;V|8-^UIZ^+ zweh4~8bKh3TPB=&g(Bg<4!$u0K`McIY*{>gC;D++717}Pi%fak)#t7 zx_k|VB@RdZ`3BkT>t4*I*SMR$tF?MuP}C}vDa1;OK{ynd&=#&A5F3COCb>p)yFG!h zqLG9wFDKt{5*kb?RyZR;A}jC71T~Q%`Z!C>__AG9US61$v@q-rSglJ}EbSNAK=&Kff<4eZt^zOYwVR zHpPzZ*oh>ii}PLF^>|bZ{)TYGjJ70IcZ7qkkkapaU_HGI$6Hv-YNz+=`2J)2DKzj5 z?;Uyfc;zkYeat@K-Tv~S>pw1*ttXl0dOm`fs{j8WCevSu#LNVln2(Hn$i-hl%&i`N zKT-(EIJ~4 z4EbPR3@le{sGjzosVFEMvRzr#iUT9EyJv)7*!o(x?K9S?fjRF;#FzF^D`~zxID-b@9;t_uqgE{`elUs-;( z(eAfdn%8vjAqzZ(D~saaSOh6&^1U{l2Co_gH-R$lgi^>| zy--3$?B+pUamWN3Hi`#YWDdxUS;wKQ+|XxDS%#BymlC0+NBBgH4HV{D^J2r^DE0Pq zU?%j$SQ$0-^)fGpBkb_|V`K_&gz(Vtmqqx9TAbfQ4iJK@Bt5uzr-s`O+i3iY{;`Z9;o6R>|9TER(zQ>8qaL(1fjF{Iv67sUvcenFxQZ3{u$8)C%M zjkUlyq_mY29l^@f8}SzpU*@#CDLS9NL=8|Bg&af|5BTg#wpSK4fEne%7b5iTJZ?kUz@5jfBSC_6$UcBnH zA7abda!d11gB>puS?@mCF++3b?!0GJjt&%&D4sgFD$ZqR%l*3~9GUjj92PwiOfTGX zz%&#x&FB8)zqaPKbuWPqx%x+hfZ&UYZ5DY>aq+&pUQQ#88}PY@d16eLDjO>3Lh9PDrhB#5?!uBJ=C0sufP-JB#)}eI}~t9un5ia>m7`oXmv@74CRpC90TS@#Jhu^$+}lE$LohQ8Y!zu%aW# zS!M2*GsH#O6W8XNZj;fPW^u>ia#Zy-)#6zo4IY^mNh0Osf@R}_ljWsl{V8oLpZBvqsqd95jWw{ySBmwEU7RHzM|E7-@NG7E_a`>KKhN4)oCq3eb{ZGwXy>ywc()*+C5vW zM!$z5Tlx2>)H?0S{@n-HtM^{J-x&GbSV#B31u!58`xdRhBZvz}QIcp&bGuz`cg)hV zB-`4gQ25fP9*wNE8=GdI4qJ^Sw*$l?3JjY%(SIV&%ul0*HVrpF*R2W=b2+BrKxKn2 z=9X+*mta(n%B1oirEvbl0w54-MkF6%K@DT2y{fP4lYOrWlIpy z44%P9GGn{7*|1@0!VBp>!gKfrI43>W%}w8j&P)+iqnLt}Xlp%lX+L<@EovVZ2P4sO z!SWR3Q!Gg)ku9u7H{XJ$$P>Q96vHn# zj zM>-JL&ZZZf392>%JSp^e@Pp5zPebqpvh~KV?A%*#Mx=i=lhgmr000J|{m3f)=PJMd z{(S$v{|_~P@>2h*;9o0|{#)>`ebxs?{;4kMPr-k!&iH4+-cP@CUjLt3jX(AL$<_Z` z)5}L8;~&iZKZXA!@%}AL@Zom;O1%9KrT0(KKgo-Kiy9&P7X6dT_@{zD=@)-1*hT-J zf&UvB<4-MrCJX=8;(_~*C-^gU_@{8KWyZ$(l= Date: Fri, 12 May 2023 19:21:24 +0200 Subject: [PATCH 4/7] RSV Pango-like lineages --- common_scripts/clade_translator.py | 24 ++++++--- flu/Snakefile | 82 ++++++++++++++++++------------ rsv/profiles/auspice_config.json | 6 +++ rsv/profiles/builds.yaml | 6 +-- rsv/scripts/excel-to-clades.py | 2 + 5 files changed, 76 insertions(+), 44 deletions(-) diff --git a/common_scripts/clade_translator.py b/common_scripts/clade_translator.py index cb9266c..5e12f84 100644 --- a/common_scripts/clade_translator.py +++ b/common_scripts/clade_translator.py @@ -1,8 +1,9 @@ #!/bin/env python3 -import os import copy -from Bio import SeqIO, AlignIO, SeqRecord, Seq +import os + import numpy as np +from Bio import AlignIO, Seq, SeqIO, SeqRecord def parse_args(): @@ -51,23 +52,30 @@ def get_coordinate_map(ref, qry): coord_maps = {'nuc': get_coordinate_map(orig_ref.seq, new_ref.seq)} for f in orig_features: - coord_maps[f] = get_coordinate_map(orig_features[f].extract(orig_ref).translate().seq, - new_features[f].extract(new_ref).translate().seq) + try: + coord_maps[f] = get_coordinate_map(orig_features[f].extract(orig_ref).translate().seq, + new_features[f].extract(new_ref).translate().seq) + except: + print(f"Could not map {f}") with open(args.clades) as f: clades = [l.strip().split('\t') for l in f] with open(args.output_clades, 'w') as f: + f.write("clade\tgene\tsite\talt\n") for clade in clades: if (len(clade) < 4) or clades[0][0]=='#': f.write('\t'.join(clade) + '\n') continue if clade[1] in coord_maps: - new_pos = max(0,coord_maps[clade[1]][0][int(clade[2])-1])+1 - f.write('\t'.join([clade[0], clade[1], str(new_pos),clade[3]]) + '\n') - - else: + try: + new_pos = max(0,coord_maps[clade[1]][0][int(clade[2])-1])+1 + f.write('\t'.join([clade[0], clade[1], str(new_pos),clade[3]]) + '\n') + except: + print(f"Could not map {clade}") + + if clade[1] == 'clade': f.write('\t'.join(clade) + '\n') diff --git a/flu/Snakefile b/flu/Snakefile index cd7e860..67fb878 100644 --- a/flu/Snakefile +++ b/flu/Snakefile @@ -9,7 +9,6 @@ wildcard_constraints: reference="[^_/]+", - rule all: input: [ @@ -19,23 +18,23 @@ rule all: for reference in config["builds"][strain][segment] ], + rule fetch_data: output: - raw_sequences="data/{strain}/raw_{segment}.fasta" + raw_sequences="data/{strain}/raw_{segment}.fasta", shell: """ scp -r neher@transfer.scicore.unibas.ch:/scicore/home/neher/neher/nextstrain/seasonal-flu/data/{wildcards.strain}/raw_{wildcards.segment}.fasta {output.raw_sequences} """ + rule download_clades: message: "Downloading clade definitions for {wildcards.strain} from {params.source} -> {output}" output: "data/clades_{strain}_{segment}_{reference}_raw.tsv", params: - source=lambda w: config["builds"][w.strain][w.segment][w.reference][ - "clade_url" - ], + source=lambda w: config["builds"][w.strain][w.segment][w.reference]["clade_url"], shell: "curl {params.source} | sed '/V1A\\tHA1\\t146\\tI/d' >{output}" @@ -58,6 +57,7 @@ rule offset_clades: >{output} """ + rule download_includes: output: "data/includes_{strain}_{segment}_{reference}.tsv", @@ -99,8 +99,10 @@ rule parse: def genes(w): - if w.segment=='ha': return ["SigPep", "HA1", "HA2"] - if w.segment=='na': return ["NA"] + if w.segment == "ha": + return ["SigPep", "HA1", "HA2"] + if w.segment == "na": + return ["NA"] rule subsample: @@ -112,13 +114,15 @@ rule subsample: sampled_sequences="build/{strain}/{segment}/{reference}/subsample_raw.fasta", sampled_strains="build/{strain}/{segment}/{reference}/subsample_raw.txt", params: - filter_arguments=lambda w: config["builds"][w.strain][w.segment][ - w.reference - ]["filter"], - reference_EPI_ISL=lambda w: config["builds"][w.strain][w.segment][ - w.reference - ]["reference_EPI_ISL"], - other_include = lambda w:config["builds"][w.strain][w.segment][w.reference].get("include_file","") + filter_arguments=lambda w: config["builds"][w.strain][w.segment][w.reference][ + "filter" + ], + reference_EPI_ISL=lambda w: config["builds"][w.strain][w.segment][w.reference][ + "reference_EPI_ISL" + ], + other_include=lambda w: config["builds"][w.strain][w.segment][w.reference].get( + "include_file", "" + ), shell: """ augur filter \ @@ -225,15 +229,16 @@ rule tree: > /dev/null """ + # root using dates in treetime, use 1500 as sequence length (good enough, doesn't matter) rule root: input: tree=rules.tree.output.tree, - metadata = rules.parse.output.metadata, + metadata=rules.parse.output.metadata, output: tree="build/{strain}/{segment}/{reference}/tree_rooted.nwk", params: - outdir = "build/{strain}/{segment}/{reference}/tt_out" + outdir="build/{strain}/{segment}/{reference}/tt_out", shell: """ treetime clock \ @@ -245,6 +250,7 @@ rule root: cp {params.outdir}/rerooted.newick {output.tree} """ + # refine while keeping the root rule refine: input: @@ -344,21 +350,26 @@ rule clades: > /dev/null """ + ## TODO explicitly relabel clade branch labels to differentiate long and short ones # currently long ones are overwritten by short ones. rule make_short_clades: input: clades=rules.offset_clades.output, output: - clades = "data/clades-short_{strain}_{segment}_{reference}.tsv" + clades="data/clades-short_{strain}_{segment}_{reference}.tsv", run: with open(str(input.clades)) as fh: clades = fh.readlines() - for contraction in config["builds"][wildcards.strain][wildcards.segment][wildcards.reference].get("clade_contractions", []): - clades = [x.replace(contraction['orig'], contraction['short']) for x in clades] + for contraction in config["builds"][wildcards.strain][wildcards.segment][ + wildcards.reference + ].get("clade_contractions", []): + clades = [ + x.replace(contraction["orig"], contraction["short"]) for x in clades + ] - with open(str(output.clades), 'w') as fh: + with open(str(output.clades), "w") as fh: for line in clades: fh.write(line) @@ -370,7 +381,7 @@ rule clades_short: tree=rules.refine.output.tree, aa_muts=rules.aa_muts_explicit.output.node_data, nuc_muts=rules.ancestral.output.node_data, - clades = "data/clades-short_{strain}_{segment}_{reference}.tsv" + clades="data/clades-short_{strain}_{segment}_{reference}.tsv", output: node_data="build/{strain}/{segment}/{reference}/clades-short.json", shell: @@ -383,18 +394,19 @@ rule clades_short: sed -i 's/clade_membership/short_clade/' {output.node_data} """ + # make sure all differences between the alignment reference and the root are attached as mutations to the root rule attach_root_mutations: input: aa_muts=rules.aa_muts_explicit.output.node_data, nuc_muts=rules.ancestral.output.node_data, - translations = rules.align.output.alignment, - tree = rules.refine.output.tree + translations=rules.align.output.alignment, + tree=rules.refine.output.tree, output: aa_muts="build/{strain}/{segment}/{reference}/aa_muts_adapted.json", - nuc_muts="build/{strain}/{segment}/{reference}/nuc_muts_adapted.json" + nuc_muts="build/{strain}/{segment}/{reference}/nuc_muts_adapted.json", params: - genes = genes, + genes=genes, translations=lambda w: expand( "build/{strain}/{segment}/{reference}/aligned.gene.{genes}.fasta", strain=w.strain, @@ -402,7 +414,7 @@ rule attach_root_mutations: genes=genes(w), reference=w.reference, ), - reference = lambda w: w.reference + reference=lambda w: w.reference, shell: """ python3 ../common_scripts/attach_root_mutations.py \ @@ -428,7 +440,9 @@ def get_node_data(w): node_data.append("build/{strain}/{segment}/{reference}/clades.json".format(**w)) if "clade_contractions" in config["builds"][w.strain][w.segment][w.reference]: - node_data.append("build/{strain}/{segment}/{reference}/clades-short.json".format(**w)) + node_data.append( + "build/{strain}/{segment}/{reference}/clades-short.json".format(**w) + ) return node_data @@ -439,8 +453,10 @@ rule export: input: tree=rules.refine.output.tree, metadata=rules.parse.output.metadata, - node_data = get_node_data, - auspice_config=lambda w: config["files"]["auspice_config_shortclade"] if "clade_contractions" in config["builds"][w.strain][w.segment][w.reference] else config["files"]["auspice_config"], + node_data=get_node_data, + auspice_config=lambda w: config["files"]["auspice_config_shortclade"] + if "clade_contractions" in config["builds"][w.strain][w.segment][w.reference] + else config["files"]["auspice_config"], output: auspice_json="auspice/{strain}/{segment}/{reference}/auspice_raw.json", params: @@ -466,7 +482,7 @@ rule swap_strain_accession: output: auspice_json="auspice/{strain}/{segment}/{reference}/auspice.json", params: - fake_clade = lambda w: '--add-fake-clade none' if w.segment != 'ha' else '' + fake_clade=lambda w: "--add-fake-clade none" if w.segment != "ha" else "", shell: """ python3 scripts/swap_strain_accession.py \ @@ -526,10 +542,11 @@ rule assemble_folder: cp {input.tree} {output.tree}; """ -if 'timestamp' not in config: + +if "timestamp" not in config: timestamp = datetime.datetime.utcnow().isoformat()[:-7] + "Z" else: - timestamp = config['timestamp'] + timestamp = config["timestamp"] rule test_nextclade: @@ -558,7 +575,6 @@ rule test_nextclade: """ - rule clean: shell: """ diff --git a/rsv/profiles/auspice_config.json b/rsv/profiles/auspice_config.json index 974c217..a0c3488 100644 --- a/rsv/profiles/auspice_config.json +++ b/rsv/profiles/auspice_config.json @@ -62,6 +62,12 @@ "displayName": "G_clade (Goya et al)", "description": "Clades based on the G gene and Goya et al, IRV, 2019.", "hideInWeb": false + }, + { + "name": "pango", + "displayName": "Proposed lineages", + "description": "Proposed lineages based on Pango", + "hideInWeb": false } ] } diff --git a/rsv/profiles/builds.yaml b/rsv/profiles/builds.yaml index 8afd6bc..488f32f 100644 --- a/rsv/profiles/builds.yaml +++ b/rsv/profiles/builds.yaml @@ -24,7 +24,7 @@ builds: ref_path: "data/a/EPI_ISL_412866/clade_reference.gbk" pango: key: pango - label_key: pango + label_key: pango_label excel_path: "profiles/pango/amino-acid-genotypes.xlsx" excel_sheet: "AA_RSVA" ref_path: "profiles/pango/REFROOTA.gb" @@ -45,8 +45,8 @@ builds: def: "references/b/EPI_ISL_1653999/clades_G.tsv" ref_path: "data/b/EPI_ISL_1653999/clade_reference.gbk" pango: - key: pango_lineage - label_key: pango_lineage + key: pango + label_key: pango_label excel_path: "profiles/pango/amino-acid-genotypes.xlsx" excel_sheet: "AA_RSVB" ref_path: "profiles/pango/REFROOTB.gb" diff --git a/rsv/scripts/excel-to-clades.py b/rsv/scripts/excel-to-clades.py index 35eaf5c..b821a26 100644 --- a/rsv/scripts/excel-to-clades.py +++ b/rsv/scripts/excel-to-clades.py @@ -25,6 +25,8 @@ def main( gene, rest = mut.split(":") site = rest[1:-1] alt = rest[-1] + if gene == "G": + continue f.write(f"{row['RSV genotype']}\t{gene}\t{site}\t{alt}\n") if __name__ == "__main__": From 3f328b6f952ce191cebadfe3e4af476560f09115 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Mon, 15 May 2023 14:50:18 +0200 Subject: [PATCH 5/7] Merge master --- rsv/profiles/builds.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rsv/profiles/builds.yaml b/rsv/profiles/builds.yaml index 488f32f..3f75e1f 100644 --- a/rsv/profiles/builds.yaml +++ b/rsv/profiles/builds.yaml @@ -3,7 +3,7 @@ genes: ["NS1", "NS2", "N", "P", "M", "SH", "G", "F", "M2-1", "M2-2", "L"] auspice_config: "profiles/auspice_config.json" exclude: "profiles/exclude.txt" -timestamp: "2023-02-03T12:00:00Z" +timestamp: "2023-05-10T12:00:00Z" builds: a: From 130ab47864d20cdb116e094ab5de7a383a849be0 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Mon, 15 May 2023 14:55:49 +0200 Subject: [PATCH 6/7] Add pango to color orderings --- rsv/profiles/color_ordering_a.tsv | 35 +++++++++++++++++++++++++++ rsv/profiles/color_ordering_b.tsv | 40 ++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/rsv/profiles/color_ordering_a.tsv b/rsv/profiles/color_ordering_a.tsv index 144001b..108c59f 100644 --- a/rsv/profiles/color_ordering_a.tsv +++ b/rsv/profiles/color_ordering_a.tsv @@ -44,3 +44,38 @@ G_clade GA3.0.4a G_clade GA3.0.3b G_clade GA3.0.4b G_clade GA3.0.5b + +pango A.1 +pango A.2 +pango A.2.1 +pango A.3 +pango A.4 +pango A.5 +pango A.6 +pango A.6.1 +pango A.6.10 +pango A.6.10.1 +pango A.6.10.2 +pango A.6.11 +pango A.6.12 +pango A.6.12.1 +pango A.6.12.2 +pango A.6.13 +pango A.6.14 +pango A.6.14.1 +pango A.6.15 +pango A.6.15.1 +pango A.6.15.2 +pango A.6.16 +pango A.6.2 +pango A.6.3 +pango A.6.3.1 +pango A.6.4 +pango A.6.5 +pango A.6.5.1 +pango A.6.6 +pango A.6.7 +pango A.6.8 +pango A.6.9 +pango not-assigned + diff --git a/rsv/profiles/color_ordering_b.tsv b/rsv/profiles/color_ordering_b.tsv index c6ac645..ad41156 100644 --- a/rsv/profiles/color_ordering_b.tsv +++ b/rsv/profiles/color_ordering_b.tsv @@ -19,4 +19,42 @@ clade_membership B2 clade_membership B3 clade_membership B4 clade_membership B5 -clade_membership B6 \ No newline at end of file +clade_membership B6 + +pango B.1 +pango B.1.1 +pango B.2 +pango B.2.1 +pango B.2.1.1 +pango B.2.2 +pango B.2.3 +pango B.2.4 +pango B.2.5 +pango B.2.6 +pango B.2.7 +pango B.2.8 +pango B.2.9 +pango B.2.9.1 +pango B.2.9.2 +pango B.2.9.3 +pango B.2.9.4 +pango B.2.9.5 +pango B.2.9.6 +pango B.2.9.7 +pango B.2.9.8 +pango B.2.9.9 +pango B.2.9.10 +pango B.2.9.10.1 (B.A) +pango B.2.9.10.2 (B.B) +pango B.2.9.10.3 (B.C) +pango B.2.9.10.4 (B.D) +pango B.2.9.10.5 (B.E) +pango B.2.9.10.6 (B.F) +pango B.2.9.10.7 (B.G) +pango B.G.1 +pango B.G.2 +pango B.G.3 +pango B.G.4 +pango B.G.5 +pango B.G.6 +pango not assigned From 2559c6aabbfe099b7ee680202ac3eb3b87569a9a Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Wed, 28 Jun 2023 16:11:01 +0200 Subject: [PATCH 7/7] Improve overwrite for DV.7.1 --- sars-cov-2/profiles/clades/lineage_overwrite.tsv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sars-cov-2/profiles/clades/lineage_overwrite.tsv b/sars-cov-2/profiles/clades/lineage_overwrite.tsv index 9f69357..ecb74f0 100644 --- a/sars-cov-2/profiles/clades/lineage_overwrite.tsv +++ b/sars-cov-2/profiles/clades/lineage_overwrite.tsv @@ -1,5 +1,7 @@ lineage pos char +DV.7.1 22927 T + B.1.411 1519 T B.1.411 14774 G B.1.411 15438 T