-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSCRaMbLE_simulation_chr_len_events.py
More file actions
executable file
·154 lines (132 loc) · 17.5 KB
/
SCRaMbLE_simulation_chr_len_events.py
File metadata and controls
executable file
·154 lines (132 loc) · 17.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
from SCRaMbLE_simulation_3 import SCRaMbLE4
from SCRaMbLE_simulation_3 import force_SCRaMLE
from SCRaMbLE_simulation_3 import force_SCRaMLE_lin_cir
from comparison_sol import NG50_calculator
import matplotlib.pyplot as plt
# 1 LoxP unit = 2.8Kb
syn_chr = list(range(1, 45, 1))
essential = [2,7,9,10,12,20]
#essential = [2,7,9,10,12,15,16,20,22,23,27,30]
#essential = [1,2,3,4,5,6,7,8,9,10,11,12]
L_event = 7 # length of SCRaMbLE events
SCRaMbLEd_events = list(range(0, 300, 1))
# store the chr length in a list
chr_len = [0 for i in range(len(SCRaMbLEd_events))]
L_unique = [0 for i in range(len(SCRaMbLEd_events))]
LG50 = [0 for i in range(len(SCRaMbLEd_events))]
# This is if you have results from previous simulations
# These results are with the normal SCRaMbLE4 function that allow null event
#previous_simulations = 1000
#chr_len = [44000, 44078, 43909, 43582, 44220, 44302, 44198, 43863, 44250, 44567, 43956, 44132, 44060, 44200, 44325, 44661, 43808, 45241, 44839, 45024, 44290, 44863, 44773, 44926, 44232, 44975, 45375, 45342, 45016, 45014, 45193, 45331, 45335, 44600, 45653, 44922, 45277, 44921, 45945, 44486, 46864, 45554, 44665, 47052, 45843, 45238, 46902, 46304, 46013, 45494, 44923, 46399, 45395, 45689, 45636, 45779, 46736, 47008, 47276, 46754, 46588, 46620, 45404, 45682, 46349, 46216, 47260, 45963, 47282, 46739, 46479, 46643, 46818, 47549, 47040, 46622, 47173, 46892, 45605, 46244, 45461, 46771, 46163, 46652, 45959, 46547, 46975, 46660, 46266, 46508, 47243, 48639, 48147, 47694, 46239, 46366, 47885, 46433, 45605, 45361, 44506, 46054, 45473, 45500, 46025, 46441, 45656, 46619, 47380, 46378, 48278, 45852, 47364, 47349, 46078, 47281, 46930, 46923, 47298, 47328, 47464, 46465, 45711, 46192, 47787, 45927, 46510, 45198, 46955, 47222, 45094, 46602, 47414, 46344, 45800, 46577, 47657, 46226, 46088, 46427, 47537, 46168, 45339, 47113, 47102, 46451, 45636, 47255, 46081, 47403, 43894, 46126, 47576, 45653, 46994, 45984, 46865, 46693, 46960, 46181, 44805, 45628, 47181, 46873, 44928, 47523, 46632, 46304, 45623, 44698, 45200, 46161, 46384, 44320, 44845, 46466, 44297, 46670, 44960, 45732, 45111, 45611, 45412, 45025, 43183, 45874, 45687, 45747, 46019, 45691, 46626, 44417, 44875, 46592, 43960, 45921, 45113, 44747, 44715, 47265, 44539, 44851, 44163, 44158, 45364, 45393, 45445, 43722, 44925, 45250, 45538, 44065, 46105, 45730, 45441, 44849, 44584, 44624, 44340, 45812, 43996, 44921, 43885, 45488, 44753, 43524, 45628, 45223, 44467, 44088, 44286, 44770, 43001, 43892, 43468, 44400, 44203, 43162, 44075, 42919, 43995, 43870, 45762, 43274, 43912, 45013, 42485, 44004, 43382, 43089, 43318, 43800, 43454, 44168, 44151, 43285, 43222, 43139, 43461, 43101, 43300, 44281, 43754, 42940, 43215, 43850, 43443, 42993, 44749, 43299, 42435, 43022, 43896, 41459, 42641, 43911, 43226, 44080, 43341, 42892, 42198, 42004, 42361, 42266, 41969, 42081, 42598, 43086, 42544, 41795, 41714, 42617, 42170, 43029, 42539, 41352, 42419, 42141, 42147, 42819]
#L_unique = [44000, 43328, 42478, 41504, 41087, 40558, 40039, 39395, 38877, 38294, 37718, 37544, 37164, 36620, 36174, 36053, 35207, 35706, 35378, 34920, 34182, 34474, 33933, 33784, 33066, 33179, 32796, 32579, 32268, 32325, 31848, 31720, 31479, 30813, 31266, 30680, 30508, 30318, 30541, 30042, 30160, 29966, 29354, 30051, 29257, 29084, 29658, 28656, 28810, 28318, 27859, 28551, 27839, 27989, 27742, 27664, 27996, 27734, 27784, 27363, 27156, 26994, 26473, 26709, 26653, 26461, 26719, 26183, 26240, 26174, 25977, 25907, 25887, 25915, 25830, 25516, 25369, 25335, 24825, 24663, 24540, 24797, 24838, 25122, 24378, 24683, 24055, 24359, 24225, 24128, 24207, 24766, 24054, 24074, 23409, 23378, 23842, 23499, 23257, 23230, 23103, 23130, 22749, 22965, 23032, 22931, 22859, 22584, 22833, 22480, 22865, 22053, 22823, 22770, 22312, 22113, 22374, 22063, 22319, 21892, 22116, 21776, 21545, 21368, 21979, 21756, 21516, 21276, 21255, 21371, 20803, 21083, 20964, 21097, 20907, 20818, 21012, 20812, 20779, 20814, 20798, 20493, 20397, 20701, 20404, 20114, 20022, 20429, 20203, 20501, 19457, 19786, 20275, 19605, 20203, 19840, 19943, 19838, 19791, 19511, 19339, 19542, 19526, 19810, 19008, 19502, 19214, 19381, 19029, 18995, 19063, 19080, 18943, 18521, 18528, 19056, 18234, 18772, 18468, 18576, 18366, 18602, 18496, 18170, 18025, 18487, 18537, 18330, 18268, 17912, 18413, 18162, 18206, 18397, 17595, 18203, 17712, 17819, 18102, 18100, 17835, 17460, 17673, 17334, 17710, 17660, 17632, 17230, 17172, 17149, 17541, 17068, 17444, 17358, 17131, 17198, 16968, 17267, 17012, 17166, 16826, 17216, 16944, 17193, 17093, 16847, 16946, 16620, 16455, 16526, 16789, 16615, 16143, 16357, 16390, 16691, 16389, 16576, 16473, 16225, 16052, 16180, 16781, 16186, 16175, 16552, 15827, 16004, 16251, 15814, 15949, 15981, 16056, 16178, 15922, 15735, 16041, 15823, 15990, 15804, 15761, 15743, 15807, 15528, 15648, 15846, 15355, 15546, 15420, 15516, 15322, 15577, 15462, 15283, 15170, 15351, 15433, 15394, 14907, 15078, 14994, 15087, 15243, 15047, 14908, 15042, 15053, 14937, 15110, 15031, 14478, 14576, 14791, 14749, 14640, 14529, 14564, 14665, 14549, 14834]
#LG50 = [21000, 20311, 19590, 18799, 18203, 17660, 17234, 16800, 16211, 15643, 15422, 15178, 14865, 14318, 14030, 13836, 13405, 13390, 13200, 12859, 12625, 12513, 12268, 12141, 11715, 11561, 11377, 11180, 11076, 11144, 10770, 10737, 10593, 10324, 10404, 10090, 9907, 9853, 9845, 9831, 9571, 9514, 9335, 9402, 9157, 9089, 9235, 8769, 8825, 8748, 8522, 8585, 8497, 8488, 8359, 8319, 8315, 8233, 8227, 8063, 7863, 7798, 7696, 7740, 7730, 7549, 7601, 7511, 7412, 7359, 7373, 7289, 7355, 7208, 7279, 7130, 7074, 7064, 6837, 6768, 6746, 6770, 6849, 6875, 6628, 6716, 6429, 6541, 6622, 6576, 6421, 6621, 6340, 6251, 6199, 6193, 6301, 6234, 6173, 6160, 6124, 6037, 5919, 6059, 5952, 5974, 5954, 5753, 5809, 5775, 5813, 5702, 5766, 5769, 5710, 5608, 5712, 5550, 5572, 5515, 5498, 5459, 5458, 5351, 5489, 5485, 5351, 5317, 5228, 5220, 5155, 5160, 5111, 5175, 5136, 5030, 5089, 5081, 5103, 5025, 4987, 4961, 4958, 5015, 4910, 4800, 4804, 4839, 4815, 4920, 4652, 4708, 4815, 4636, 4724, 4670, 4719, 4653, 4616, 4618, 4628, 4569, 4553, 4623, 4418, 4521, 4474, 4553, 4409, 4447, 4457, 4416, 4400, 4274, 4303, 4397, 4233, 4350, 4326, 4308, 4266, 4312, 4210, 4165, 4137, 4204, 4196, 4182, 4101, 4068, 4165, 4171, 4126, 4121, 4008, 4146, 4016, 3994, 4046, 3998, 4042, 3854, 4027, 3930, 3991, 3932, 3960, 3864, 3768, 3761, 3883, 3798, 3870, 3816, 3809, 3781, 3731, 3861, 3694, 3795, 3706, 3810, 3755, 3789, 3718, 3666, 3687, 3603, 3547, 3603, 3673, 3612, 3556, 3562, 3596, 3705, 3556, 3631, 3544, 3595, 3441, 3529, 3586, 3522, 3512, 3559, 3444, 3413, 3478, 3395, 3461, 3437, 3444, 3481, 3432, 3361, 3421, 3435, 3432, 3372, 3303, 3362, 3371, 3335, 3286, 3379, 3278, 3311, 3266, 3297, 3264, 3306, 3259, 3200, 3219, 3188, 3257, 3230, 3152, 3175, 3119, 3185, 3224, 3163, 3090, 3128, 3127, 3100, 3165, 3145, 3031, 3029, 3108, 3043, 3074, 3017, 3010, 3046, 2993, 3082]
# These results are with the force_SCRaMbLE function that force a SCRaMbLE event, null event are not possible.
previous_simulations = 1000
chr_len = [44000, 44077, 43986, 44210, 43850, 44406, 44247, 43949, 44026, 44950, 44803, 43776, 44394, 44657, 44567, 44641, 44894, 44203, 44928, 45324, 44947, 45307, 45919, 44938, 45758, 44768, 44585, 46167, 45198, 45637, 44206, 45643, 45474, 45721, 45720, 45690, 46200, 46705, 45123, 47137, 46612, 46111, 45615, 47625, 46394, 45767, 47236, 45463, 45438, 46937, 45748, 46590, 47159, 45259, 45944, 45869, 45821, 45327, 46891, 45874, 47693, 46014, 45872, 46618, 45424, 46999, 46126, 46737, 47036, 45028, 45546, 46520, 46024, 46207, 45002, 46028, 45553, 44816, 46582, 44877, 46424, 45907, 45593, 46047, 45035, 46532, 45629, 45454, 45891, 45104, 45251, 44530, 45251, 45874, 45477, 45564, 44829, 46428, 45954, 45241, 45285, 46648, 46082, 44931, 44528, 44078, 45653, 44427, 44920, 45681, 44094, 46848, 45381, 44271, 45228, 43709, 44524, 45875, 45279, 44126, 45153, 44813, 43866, 44872, 44464, 45600, 43531, 43734, 43474, 43887, 42958, 45090, 43788, 44320, 45284, 45267, 43274, 42772, 43274, 44989, 42561, 41787, 43521, 43644, 42648, 43391, 43869, 44956, 43749, 43918, 43866, 42725, 42421, 42459, 43303, 43079, 44234, 42316, 41317, 44310, 41286, 42965, 41614, 42760, 42830, 43154, 41554, 42596, 43935, 42855, 42691, 40999, 42131, 43200, 43150, 42269, 42036, 41596, 42136, 42591, 42013, 42575, 41796, 41164, 41278, 42178, 41590, 41732, 42504, 40850, 41006, 41606, 41312, 42271, 39824, 40138, 42013, 41076, 39234, 41372, 40632, 40596, 39970, 40707, 42535, 41066, 40827, 41755, 40114, 40483, 40750, 41034, 39590, 40563, 40105, 39601, 40364, 40412, 40936, 41304, 38787, 39263, 38757, 41161, 41194, 40676, 41408, 40143, 40009, 39359, 38502, 40828, 38533, 39830, 39052, 40969, 39996, 39990, 40395, 39006, 39388, 38440, 38821, 39609, 38493, 40091, 38157, 39553, 40165, 39925, 39978, 39253, 39287, 39381, 38532, 38387, 36913, 38640, 38733, 39046, 38617, 37796, 37207, 38127, 38137, 39661, 38130, 38215, 38755, 38192, 37612, 37697, 38597, 38595, 37811, 38943, 38764, 37836, 37909, 38546, 36910, 38863, 37079, 38497, 38064, 36906, 37902, 36822, 36580, 38346, 38229, 37314, 38464, 37407, 37122, 38101, 37706, 36684, 37065, 39574]
L_unique = [44000, 42525, 41145, 39960, 38492, 37800, 36899, 35562, 34993, 34508, 33793, 32588, 31838, 31519, 30952, 30377, 29744, 29395, 28841, 28349, 28211, 27603, 27824, 26774, 26979, 26039, 25947, 26034, 25253, 25095, 24527, 24448, 24436, 24044, 23752, 23450, 23318, 23427, 22607, 22707, 22555, 22407, 21883, 22280, 21772, 21507, 21415, 21165, 20837, 20990, 20742, 20572, 20515, 19992, 20011, 19640, 19730, 19274, 19673, 19156, 19339, 18891, 18495, 18967, 18682, 18474, 18267, 18341, 18293, 17850, 17584, 17823, 17566, 17297, 17166, 17219, 17133, 17112, 17243, 16846, 16733, 16785, 16448, 16392, 16242, 16453, 16345, 16236, 15990, 15731, 15717, 15639, 15554, 15400, 15589, 15595, 15232, 15390, 15127, 15239, 15139, 15076, 14937, 14955, 14799, 14453, 14742, 14412, 14585, 14545, 14120, 14494, 14489, 14133, 14008, 13981, 13748, 14013, 14021, 13919, 13781, 13856, 13546, 13739, 13469, 13766, 13430, 13365, 13188, 13212, 13118, 13221, 13214, 12902, 13222, 13143, 12970, 12838, 12787, 12828, 12580, 12311, 12676, 12679, 12454, 12390, 12383, 12325, 12403, 12264, 12348, 12135, 11990, 12086, 11908, 11965, 12156, 11856, 11783, 12060, 11390, 11710, 11504, 11727, 11660, 11640, 11391, 11602, 11686, 11600, 11525, 11262, 11478, 11437, 11322, 11148, 11311, 11075, 11292, 11135, 11082, 10951, 11082, 10959, 10884, 11001, 10789, 10901, 10804, 10741, 10697, 10796, 10768, 10823, 10444, 10470, 10582, 10672, 10410, 10538, 10371, 10441, 10305, 10364, 10590, 10260, 10289, 10362, 10218, 10133, 10274, 10384, 9993, 10148, 9927, 9832, 9993, 10130, 10041, 10050, 9942, 9729, 9783, 9915, 9860, 9822, 9884, 9910, 9775, 9759, 9532, 9925, 9648, 9560, 9651, 9772, 9534, 9540, 9646, 9593, 9510, 9415, 9469, 9482, 9397, 9436, 9350, 9497, 9349, 9391, 9433, 9252, 9222, 9382, 9151, 9231, 9054, 9179, 9057, 9150, 9025, 9064, 8967, 9014, 9114, 9150, 8984, 9118, 8899, 8927, 9036, 9006, 8905, 8877, 8792, 8957, 8778, 8769, 8873, 8698, 8607, 8619, 8827, 8909, 8790, 8751, 8680, 8579, 8593, 8631, 8891, 8603, 8749, 8592, 8420, 8523, 8620, 8454, 8496, 8605]
LG50 = [21000, 19579, 18296, 17138, 16000, 15205, 14563, 13596, 13133, 12627, 12171, 11570, 10957, 10743, 10354, 10060, 9645, 9459, 9136, 8808, 8778, 8340, 8520, 8093, 8070, 7729, 7685, 7573, 7229, 7097, 7042, 6875, 6916, 6709, 6620, 6427, 6269, 6340, 6089, 6002, 6008, 5874, 5694, 5770, 5634, 5579, 5463, 5404, 5321, 5293, 5236, 5116, 5086, 4915, 4916, 4873, 4808, 4706, 4764, 4595, 4670, 4475, 4428, 4520, 4483, 4317, 4313, 4285, 4263, 4202, 4025, 4121, 4096, 3995, 3944, 3866, 3853, 3904, 3871, 3788, 3780, 3734, 3666, 3635, 3646, 3602, 3696, 3622, 3549, 3436, 3429, 3466, 3400, 3345, 3393, 3384, 3259, 3301, 3234, 3216, 3237, 3215, 3207, 3173, 3092, 3036, 3110, 3027, 3074, 3049, 2911, 3014, 3005, 2922, 2905, 2883, 2821, 2919, 2854, 2904, 2801, 2851, 2754, 2824, 2707, 2795, 2738, 2676, 2650, 2642, 2631, 2661, 2629, 2601, 2632, 2603, 2595, 2552, 2493, 2519, 2412, 2394, 2462, 2502, 2412, 2424, 2368, 2370, 2427, 2364, 2416, 2327, 2350, 2351, 2306, 2292, 2311, 2281, 2310, 2308, 2158, 2194, 2194, 2218, 2186, 2152, 2138, 2189, 2210, 2165, 2179, 2097, 2169, 2115, 2118, 2090, 2084, 2114, 2119, 2043, 2042, 1990, 2071, 2051, 1964, 2007, 1981, 1974, 1967, 1956, 1953, 1983, 1949, 1917, 1869, 1860, 1914, 1950, 1888, 1869, 1856, 1895, 1853, 1835, 1885, 1796, 1876, 1857, 1838, 1768, 1851, 1874, 1725, 1830, 1731, 1725, 1766, 1762, 1791, 1766, 1763, 1694, 1736, 1713, 1695, 1708, 1733, 1739, 1714, 1719, 1651, 1733, 1698, 1649, 1699, 1691, 1612, 1643, 1639, 1650, 1616, 1609, 1608, 1612, 1581, 1637, 1586, 1648, 1597, 1592, 1587, 1564, 1554, 1598, 1561, 1561, 1525, 1547, 1540, 1537, 1511, 1523, 1485, 1480, 1519, 1511, 1502, 1540, 1486, 1498, 1512, 1480, 1482, 1469, 1408, 1487, 1452, 1450, 1493, 1427, 1459, 1398, 1473, 1459, 1453, 1430, 1445, 1425, 1412, 1403, 1451, 1438, 1408, 1410, 1336, 1400, 1382, 1390, 1322, 1372]
# Here starts the new simulations
simulations = 1
# create many simulations
for simulation in range(simulations):
print(simulation)
# create many chromosomes with different number of SCRaMbLE events
for events in SCRaMbLEd_events:
#S = SCRaMbLE4(syn_chr, events, essential)
S = force_SCRaMLE(syn_chr, events, essential)
NG50 = NG50_calculator(S)
L_unique[events] = L_unique[events] + NG50[1]
LG50[events] = LG50[events] + NG50[3]
chr_len[events] = chr_len[events] + len(S)
#print("SCRaMbLEd_events =", events)
#print("SCRaMbLEd chr =",S)
print("chr_len =", chr_len)
print("L_unique =", L_unique)
print("LG50 =", LG50)
print("-------")
# divide each element by the number of simulations
simulations = simulations + previous_simulations
chr_len = [x/simulations for x in chr_len]
L_unique = [x/simulations for x in L_unique]
LG50 = [x/simulations for x in LG50]
#for i in range(len(chr_len)):
# chr_len[i] = chr_len[i] / simulations
print("simulations =", simulations)
print("chr_len =", chr_len)
print("L_unique =", L_unique)
print("LG50 =", LG50)
# plot the results
# Use this if you want histograms
#plt.bar(SCRaMbLEd_events, chr_len, align='center')
#plt.xticks(SCRaMbLEd_events, SCRaMbLEd_events)
# Use this if you want lines
plt.plot(SCRaMbLEd_events, chr_len, label="Chr len")
plt.plot(SCRaMbLEd_events, L_unique, label="Len unique")
plt.plot(SCRaMbLEd_events, LG50, label="LG50")
plt.ylim([0,max(chr_len)+5])
plt.ylabel("Chr length mean over "+str(simulations)+" simulations")
plt.xlabel("SCRaMbLEd events")
plt.title("Chr length over SCRaMbLE events")
plt.hlines(len(essential), 0, len(SCRaMbLEd_events), colors="red", linestyle=":", label="Essential LU")
plt.text(SCRaMbLEd_events[-1]*-0.16, len(syn_chr)*-0.14, "Simulations = " + str(simulations) + "\n" + "Initial chr length = " + str(len(syn_chr)) + "\n" + "Max SCRaMbLEd events = " + str(SCRaMbLEd_events[-1]) + "\n" + "Essential LU = "+ str(len(essential)))
# plt.savefig('chr_length_events.png')
# I should add the error bar to the histograms
plt.show()
def group_with_essential(chr, essential, L_event):
# grups = chr - L_event + 1 # number of groups where recombination can happen.
G_e = 0 # groups with essential LU
c = 0 # c = counter but it is also how many groups are there
while L_event + c <= len(chr):
event = chr[c:c+L_event]
out_event = chr[:c] + chr[c+L_event:]
for E in essential:
if E in event and E not in out_event:
G_e = G_e + 1
break
c = c + 1
return G_e
# Calculate analytically what is the chromosome length equilibrium after many SCRaMbLEd events
Lt = 44 # initial chromosome length (LU)
essential = 6 # number of essential LU
#P_del = 0.4
#P_dup = 0.2
P_del = 2/7 # probability of deletions
P_dup = 1/7 # probability of duplications
L_event = 7 # length of SCRaMbLE events
SCRaMbLE_events = 50
for i in range(SCRaMbLE_events):
#Lt = Lt - 0.4 * (1 - (essential / Lt)) * 7
#Lt = Lt - 0.4 * (1 - (essential / Lt)) * 7 + 0.2 * 7
#Lt = Lt - P_del * (1 - (essential / Lt)) * L_event + P_dup * L_event
P_zero_essential = ((Lt - essential) / Lt) ** L_event # probability that there are not essential LU in one group
#P_zero_essential = 1
#for j in range(L_event):
# P_zero_essential = P_zero_essential * ((Lt - essential - j) / (Lt - j)) # probability that there are not essential LU in one group
Lt = Lt - P_del * P_zero_essential * L_event + P_dup * L_event
print("P_zero_essential =", round(P_zero_essential,3))
print("Lt =", round(Lt,1))
# Formula to calculate the chromosome length at equilibrium, after many SCRaMbLE events.
# essential / Lt1 = (P_del - P_dup) / P_del
L_equilibrium = (P_del / (P_del - P_dup)) * essential
print("L_equilibrium =", L_equilibrium)
#grups = Lt - L_event + 1 # number of groups where recombination can happen.
#P_zero_essential = ((Lt - essential) / Lt) ** L_event # probability that there are not essential LU in one group
#P_more_1_essential = 1 - P_zero_essential
#Lt = Lt - P_del * P_zero_essential * L_event + P_dup * L_event
# Here I try to simulate the length of one chromosome as it goes throught SCRaMbLE events.
syn_chr = list(range(1, 45, 1))
essential = [2,7,9,10,12,20]
L_event = 7 # length of SCRaMbLE events
expected_L = 44
simulations = 50
S = syn_chr[:]
for i in range(simulations):
S = force_SCRaMLE(S, 1, essential)
# Calculate probability of deletions
print("--------------")
print("expected length =", round(expected_L,1), "real legth =", len(S))
G_essential = group_with_essential(S, essential, 7)
groups = len(S) - 7 + 1
P_del = 2 / 7 * (1 - G_essential / groups)
expected_L = len(S) - P_del * 7 + 1 / 7 * 7
print("probability of deletion = ", round(P_del,4))