Last active
January 28, 2021 00:09
-
-
Save damiankao/81b6ebd123b9ccf98e0e47f1ddd3ddd5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ''' | |
| Naive probabilistic approach to reverse-engineer the vaccine sequence: | |
| 1. For every codon-position, base, and amino-acid result, calculate a probability of base change. | |
| 2. Apply this probability to the viral sequence and generate a vaccine sequence. | |
| 3. Compare the generated vaccine sequence to the known vaccine sequence and check for % match. | |
| This code is in reference to: | |
| https://berthub.eu/articles/posts/part-2-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/ | |
| Requires python3 | |
| ''' | |
| from collections import Counter,defaultdict | |
| import random | |
| codon_comparison = [x.strip().split(',')[1:] for x in '''0,ATG,ATG | |
| 3,TTT,TTC | |
| 6,GTT,GTG | |
| 9,TTT,TTC | |
| 12,CTT,CTG | |
| 15,GTT,GTG | |
| 18,TTA,CTG | |
| 21,TTG,CTG | |
| 24,CCA,CCT | |
| 27,CTA,CTG | |
| 30,GTC,GTG | |
| 33,TCT,TCC | |
| 36,AGT,AGC | |
| 39,CAG,CAG | |
| 42,TGT,TGT | |
| 45,GTT,GTG | |
| 48,AAT,AAC | |
| 51,CTT,CTG | |
| 54,ACA,ACC | |
| 57,ACC,ACC | |
| 60,AGA,AGA | |
| 63,ACT,ACA | |
| 66,CAA,CAG | |
| 69,TTA,CTG | |
| 72,CCC,CCT | |
| 75,CCT,CCA | |
| 78,GCA,GCC | |
| 81,TAC,TAC | |
| 84,ACT,ACC | |
| 87,AAT,AAC | |
| 90,TCT,AGC | |
| 93,TTC,TTT | |
| 96,ACA,ACC | |
| 99,CGT,AGA | |
| 102,GGT,GGC | |
| 105,GTT,GTG | |
| 108,TAT,TAC | |
| 111,TAC,TAC | |
| 114,CCT,CCC | |
| 117,GAC,GAC | |
| 120,AAA,AAG | |
| 123,GTT,GTG | |
| 126,TTC,TTC | |
| 129,AGA,AGA | |
| 132,TCC,TCC | |
| 135,TCA,AGC | |
| 138,GTT,GTG | |
| 141,TTA,CTG | |
| 144,CAT,CAC | |
| 147,TCA,TCT | |
| 150,ACT,ACC | |
| 153,CAG,CAG | |
| 156,GAC,GAC | |
| 159,TTG,CTG | |
| 162,TTC,TTC | |
| 165,TTA,CTG | |
| 168,CCT,CCT | |
| 171,TTC,TTC | |
| 174,TTT,TTC | |
| 177,TCC,AGC | |
| 180,AAT,AAC | |
| 183,GTT,GTG | |
| 186,ACT,ACC | |
| 189,TGG,TGG | |
| 192,TTC,TTC | |
| 195,CAT,CAC | |
| 198,GCT,GCC | |
| 201,ATA,ATC | |
| 204,CAT,CAC | |
| 207,GTC,GTG | |
| 210,TCT,TCC | |
| 213,GGG,GGC | |
| 216,ACC,ACC | |
| 219,AAT,AAT | |
| 222,GGT,GGC | |
| 225,ACT,ACC | |
| 228,AAG,AAG | |
| 231,AGG,AGA | |
| 234,TTT,TTC | |
| 237,GAT,GAC | |
| 240,AAC,AAC | |
| 243,CCT,CCC | |
| 246,GTC,GTG | |
| 249,CTA,CTG | |
| 252,CCA,CCC | |
| 255,TTT,TTC | |
| 258,AAT,AAC | |
| 261,GAT,GAC | |
| 264,GGT,GGG | |
| 267,GTT,GTG | |
| 270,TAT,TAC | |
| 273,TTT,TTT | |
| 276,GCT,GCC | |
| 279,TCC,AGC | |
| 282,ACT,ACC | |
| 285,GAG,GAG | |
| 288,AAG,AAG | |
| 291,TCT,TCC | |
| 294,AAC,AAC | |
| 297,ATA,ATC | |
| 300,ATA,ATC | |
| 303,AGA,AGA | |
| 306,GGC,GGC | |
| 309,TGG,TGG | |
| 312,ATT,ATC | |
| 315,TTT,TTC | |
| 318,GGT,GGC | |
| 321,ACT,ACC | |
| 324,ACT,ACA | |
| 327,TTA,CTG | |
| 330,GAT,GAC | |
| 333,TCG,AGC | |
| 336,AAG,AAG | |
| 339,ACC,ACC | |
| 342,CAG,CAG | |
| 345,TCC,AGC | |
| 348,CTA,CTG | |
| 351,CTT,CTG | |
| 354,ATT,ATC | |
| 357,GTT,GTG | |
| 360,AAT,AAC | |
| 363,AAC,AAC | |
| 366,GCT,GCC | |
| 369,ACT,ACC | |
| 372,AAT,AAC | |
| 375,GTT,GTG | |
| 378,GTT,GTC | |
| 381,ATT,ATC | |
| 384,AAA,AAA | |
| 387,GTC,GTG | |
| 390,TGT,TGC | |
| 393,GAA,GAG | |
| 396,TTT,TTC | |
| 399,CAA,CAG | |
| 402,TTT,TTC | |
| 405,TGT,TGC | |
| 408,AAT,AAC | |
| 411,GAT,GAC | |
| 414,CCA,CCC | |
| 417,TTT,TTC | |
| 420,TTG,CTG | |
| 423,GGT,GGC | |
| 426,GTT,GTC | |
| 429,TAT,TAC | |
| 432,TAC,TAC | |
| 435,CAC,CAC | |
| 438,AAA,AAG | |
| 441,AAC,AAC | |
| 444,AAC,AAC | |
| 447,AAA,AAG | |
| 450,AGT,AGC | |
| 453,TGG,TGG | |
| 456,ATG,ATG | |
| 459,GAA,GAA | |
| 462,AGT,AGC | |
| 465,GAG,GAG | |
| 468,TTC,TTC | |
| 471,AGA,CGG | |
| 474,GTT,GTG | |
| 477,TAT,TAC | |
| 480,TCT,AGC | |
| 483,AGT,AGC | |
| 486,GCG,GCC | |
| 489,AAT,AAC | |
| 492,AAT,AAC | |
| 495,TGC,TGC | |
| 498,ACT,ACC | |
| 501,TTT,TTC | |
| 504,GAA,GAG | |
| 507,TAT,TAC | |
| 510,GTC,GTG | |
| 513,TCT,TCC | |
| 516,CAG,CAG | |
| 519,CCT,CCT | |
| 522,TTT,TTC | |
| 525,CTT,CTG | |
| 528,ATG,ATG | |
| 531,GAC,GAC | |
| 534,CTT,CTG | |
| 537,GAA,GAA | |
| 540,GGA,GGC | |
| 543,AAA,AAG | |
| 546,CAG,CAG | |
| 549,GGT,GGC | |
| 552,AAT,AAC | |
| 555,TTC,TTC | |
| 558,AAA,AAG | |
| 561,AAT,AAC | |
| 564,CTT,CTG | |
| 567,AGG,CGC | |
| 570,GAA,GAG | |
| 573,TTT,TTC | |
| 576,GTG,GTG | |
| 579,TTT,TTT | |
| 582,AAG,AAG | |
| 585,AAT,AAC | |
| 588,ATT,ATC | |
| 591,GAT,GAC | |
| 594,GGT,GGC | |
| 597,TAT,TAC | |
| 600,TTT,TTC | |
| 603,AAA,AAG | |
| 606,ATA,ATC | |
| 609,TAT,TAC | |
| 612,TCT,AGC | |
| 615,AAG,AAG | |
| 618,CAC,CAC | |
| 621,ACG,ACC | |
| 624,CCT,CCT | |
| 627,ATT,ATC | |
| 630,AAT,AAC | |
| 633,TTA,CTC | |
| 636,GTG,GTG | |
| 639,CGT,CGG | |
| 642,GAT,GAT | |
| 645,CTC,CTG | |
| 648,CCT,CCT | |
| 651,CAG,CAG | |
| 654,GGT,GGC | |
| 657,TTT,TTC | |
| 660,TCG,TCT | |
| 663,GCT,GCT | |
| 666,TTA,CTG | |
| 669,GAA,GAA | |
| 672,CCA,CCC | |
| 675,TTG,CTG | |
| 678,GTA,GTG | |
| 681,GAT,GAT | |
| 684,TTG,CTG | |
| 687,CCA,CCC | |
| 690,ATA,ATC | |
| 693,GGT,GGC | |
| 696,ATT,ATC | |
| 699,AAC,AAC | |
| 702,ATC,ATC | |
| 705,ACT,ACC | |
| 708,AGG,CGG | |
| 711,TTT,TTT | |
| 714,CAA,CAG | |
| 717,ACT,ACA | |
| 720,TTA,CTG | |
| 723,CTT,CTG | |
| 726,GCT,GCC | |
| 729,TTA,CTG | |
| 732,CAT,CAC | |
| 735,AGA,AGA | |
| 738,AGT,AGC | |
| 741,TAT,TAC | |
| 744,TTG,CTG | |
| 747,ACT,ACA | |
| 750,CCT,CCT | |
| 753,GGT,GGC | |
| 756,GAT,GAT | |
| 759,TCT,AGC | |
| 762,TCT,AGC | |
| 765,TCA,AGC | |
| 768,GGT,GGA | |
| 771,TGG,TGG | |
| 774,ACA,ACA | |
| 777,GCT,GCT | |
| 780,GGT,GGT | |
| 783,GCT,GCC | |
| 786,GCA,GCC | |
| 789,GCT,GCT | |
| 792,TAT,TAC | |
| 795,TAT,TAT | |
| 798,GTG,GTG | |
| 801,GGT,GGC | |
| 804,TAT,TAC | |
| 807,CTT,CTG | |
| 810,CAA,CAG | |
| 813,CCT,CCT | |
| 816,AGG,AGA | |
| 819,ACT,ACC | |
| 822,TTT,TTC | |
| 825,CTA,CTG | |
| 828,TTA,CTG | |
| 831,AAA,AAG | |
| 834,TAT,TAC | |
| 837,AAT,AAC | |
| 840,GAA,GAG | |
| 843,AAT,AAC | |
| 846,GGA,GGC | |
| 849,ACC,ACC | |
| 852,ATT,ATC | |
| 855,ACA,ACC | |
| 858,GAT,GAC | |
| 861,GCT,GCC | |
| 864,GTA,GTG | |
| 867,GAC,GAT | |
| 870,TGT,TGT | |
| 873,GCA,GCT | |
| 876,CTT,CTG | |
| 879,GAC,GAT | |
| 882,CCT,CCT | |
| 885,CTC,CTG | |
| 888,TCA,AGC | |
| 891,GAA,GAG | |
| 894,ACA,ACA | |
| 897,AAG,AAG | |
| 900,TGT,TGC | |
| 903,ACG,ACC | |
| 906,TTG,CTG | |
| 909,AAA,AAG | |
| 912,TCC,TCC | |
| 915,TTC,TTC | |
| 918,ACT,ACC | |
| 921,GTA,GTG | |
| 924,GAA,GAA | |
| 927,AAA,AAG | |
| 930,GGA,GGC | |
| 933,ATC,ATC | |
| 936,TAT,TAC | |
| 939,CAA,CAG | |
| 942,ACT,ACC | |
| 945,TCT,AGC | |
| 948,AAC,AAC | |
| 951,TTT,TTC | |
| 954,AGA,CGG | |
| 957,GTC,GTG | |
| 960,CAA,CAG | |
| 963,CCA,CCC | |
| 966,ACA,ACC | |
| 969,GAA,GAA | |
| 972,TCT,TCC | |
| 975,ATT,ATC | |
| 978,GTT,GTG | |
| 981,AGA,CGG | |
| 984,TTT,TTC | |
| 987,CCT,CCC | |
| 990,AAT,AAT | |
| 993,ATT,ATC | |
| 996,ACA,ACC | |
| 999,AAC,AAT | |
| 1002,TTG,CTG | |
| 1005,TGC,TGC | |
| 1008,CCT,CCC | |
| 1011,TTT,TTC | |
| 1014,GGT,GGC | |
| 1017,GAA,GAG | |
| 1020,GTT,GTG | |
| 1023,TTT,TTC | |
| 1026,AAC,AAT | |
| 1029,GCC,GCC | |
| 1032,ACC,ACC | |
| 1035,AGA,AGA | |
| 1038,TTT,TTC | |
| 1041,GCA,GCC | |
| 1044,TCT,TCT | |
| 1047,GTT,GTG | |
| 1050,TAT,TAC | |
| 1053,GCT,GCC | |
| 1056,TGG,TGG | |
| 1059,AAC,AAC | |
| 1062,AGG,CGG | |
| 1065,AAG,AAG | |
| 1068,AGA,CGG | |
| 1071,ATC,ATC | |
| 1074,AGC,AGC | |
| 1077,AAC,AAT | |
| 1080,TGT,TGC | |
| 1083,GTT,GTG | |
| 1086,GCT,GCC | |
| 1089,GAT,GAC | |
| 1092,TAT,TAC | |
| 1095,TCT,TCC | |
| 1098,GTC,GTG | |
| 1101,CTA,CTG | |
| 1104,TAT,TAC | |
| 1107,AAT,AAC | |
| 1110,TCC,TCC | |
| 1113,GCA,GCC | |
| 1116,TCA,AGC | |
| 1119,TTT,TTC | |
| 1122,TCC,AGC | |
| 1125,ACT,ACC | |
| 1128,TTT,TTC | |
| 1131,AAG,AAG | |
| 1134,TGT,TGC | |
| 1137,TAT,TAC | |
| 1140,GGA,GGC | |
| 1143,GTG,GTG | |
| 1146,TCT,TCC | |
| 1149,CCT,CCT | |
| 1152,ACT,ACC | |
| 1155,AAA,AAG | |
| 1158,TTA,CTG | |
| 1161,AAT,AAC | |
| 1164,GAT,GAC | |
| 1167,CTC,CTG | |
| 1170,TGC,TGC | |
| 1173,TTT,TTC | |
| 1176,ACT,ACA | |
| 1179,AAT,AAC | |
| 1182,GTC,GTG | |
| 1185,TAT,TAC | |
| 1188,GCA,GCC | |
| 1191,GAT,GAC | |
| 1194,TCA,AGC | |
| 1197,TTT,TTC | |
| 1200,GTA,GTG | |
| 1203,ATT,ATC | |
| 1206,AGA,CGG | |
| 1209,GGT,GGA | |
| 1212,GAT,GAT | |
| 1215,GAA,GAA | |
| 1218,GTC,GTG | |
| 1221,AGA,CGG | |
| 1224,CAA,CAG | |
| 1227,ATC,ATT | |
| 1230,GCT,GCC | |
| 1233,CCA,CCT | |
| 1236,GGG,GGA | |
| 1239,CAA,CAG | |
| 1242,ACT,ACA | |
| 1245,GGA,GGC | |
| 1248,AAG,AAG | |
| 1251,ATT,ATC | |
| 1254,GCT,GCC | |
| 1257,GAT,GAC | |
| 1260,TAT,TAC | |
| 1263,AAT,AAC | |
| 1266,TAT,TAC | |
| 1269,AAA,AAG | |
| 1272,TTA,CTG | |
| 1275,CCA,CCC | |
| 1278,GAT,GAC | |
| 1281,GAT,GAC | |
| 1284,TTT,TTC | |
| 1287,ACA,ACC | |
| 1290,GGC,GGC | |
| 1293,TGC,TGT | |
| 1296,GTT,GTG | |
| 1299,ATA,ATT | |
| 1302,GCT,GCC | |
| 1305,TGG,TGG | |
| 1308,AAT,AAC | |
| 1311,TCT,AGC | |
| 1314,AAC,AAC | |
| 1317,AAT,AAC | |
| 1320,CTT,CTG | |
| 1323,GAT,GAC | |
| 1326,TCT,TCC | |
| 1329,AAG,AAA | |
| 1332,GTT,GTC | |
| 1335,GGT,GGC | |
| 1338,GGT,GGC | |
| 1341,AAT,AAC | |
| 1344,TAT,TAC | |
| 1347,AAT,AAT | |
| 1350,TAC,TAC | |
| 1353,CTG,CTG | |
| 1356,TAT,TAC | |
| 1359,AGA,CGG | |
| 1362,TTG,CTG | |
| 1365,TTT,TTC | |
| 1368,AGG,CGG | |
| 1371,AAG,AAG | |
| 1374,TCT,TCC | |
| 1377,AAT,AAT | |
| 1380,CTC,CTG | |
| 1383,AAA,AAG | |
| 1386,CCT,CCC | |
| 1389,TTT,TTC | |
| 1392,GAG,GAG | |
| 1395,AGA,CGG | |
| 1398,GAT,GAC | |
| 1401,ATT,ATC | |
| 1404,TCA,TCC | |
| 1407,ACT,ACC | |
| 1410,GAA,GAG | |
| 1413,ATC,ATC | |
| 1416,TAT,TAT | |
| 1419,CAG,CAG | |
| 1422,GCC,GCC | |
| 1425,GGT,GGC | |
| 1428,AGC,AGC | |
| 1431,ACA,ACC | |
| 1434,CCT,CCT | |
| 1437,TGT,TGT | |
| 1440,AAT,AAC | |
| 1443,GGT,GGC | |
| 1446,GTT,GTG | |
| 1449,GAA,GAA | |
| 1452,GGT,GGC | |
| 1455,TTT,TTC | |
| 1458,AAT,AAC | |
| 1461,TGT,TGC | |
| 1464,TAC,TAC | |
| 1467,TTT,TTC | |
| 1470,CCT,CCA | |
| 1473,TTA,CTG | |
| 1476,CAA,CAG | |
| 1479,TCA,TCC | |
| 1482,TAT,TAC | |
| 1485,GGT,GGC | |
| 1488,TTC,TTT | |
| 1491,CAA,CAG | |
| 1494,CCC,CCC | |
| 1497,ACT,ACA | |
| 1500,AAT,AAT | |
| 1503,GGT,GGC | |
| 1506,GTT,GTG | |
| 1509,GGT,GGC | |
| 1512,TAC,TAT | |
| 1515,CAA,CAG | |
| 1518,CCA,CCC | |
| 1521,TAC,TAC | |
| 1524,AGA,AGA | |
| 1527,GTA,GTG | |
| 1530,GTA,GTG | |
| 1533,GTA,GTG | |
| 1536,CTT,CTG | |
| 1539,TCT,AGC | |
| 1542,TTT,TTC | |
| 1545,GAA,GAA | |
| 1548,CTT,CTG | |
| 1551,CTA,CTG | |
| 1554,CAT,CAT | |
| 1557,GCA,GCC | |
| 1560,CCA,CCT | |
| 1563,GCA,GCC | |
| 1566,ACT,ACA | |
| 1569,GTT,GTG | |
| 1572,TGT,TGC | |
| 1575,GGA,GGC | |
| 1578,CCT,CCT | |
| 1581,AAA,AAG | |
| 1584,AAG,AAA | |
| 1587,TCT,AGC | |
| 1590,ACT,ACC | |
| 1593,AAT,AAT | |
| 1596,TTG,CTC | |
| 1599,GTT,GTG | |
| 1602,AAA,AAG | |
| 1605,AAC,AAC | |
| 1608,AAA,AAA | |
| 1611,TGT,TGC | |
| 1614,GTC,GTG | |
| 1617,AAT,AAC | |
| 1620,TTC,TTC | |
| 1623,AAC,AAC | |
| 1626,TTC,TTC | |
| 1629,AAT,AAC | |
| 1632,GGT,GGC | |
| 1635,TTA,CTG | |
| 1638,ACA,ACC | |
| 1641,GGC,GGC | |
| 1644,ACA,ACC | |
| 1647,GGT,GGC | |
| 1650,GTT,GTG | |
| 1653,CTT,CTG | |
| 1656,ACT,ACA | |
| 1659,GAG,GAG | |
| 1662,TCT,AGC | |
| 1665,AAC,AAC | |
| 1668,AAA,AAG | |
| 1671,AAG,AAG | |
| 1674,TTT,TTC | |
| 1677,CTG,CTG | |
| 1680,CCT,CCA | |
| 1683,TTC,TTC | |
| 1686,CAA,CAG | |
| 1689,CAA,CAG | |
| 1692,TTT,TTT | |
| 1695,GGC,GGC | |
| 1698,AGA,CGG | |
| 1701,GAC,GAT | |
| 1704,ATT,ATC | |
| 1707,GCT,GCC | |
| 1710,GAC,GAT | |
| 1713,ACT,ACC | |
| 1716,ACT,ACA | |
| 1719,GAT,GAC | |
| 1722,GCT,GCC | |
| 1725,GTC,GTT | |
| 1728,CGT,AGA | |
| 1731,GAT,GAT | |
| 1734,CCA,CCC | |
| 1737,CAG,CAG | |
| 1740,ACA,ACA | |
| 1743,CTT,CTG | |
| 1746,GAG,GAA | |
| 1749,ATT,ATC | |
| 1752,CTT,CTG | |
| 1755,GAC,GAC | |
| 1758,ATT,ATC | |
| 1761,ACA,ACC | |
| 1764,CCA,CCT | |
| 1767,TGT,TGC | |
| 1770,TCT,AGC | |
| 1773,TTT,TTC | |
| 1776,GGT,GGC | |
| 1779,GGT,GGA | |
| 1782,GTC,GTG | |
| 1785,AGT,TCT | |
| 1788,GTT,GTG | |
| 1791,ATA,ATC | |
| 1794,ACA,ACC | |
| 1797,CCA,CCT | |
| 1800,GGA,GGC | |
| 1803,ACA,ACC | |
| 1806,AAT,AAC | |
| 1809,ACT,ACC | |
| 1812,TCT,AGC | |
| 1815,AAC,AAT | |
| 1818,CAG,CAG | |
| 1821,GTT,GTG | |
| 1824,GCT,GCA | |
| 1827,GTT,GTG | |
| 1830,CTT,CTG | |
| 1833,TAT,TAC | |
| 1836,CAG,CAG | |
| 1839,GAT,GAC | |
| 1842,GTT,GTG | |
| 1845,AAC,AAC | |
| 1848,TGC,TGT | |
| 1851,ACA,ACC | |
| 1854,GAA,GAA | |
| 1857,GTC,GTG | |
| 1860,CCT,CCC | |
| 1863,GTT,GTG | |
| 1866,GCT,GCC | |
| 1869,ATT,ATT | |
| 1872,CAT,CAC | |
| 1875,GCA,GCC | |
| 1878,GAT,GAT | |
| 1881,CAA,CAG | |
| 1884,CTT,CTG | |
| 1887,ACT,ACA | |
| 1890,CCT,CCT | |
| 1893,ACT,ACA | |
| 1896,TGG,TGG | |
| 1899,CGT,CGG | |
| 1902,GTT,GTG | |
| 1905,TAT,TAC | |
| 1908,TCT,TCC | |
| 1911,ACA,ACC | |
| 1914,GGT,GGC | |
| 1917,TCT,AGC | |
| 1920,AAT,AAT | |
| 1923,GTT,GTG | |
| 1926,TTT,TTT | |
| 1929,CAA,CAG | |
| 1932,ACA,ACC | |
| 1935,CGT,AGA | |
| 1938,GCA,GCC | |
| 1941,GGC,GGC | |
| 1944,TGT,TGT | |
| 1947,TTA,CTG | |
| 1950,ATA,ATC | |
| 1953,GGG,GGA | |
| 1956,GCT,GCC | |
| 1959,GAA,GAG | |
| 1962,CAT,CAC | |
| 1965,GTC,GTG | |
| 1968,AAC,AAC | |
| 1971,AAC,AAT | |
| 1974,TCA,AGC | |
| 1977,TAT,TAC | |
| 1980,GAG,GAG | |
| 1983,TGT,TGC | |
| 1986,GAC,GAC | |
| 1989,ATA,ATC | |
| 1992,CCC,CCC | |
| 1995,ATT,ATC | |
| 1998,GGT,GGC | |
| 2001,GCA,GCT | |
| 2004,GGT,GGA | |
| 2007,ATA,ATC | |
| 2010,TGC,TGC | |
| 2013,GCT,GCC | |
| 2016,AGT,AGC | |
| 2019,TAT,TAC | |
| 2022,CAG,CAG | |
| 2025,ACT,ACA | |
| 2028,CAG,CAG | |
| 2031,ACT,ACA | |
| 2034,AAT,AAC | |
| 2037,TCT,AGC | |
| 2040,CCT,CCT | |
| 2043,CGG,CGG | |
| 2046,CGG,AGA | |
| 2049,GCA,GCC | |
| 2052,CGT,AGA | |
| 2055,AGT,AGC | |
| 2058,GTA,GTG | |
| 2061,GCT,GCC | |
| 2064,AGT,AGC | |
| 2067,CAA,CAG | |
| 2070,TCC,AGC | |
| 2073,ATC,ATC | |
| 2076,ATT,ATT | |
| 2079,GCC,GCC | |
| 2082,TAC,TAC | |
| 2085,ACT,ACA | |
| 2088,ATG,ATG | |
| 2091,TCA,TCT | |
| 2094,CTT,CTG | |
| 2097,GGT,GGC | |
| 2100,GCA,GCC | |
| 2103,GAA,GAG | |
| 2106,AAT,AAC | |
| 2109,TCA,AGC | |
| 2112,GTT,GTG | |
| 2115,GCT,GCC | |
| 2118,TAC,TAC | |
| 2121,TCT,TCC | |
| 2124,AAT,AAC | |
| 2127,AAC,AAC | |
| 2130,TCT,TCT | |
| 2133,ATT,ATC | |
| 2136,GCC,GCT | |
| 2139,ATA,ATC | |
| 2142,CCC,CCC | |
| 2145,ACA,ACC | |
| 2148,AAT,AAC | |
| 2151,TTT,TTC | |
| 2154,ACT,ACC | |
| 2157,ATT,ATC | |
| 2160,AGT,AGC | |
| 2163,GTT,GTG | |
| 2166,ACC,ACC | |
| 2169,ACA,ACA | |
| 2172,GAA,GAG | |
| 2175,ATT,ATC | |
| 2178,CTA,CTG | |
| 2181,CCA,CCT | |
| 2184,GTG,GTG | |
| 2187,TCT,TCC | |
| 2190,ATG,ATG | |
| 2193,ACC,ACC | |
| 2196,AAG,AAG | |
| 2199,ACA,ACC | |
| 2202,TCA,AGC | |
| 2205,GTA,GTG | |
| 2208,GAT,GAC | |
| 2211,TGT,TGC | |
| 2214,ACA,ACC | |
| 2217,ATG,ATG | |
| 2220,TAC,TAC | |
| 2223,ATT,ATC | |
| 2226,TGT,TGC | |
| 2229,GGT,GGC | |
| 2232,GAT,GAT | |
| 2235,TCA,TCC | |
| 2238,ACT,ACC | |
| 2241,GAA,GAG | |
| 2244,TGC,TGC | |
| 2247,AGC,TCC | |
| 2250,AAT,AAC | |
| 2253,CTT,CTG | |
| 2256,TTG,CTG | |
| 2259,TTG,CTG | |
| 2262,CAA,CAG | |
| 2265,TAT,TAC | |
| 2268,GGC,GGC | |
| 2271,AGT,AGC | |
| 2274,TTT,TTC | |
| 2277,TGT,TGC | |
| 2280,ACA,ACC | |
| 2283,CAA,CAG | |
| 2286,TTA,CTG | |
| 2289,AAC,AAT | |
| 2292,CGT,AGA | |
| 2295,GCT,GCC | |
| 2298,TTA,CTG | |
| 2301,ACT,ACA | |
| 2304,GGA,GGG | |
| 2307,ATA,ATC | |
| 2310,GCT,GCC | |
| 2313,GTT,GTG | |
| 2316,GAA,GAA | |
| 2319,CAA,CAG | |
| 2322,GAC,GAC | |
| 2325,AAA,AAG | |
| 2328,AAC,AAC | |
| 2331,ACC,ACC | |
| 2334,CAA,CAA | |
| 2337,GAA,GAG | |
| 2340,GTT,GTG | |
| 2343,TTT,TTC | |
| 2346,GCA,GCC | |
| 2349,CAA,CAA | |
| 2352,GTC,GTG | |
| 2355,AAA,AAG | |
| 2358,CAA,CAG | |
| 2361,ATT,ATC | |
| 2364,TAC,TAC | |
| 2367,AAA,AAG | |
| 2370,ACA,ACC | |
| 2373,CCA,CCT | |
| 2376,CCA,CCT | |
| 2379,ATT,ATC | |
| 2382,AAA,AAG | |
| 2385,GAT,GAC | |
| 2388,TTT,TTC | |
| 2391,GGT,GGC | |
| 2394,GGT,GGC | |
| 2397,TTT,TTC | |
| 2400,AAT,AAT | |
| 2403,TTT,TTC | |
| 2406,TCA,AGC | |
| 2409,CAA,CAG | |
| 2412,ATA,ATT | |
| 2415,TTA,CTG | |
| 2418,CCA,CCC | |
| 2421,GAT,GAT | |
| 2424,CCA,CCT | |
| 2427,TCA,AGC | |
| 2430,AAA,AAG | |
| 2433,CCA,CCC | |
| 2436,AGC,AGC | |
| 2439,AAG,AAG | |
| 2442,AGG,CGG | |
| 2445,TCA,AGC | |
| 2448,TTT,TTC | |
| 2451,ATT,ATC | |
| 2454,GAA,GAG | |
| 2457,GAT,GAC | |
| 2460,CTA,CTG | |
| 2463,CTT,CTG | |
| 2466,TTC,TTC | |
| 2469,AAC,AAC | |
| 2472,AAA,AAA | |
| 2475,GTG,GTG | |
| 2478,ACA,ACA | |
| 2481,CTT,CTG | |
| 2484,GCA,GCC | |
| 2487,GAT,GAC | |
| 2490,GCT,GCC | |
| 2493,GGC,GGC | |
| 2496,TTC,TTC | |
| 2499,ATC,ATC | |
| 2502,AAA,AAG | |
| 2505,CAA,CAG | |
| 2508,TAT,TAT | |
| 2511,GGT,GGC | |
| 2514,GAT,GAT | |
| 2517,TGC,TGT | |
| 2520,CTT,CTG | |
| 2523,GGT,GGC | |
| 2526,GAT,GAC | |
| 2529,ATT,ATT | |
| 2532,GCT,GCC | |
| 2535,GCT,GCC | |
| 2538,AGA,AGG | |
| 2541,GAC,GAT | |
| 2544,CTC,CTG | |
| 2547,ATT,ATT | |
| 2550,TGT,TGC | |
| 2553,GCA,GCC | |
| 2556,CAA,CAG | |
| 2559,AAG,AAG | |
| 2562,TTT,TTT | |
| 2565,AAC,AAC | |
| 2568,GGC,GGA | |
| 2571,CTT,CTG | |
| 2574,ACT,ACA | |
| 2577,GTT,GTG | |
| 2580,TTG,CTG | |
| 2583,CCA,CCT | |
| 2586,CCT,CCT | |
| 2589,TTG,CTG | |
| 2592,CTC,CTG | |
| 2595,ACA,ACC | |
| 2598,GAT,GAT | |
| 2601,GAA,GAG | |
| 2604,ATG,ATG | |
| 2607,ATT,ATC | |
| 2610,GCT,GCC | |
| 2613,CAA,CAG | |
| 2616,TAC,TAC | |
| 2619,ACT,ACA | |
| 2622,TCT,TCT | |
| 2625,GCA,GCC | |
| 2628,CTG,CTG | |
| 2631,TTA,CTG | |
| 2634,GCG,GCC | |
| 2637,GGT,GGC | |
| 2640,ACA,ACA | |
| 2643,ATC,ATC | |
| 2646,ACT,ACA | |
| 2649,TCT,AGC | |
| 2652,GGT,GGC | |
| 2655,TGG,TGG | |
| 2658,ACC,ACA | |
| 2661,TTT,TTT | |
| 2664,GGT,GGA | |
| 2667,GCA,GCA | |
| 2670,GGT,GGC | |
| 2673,GCT,GCC | |
| 2676,GCA,GCT | |
| 2679,TTA,CTG | |
| 2682,CAA,CAG | |
| 2685,ATA,ATC | |
| 2688,CCA,CCC | |
| 2691,TTT,TTT | |
| 2694,GCT,GCT | |
| 2697,ATG,ATG | |
| 2700,CAA,CAG | |
| 2703,ATG,ATG | |
| 2706,GCT,GCC | |
| 2709,TAT,TAC | |
| 2712,AGG,CGG | |
| 2715,TTT,TTC | |
| 2718,AAT,AAC | |
| 2721,GGT,GGC | |
| 2724,ATT,ATC | |
| 2727,GGA,GGA | |
| 2730,GTT,GTG | |
| 2733,ACA,ACC | |
| 2736,CAG,CAG | |
| 2739,AAT,AAT | |
| 2742,GTT,GTG | |
| 2745,CTC,CTG | |
| 2748,TAT,TAC | |
| 2751,GAG,GAG | |
| 2754,AAC,AAC | |
| 2757,CAA,CAG | |
| 2760,AAA,AAG | |
| 2763,TTG,CTG | |
| 2766,ATT,ATC | |
| 2769,GCC,GCC | |
| 2772,AAC,AAC | |
| 2775,CAA,CAG | |
| 2778,TTT,TTC | |
| 2781,AAT,AAC | |
| 2784,AGT,AGC | |
| 2787,GCT,GCC | |
| 2790,ATT,ATC | |
| 2793,GGC,GGC | |
| 2796,AAA,AAG | |
| 2799,ATT,ATC | |
| 2802,CAA,CAG | |
| 2805,GAC,GAC | |
| 2808,TCA,AGC | |
| 2811,CTT,CTG | |
| 2814,TCT,AGC | |
| 2817,TCC,AGC | |
| 2820,ACA,ACA | |
| 2823,GCA,GCA | |
| 2826,AGT,AGC | |
| 2829,GCA,GCC | |
| 2832,CTT,CTG | |
| 2835,GGA,GGA | |
| 2838,AAA,AAG | |
| 2841,CTT,CTG | |
| 2844,CAA,CAG | |
| 2847,GAT,GAC | |
| 2850,GTG,GTG | |
| 2853,GTC,GTC | |
| 2856,AAC,AAC | |
| 2859,CAA,CAG | |
| 2862,AAT,AAT | |
| 2865,GCA,GCC | |
| 2868,CAA,CAG | |
| 2871,GCT,GCA | |
| 2874,TTA,CTG | |
| 2877,AAC,AAC | |
| 2880,ACG,ACC | |
| 2883,CTT,CTG | |
| 2886,GTT,GTC | |
| 2889,AAA,AAG | |
| 2892,CAA,CAG | |
| 2895,CTT,CTG | |
| 2898,AGC,TCC | |
| 2901,TCC,TCC | |
| 2904,AAT,AAC | |
| 2907,TTT,TTC | |
| 2910,GGT,GGC | |
| 2913,GCA,GCC | |
| 2916,ATT,ATC | |
| 2919,TCA,AGC | |
| 2922,AGT,TCT | |
| 2925,GTT,GTG | |
| 2928,TTA,CTG | |
| 2931,AAT,AAC | |
| 2934,GAT,GAT | |
| 2937,ATC,ATC | |
| 2940,CTT,CTG | |
| 2943,TCA,AGC | |
| 2946,CGT,AGA | |
| 2949,CTT,CTG | |
| 2952,GAC,GAC | |
| 2955,AAA,CCT | |
| 2958,GTT,CCT | |
| 2961,GAG,GAG | |
| 2964,GCT,GCC | |
| 2967,GAA,GAG | |
| 2970,GTG,GTG | |
| 2973,CAA,CAG | |
| 2976,ATT,ATC | |
| 2979,GAT,GAC | |
| 2982,AGG,AGA | |
| 2985,TTG,CTG | |
| 2988,ATC,ATC | |
| 2991,ACA,ACA | |
| 2994,GGC,GGC | |
| 2997,AGA,AGA | |
| 3000,CTT,CTG | |
| 3003,CAA,CAG | |
| 3006,AGT,AGC | |
| 3009,TTG,CTC | |
| 3012,CAG,CAG | |
| 3015,ACA,ACA | |
| 3018,TAT,TAC | |
| 3021,GTG,GTG | |
| 3024,ACT,ACC | |
| 3027,CAA,CAG | |
| 3030,CAA,CAG | |
| 3033,TTA,CTG | |
| 3036,ATT,ATC | |
| 3039,AGA,AGA | |
| 3042,GCT,GCC | |
| 3045,GCA,GCC | |
| 3048,GAA,GAG | |
| 3051,ATC,ATT | |
| 3054,AGA,AGA | |
| 3057,GCT,GCC | |
| 3060,TCT,TCT | |
| 3063,GCT,GCC | |
| 3066,AAT,AAT | |
| 3069,CTT,CTG | |
| 3072,GCT,GCC | |
| 3075,GCT,GCC | |
| 3078,ACT,ACC | |
| 3081,AAA,AAG | |
| 3084,ATG,ATG | |
| 3087,TCA,TCT | |
| 3090,GAG,GAG | |
| 3093,TGT,TGT | |
| 3096,GTA,GTG | |
| 3099,CTT,CTG | |
| 3102,GGA,GGC | |
| 3105,CAA,CAG | |
| 3108,TCA,AGC | |
| 3111,AAA,AAG | |
| 3114,AGA,AGA | |
| 3117,GTT,GTG | |
| 3120,GAT,GAC | |
| 3123,TTT,TTT | |
| 3126,TGT,TGC | |
| 3129,GGA,GGC | |
| 3132,AAG,AAG | |
| 3135,GGC,GGC | |
| 3138,TAT,TAC | |
| 3141,CAT,CAC | |
| 3144,CTT,CTG | |
| 3147,ATG,ATG | |
| 3150,TCC,AGC | |
| 3153,TTC,TTC | |
| 3156,CCT,CCT | |
| 3159,CAG,CAG | |
| 3162,TCA,TCT | |
| 3165,GCA,GCC | |
| 3168,CCT,CCT | |
| 3171,CAT,CAC | |
| 3174,GGT,GGC | |
| 3177,GTA,GTG | |
| 3180,GTC,GTG | |
| 3183,TTC,TTT | |
| 3186,TTG,CTG | |
| 3189,CAT,CAC | |
| 3192,GTG,GTG | |
| 3195,ACT,ACA | |
| 3198,TAT,TAT | |
| 3201,GTC,GTG | |
| 3204,CCT,CCC | |
| 3207,GCA,GCT | |
| 3210,CAA,CAA | |
| 3213,GAA,GAG | |
| 3216,AAG,AAG | |
| 3219,AAC,AAT | |
| 3222,TTC,TTC | |
| 3225,ACA,ACC | |
| 3228,ACT,ACC | |
| 3231,GCT,GCT | |
| 3234,CCT,CCA | |
| 3237,GCC,GCC | |
| 3240,ATT,ATC | |
| 3243,TGT,TGC | |
| 3246,CAT,CAC | |
| 3249,GAT,GAC | |
| 3252,GGA,GGC | |
| 3255,AAA,AAA | |
| 3258,GCA,GCC | |
| 3261,CAC,CAC | |
| 3264,TTT,TTT | |
| 3267,CCT,CCT | |
| 3270,CGT,AGA | |
| 3273,GAA,GAA | |
| 3276,GGT,GGC | |
| 3279,GTC,GTG | |
| 3282,TTT,TTC | |
| 3285,GTT,GTG | |
| 3288,TCA,TCC | |
| 3291,AAT,AAC | |
| 3294,GGC,GGC | |
| 3297,ACA,ACC | |
| 3300,CAC,CAT | |
| 3303,TGG,TGG | |
| 3306,TTT,TTC | |
| 3309,GTA,GTG | |
| 3312,ACA,ACA | |
| 3315,CAA,CAG | |
| 3318,AGG,CGG | |
| 3321,AAT,AAC | |
| 3324,TTT,TTC | |
| 3327,TAT,TAC | |
| 3330,GAA,GAG | |
| 3333,CCA,CCC | |
| 3336,CAA,CAG | |
| 3339,ATC,ATC | |
| 3342,ATT,ATC | |
| 3345,ACT,ACC | |
| 3348,ACA,ACC | |
| 3351,GAC,GAC | |
| 3354,AAC,AAC | |
| 3357,ACA,ACC | |
| 3360,TTT,TTC | |
| 3363,GTG,GTG | |
| 3366,TCT,TCT | |
| 3369,GGT,GGC | |
| 3372,AAC,AAC | |
| 3375,TGT,TGC | |
| 3378,GAT,GAC | |
| 3381,GTT,GTC | |
| 3384,GTA,GTG | |
| 3387,ATA,ATC | |
| 3390,GGA,GGC | |
| 3393,ATT,ATT | |
| 3396,GTC,GTG | |
| 3399,AAC,AAC | |
| 3402,AAC,AAT | |
| 3405,ACA,ACC | |
| 3408,GTT,GTG | |
| 3411,TAT,TAC | |
| 3414,GAT,GAC | |
| 3417,CCT,CCT | |
| 3420,TTG,CTG | |
| 3423,CAA,CAG | |
| 3426,CCT,CCC | |
| 3429,GAA,GAG | |
| 3432,TTA,CTG | |
| 3435,GAC,GAC | |
| 3438,TCA,AGC | |
| 3441,TTC,TTC | |
| 3444,AAG,AAA | |
| 3447,GAG,GAG | |
| 3450,GAG,GAA | |
| 3453,TTA,CTG | |
| 3456,GAT,GAC | |
| 3459,AAA,AAG | |
| 3462,TAT,TAC | |
| 3465,TTT,TTT | |
| 3468,AAG,AAG | |
| 3471,AAT,AAC | |
| 3474,CAT,CAC | |
| 3477,ACA,ACA | |
| 3480,TCA,AGC | |
| 3483,CCA,CCC | |
| 3486,GAT,GAC | |
| 3489,GTT,GTG | |
| 3492,GAT,GAC | |
| 3495,TTA,CTG | |
| 3498,GGT,GGC | |
| 3501,GAC,GAT | |
| 3504,ATC,ATC | |
| 3507,TCT,AGC | |
| 3510,GGC,GGA | |
| 3513,ATT,ATC | |
| 3516,AAT,AAT | |
| 3519,GCT,GCC | |
| 3522,TCA,AGC | |
| 3525,GTT,GTC | |
| 3528,GTA,GTG | |
| 3531,AAC,AAC | |
| 3534,ATT,ATC | |
| 3537,CAA,CAG | |
| 3540,AAA,AAA | |
| 3543,GAA,GAG | |
| 3546,ATT,ATC | |
| 3549,GAC,GAC | |
| 3552,CGC,CGG | |
| 3555,CTC,CTG | |
| 3558,AAT,AAC | |
| 3561,GAG,GAG | |
| 3564,GTT,GTG | |
| 3567,GCC,GCC | |
| 3570,AAG,AAG | |
| 3573,AAT,AAT | |
| 3576,TTA,CTG | |
| 3579,AAT,AAC | |
| 3582,GAA,GAG | |
| 3585,TCT,AGC | |
| 3588,CTC,CTG | |
| 3591,ATC,ATC | |
| 3594,GAT,GAC | |
| 3597,CTC,CTG | |
| 3600,CAA,CAA | |
| 3603,GAA,GAA | |
| 3606,CTT,CTG | |
| 3609,GGA,GGG | |
| 3612,AAG,AAG | |
| 3615,TAT,TAC | |
| 3618,GAG,GAG | |
| 3621,CAG,CAG | |
| 3624,TAT,TAC | |
| 3627,ATA,ATC | |
| 3630,AAA,AAG | |
| 3633,TGG,TGG | |
| 3636,CCA,CCC | |
| 3639,TGG,TGG | |
| 3642,TAC,TAC | |
| 3645,ATT,ATC | |
| 3648,TGG,TGG | |
| 3651,CTA,CTG | |
| 3654,GGT,GGC | |
| 3657,TTT,TTT | |
| 3660,ATA,ATC | |
| 3663,GCT,GCC | |
| 3666,GGC,GGA | |
| 3669,TTG,CTG | |
| 3672,ATT,ATT | |
| 3675,GCC,GCC | |
| 3678,ATA,ATC | |
| 3681,GTA,GTG | |
| 3684,ATG,ATG | |
| 3687,GTG,GTC | |
| 3690,ACA,ACA | |
| 3693,ATT,ATC | |
| 3696,ATG,ATG | |
| 3699,CTT,CTG | |
| 3702,TGC,TGT | |
| 3705,TGT,TGC | |
| 3708,ATG,ATG | |
| 3711,ACC,ACC | |
| 3714,AGT,AGC | |
| 3717,TGC,TGC | |
| 3720,TGT,TGT | |
| 3723,AGT,AGC | |
| 3726,TGT,TGC | |
| 3729,CTC,CTG | |
| 3732,AAG,AAG | |
| 3735,GGC,GGC | |
| 3738,TGT,TGT | |
| 3741,TGT,TGT | |
| 3744,TCT,AGC | |
| 3747,TGT,TGT | |
| 3750,GGA,GGC | |
| 3753,TCC,AGC | |
| 3756,TGC,TGC | |
| 3759,TGC,TGC | |
| 3762,AAA,AAG | |
| 3765,TTT,TTC | |
| 3768,GAT,GAC | |
| 3771,GAA,GAG | |
| 3774,GAC,GAC | |
| 3777,GAC,GAT | |
| 3780,TCT,TCT | |
| 3783,GAG,GAG | |
| 3786,CCA,CCC | |
| 3789,GTG,GTG | |
| 3792,CTC,CTG | |
| 3795,AAA,AAG | |
| 3798,GGA,GGC | |
| 3801,GTC,GTG | |
| 3804,AAA,AAA | |
| 3807,TTA,CTG | |
| 3810,CAT,CAC | |
| 3813,TAC,TAC | |
| 3816,ACA,ACA | |
| 3819,TAA,TGA'''.strip().split('\n')] | |
# Codon -> one-letter amino-acid code for all 64 DNA codons.
# The three stop codons (TAA, TAG, TGA) map to '_'.
aa = {
    'ATA': 'I', 'ATC': 'I', 'ATT': 'I', 'ATG': 'M',
    'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
    'AAC': 'N', 'AAT': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGC': 'S', 'AGT': 'S', 'AGA': 'R', 'AGG': 'R',
    'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
    'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
    'CAC': 'H', 'CAT': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
    'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
    'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
    'GAC': 'D', 'GAT': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
    'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
    'TTC': 'F', 'TTT': 'F', 'TTA': 'L', 'TTG': 'L',
    'TAC': 'Y', 'TAT': 'Y', 'TAA': '_', 'TAG': '_',
    'TGC': 'C', 'TGT': 'C', 'TGA': '_', 'TGG': 'W',
}
def delta_codon(viral, vaccine):
    """Return the positions where the vaccine codon departs from the viral one.

    Each element of the result is a ``(position, vaccine_base)`` tuple, listed
    in ascending position order. ``vaccine`` is indexed at every position of
    ``viral``, so it must be at least as long as ``viral``.
    """
    return [
        (pos, vaccine[pos])
        for pos, viral_base in enumerate(viral)
        if viral_base != vaccine[pos]
    ]
# Tally, for every (codon position, viral base, viral amino acid) context,
# how often the context occurs (the 'all' counter) and how often each
# replacement base shows up in the vaccine codon at that position.
cases = defaultdict(lambda: defaultdict(int))
for pair in codon_comparison:
    viral_codon, vaccine_codon = pair[0], pair[1]
    residue = aa[viral_codon]
    # Every base of the viral codon is one occurrence of its context.
    for pos, viral_base in enumerate(viral_codon):
        cases[(pos, viral_base, residue)]['all'] += 1
    # Each differing position additionally counts the vaccine base seen there.
    for pos, vaccine_base in delta_codon(viral_codon, vaccine_codon):
        cases[(pos, viral_codon[pos], residue)][vaccine_base] += 1
# Turn the tallies into per-context substitution probabilities: the share of
# a context's occurrences in which each replacement base was observed.
# Contexts that hold only the 'all' counter never changed and are left out.
probs = defaultdict(lambda: defaultdict(float))
for context, tally in cases.items():
    if len(tally) <= 1:
        continue
    occurrences = float(tally['all'])
    for replacement, observed in tally.items():
        if replacement == 'all':
            continue
        probs[context][replacement] = observed / occurrences
def simulate(pr):
    """Generate one randomised vaccine candidate from the viral codons.

    For every base of every viral codon in ``codon_comparison`` the context
    ``(position in codon, base, amino acid of the viral codon)`` is looked up
    in ``pr``. When the context is present, the base is replaced by a weighted
    random draw among the replacement bases recorded for it and the original
    base, which receives whatever probability the replacements leave over.
    Bases whose context is absent from ``pr`` are copied unchanged.

    Args:
        pr: Mapping from ``(position, base, amino_acid)`` to a mapping of
            replacement base -> probability.

    Returns:
        A list of codon strings, one per entry of ``codon_comparison``.
    """
    new_codons = []
    for pair in codon_comparison:
        codon = pair[0]
        vr_aa = aa[codon]
        new_bases = []
        for i, base in enumerate(codon):
            key = (i, base, vr_aa)
            new_base = base
            if key in pr:
                candidates = list(pr[key].keys())
                weights = list(pr[key].values())
                # The original base keeps the leftover probability. Float
                # rounding can push the sum of the replacement probabilities
                # marginally above 1, so clamp the remainder at zero:
                # random.choices expects non-negative weights.
                candidates.append(base)
                weights.append(max(0.0, 1 - sum(weights)))
                new_base = random.choices(candidates, weights=weights, k=1)[0]
            new_bases.append(new_base)
        new_codons.append(''.join(new_bases))
    return new_codons
# Known vaccine sequence: the vaccine codons joined in table order.
vaccine_seq = ''.join(pair[1] for pair in codon_comparison)
# One randomised candidate built from the viral codons and the probabilities.
simulate_seq = ''.join(simulate(probs))
def match_percentage(a, b):
    """Return the percentage of positions of ``a`` at which ``b`` is equal.

    ``b`` is indexed at every position of ``a``; the result is the number of
    equal positions divided by ``len(a)``, multiplied by 100.
    """
    matches = sum(1 for pos, a_base in enumerate(a) if a_base == b[pos])
    return float(matches) / len(a) * 100
# Score the candidate against the known vaccine sequence, then print a header
# line starting with '>' followed by the generated sequence itself.
mp = match_percentage(vaccine_seq, simulate_seq)
print(f'>generated vaccine sequence, {mp}% match with known vaccine sequence')
print(simulate_seq)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment