suppressPackageStartupMessages({
library(epitopeR)
library(tidyverse)
library(ggVennDiagram)
})
<- "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT" spike
<- mhcI(ag_present = "HLA-A*02:01", ag_stim = spike) spike_peps
<- c("YLQPRTFLL", "RLQSLQTYV","VLNDILSRL", "RLNEVAKNL", "NLNESLIDL", "FIAGLIAIV", "TLDSKTQSL", "VVFLHVTYV", "KLPDDFTGCV", "SIIAYTMSL", "KIADYNYKL", "LLFNKVTLA","ALNTLVKQL", "RLDKVEAEV", "LITGRLQSL", "RLITGRLQSL", "FLHVTYVPA", "KIYSKHTPI") a2_spike_peps
<- as.vector(unique(spike_peps$pep_stim))
spike_peps_vector
ggVennDiagram(x = list(spike_peps_vector, a2_spike_peps),
category.names = c("Predicted Peptides", "Known Peptides"),
set_size = 2,
label = "count") +
theme(legend.position = "none")
13 of 18 known peptides epitopes are detected using standard parameters. An additional 140 peptide epitopes are also predicted. The remaining 5 peptide epitopes can be detected by increasing the threshold filters, however, this will also increase the likelihood of false-positive peptide prediction.
<- spike
omicron_spike
str_sub(omicron_spike, 67, 67) <- "V"
str_sub(omicron_spike, 95, 95) <- "I"
str_sub(omicron_spike, 145, 145) <- "D"
str_sub(omicron_spike, 212, 212) <- "I"
str_sub(omicron_spike, 339, 339) <- "D"
str_sub(omicron_spike, 371, 371) <- "L"
str_sub(omicron_spike, 373, 373) <- "P"
str_sub(omicron_spike, 375, 375) <- "F"
str_sub(omicron_spike, 417, 417) <- "N"
str_sub(omicron_spike, 440, 440) <- "K"
str_sub(omicron_spike, 446, 446) <- "S"
str_sub(omicron_spike, 477, 477) <- "N"
str_sub(omicron_spike, 478, 478) <- "K"
str_sub(omicron_spike, 484, 484) <- "A"
str_sub(omicron_spike, 493, 493) <- "R"
str_sub(omicron_spike, 496, 496) <- "S"
str_sub(omicron_spike, 498, 498) <- "R"
str_sub(omicron_spike, 501, 501) <- "Y"
str_sub(omicron_spike, 505, 505) <- "H"
str_sub(omicron_spike, 547, 547) <- "K"
str_sub(omicron_spike, 614, 614) <- "G"
str_sub(omicron_spike, 655, 655) <- "Y"
str_sub(omicron_spike, 679, 679) <- "K"
str_sub(omicron_spike, 681, 681) <- "H"
str_sub(omicron_spike, 764, 764) <- "K"
str_sub(omicron_spike, 796, 796) <- "Y"
str_sub(omicron_spike, 856, 856) <- "K"
str_sub(omicron_spike, 954, 954) <- "H"
str_sub(omicron_spike, 969, 969) <- "K"
str_sub(omicron_spike, 981, 981) <- "F"
str_sub(omicron_spike, 214, 214) <- "EPER"
str_sub(omicron_spike, 211, 212) <- ""
str_sub(omicron_spike, 141, 144) <- ""
str_sub(omicron_spike, 69, 70) <- ""
<- mhcI(ag_present = "HLA-A*02:01", ag_stim = omicron_spike) omicron_spike_peps
<- as.vector(unique(omicron_spike_peps$pep_stim))
omicron_peps_vector
ggVennDiagram(x=list(spike_peps_vector, omicron_peps_vector),
category.names= c("Original Spike Protein", "Omicron Spike Protein"),
set_size = 2,
label="count") +
theme(legend.position = "none")
%>%
spike_peps filter(!pep_stim %in% omicron_spike_peps$pep_stim) %>%
filter(pep_stim %in% a2_spike_peps)
#> # A tibble: 2 × 12
#> allele start end length pep_s…¹ antigen ic50 perce…² stren…³ stren…⁴ method
#> <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 HLA-A… 417 425 9 KIADYN… spike 36.1 "" strong "" ann
#> 2 HLA-A… 976 984 9 VLNDIL… spike 33.6 "" strong "" ann
#> # … with 1 more variable: ag_type <chr>, and abbreviated variable names
#> # ¹pep_stim, ²percentile_rank, ³strength_ic50, ⁴strength_rank
#* 1. load libraries *#
library(msa) # library to for alignment plots
library(Biostrings) # library for AAStringSet
library(here) # path to save plots
#* 2. check s417 and 976 *#
# The K-> N mutation at 417 keeps this peptide from being a strong binder
<- substr(spike, 417, 425) # "KIADYNYKL"
s417
# The L->F mutation at 981 alters this peptide's binding ability
<- substr(spike, 976, 984) # "VLNDILSRL"
s976
#* 3. align spike and omicron_spike *#
<- AAStringSet(c(spike, omicron_spike))
seq_in names(seq_in) <- c("spike", "omicron_spike")
<- msa(seq_in, type = "protein", order = "input")
aln
# pull out aligned sequences
<- aln@unmasked$spike %>% as.character()
spk_algn <- aln@unmasked$omicron_spike %>% as.character()
omc_algn
#* 4. find locations of s417 and s976*#
<- str_locate(spk_algn, s417)[1,1]
s417_st <- str_locate(spk_algn, s417)[1,2]
s417_ed == str_sub(omc_algn, s417_st, s417_ed)
s417 <- str_sub(spk_algn, s417_st, s417_ed)
s417_algn
<- str_locate(spk_algn, s976)[1,1]
s976_st <- str_locate(spk_algn, s976)[1,2]
s976_ed == str_sub(spk_algn, s976_st, s976_ed)
s976 <- str_sub(spk_algn, s976_st, s976_ed)
s976_algn
#* 5. pull out sub_sequence starting from s417 and ending at s976 *#
<- str_sub(spk_algn, s417_st, s976_ed)
spk_algn <- str_sub(omc_algn, s417_st, s976_ed)
omc_algn
#* 6. add - to s417 and s976 to make them have same length with aligned sub spike and sub omicron-spike *#
<- nchar(omc_algn) - nchar(s417_algn)
s417_n <- paste0(s417_algn, strrep("-", s417_n))
s417_algn
<- nchar(omc_algn) - nchar(s976_algn)
s976_n <- paste0(strrep("-", s976_n), s976_algn)
s976_algn
#* 6. put all sub-sequences into one AAStringSet and plot *#
<- AAStringSet(c(spk_algn, omc_algn, s417_algn, s976_algn))
seq4plot names(seq4plot) <- c("spik", "omicron_spike", "s417", "s976")
# fake sub alignment
<- as(AAMultipleAlignment(seq4plot),
aln4plot "MsaAAMultipleAlignment")
<- "s417_"
fn <- c(1,9)
seq_pos
<- "s976_"
fn <- c(nchar(s976_algn)-9+1,nchar(s976_algn))
seq_pos
<- "s417_s976_"
fn <- c(1,nchar(s976_algn))
seq_pos
## create PDF file according to some custom settings
<- function(fl_nm, seq_nm, pos) {
PlotAlgn <- tempfile(pattern=fl_nm,
tmpFile tmpdir=here(),
fileext=".pdf")
msaPrettyPrint(seq_nm,
pos,file = tmpFile,
output = "pdf",
showNames = "left",
showNumbering = "right",
showLogo = "top",
showConsensus = "bottom",
logoColors = "rasmol",
verbose = FALSE,
askForOverwrite = FALSE,
showLegend = FALSE)
}
PlotAlgn(fl_nm = "s417_",
seq_nm = aln4plot,
pos = c(1,9))
PlotAlgn(fl_nm = "s976_",
seq_nm = aln4plot,
pos = c(nchar(s976_algn)-9+1, nchar(s976_algn)))
PlotAlgn(fl_nm = "s417_s976_",
seq_nm = aln4plot,
pos = c(1, nchar(s976_algn)))
::include_graphics(system.file("extdata/vgn/plot/covid_s417.pdf",
knitrpackage = "epitopeR"))
::include_graphics(system.file("extdata/vgn/plot/covid_s976.pdf",
knitrpackage = "epitopeR"))
For those interested in this question, this manuscript has done an excellent job of elaborating the details of the WT->Omicron spike response. Naranbhai V, Nathan A, Kaseke C, Berrios C, Khatri A, Choi S, Getz MA, Tano-Menka R, Ofoman O, Gayton A, Senjobe F, Zhao Z, St Denis KJ, Lam EC, Carrington M, Garcia-Beltran WF, Balazs AB, Walker BD, Iafrate AJ, Gaiha GD. T cell reactivity to the SARS-CoV-2 Omicron variant is preserved in most but not all individuals. Cell. 2022 Mar 17;185(6):1041-1051.e6. doi: 10.1016/j.cell.2022.01.029. Epub 2022 Feb 3. Erratum in: Cell. 2022 Mar 31;185(7):1259. PMID: 35202566; PMCID: PMC8810349.
#Additional References 1. Spike protein sequence:Uniprot 2. A02:01 binding peptides: Shomuradova 2020, Immunity, PMC7664363 and Immudex 3. Mutations in spike for Omicron - D’Arminio 2022, Molecules