knitr::opts_chunk$set(echo = TRUE)
#Only needs to be run once per computer
# if(!("BiocManager" %in% rownames(installed.packages()))) install.packages("BiocManager")
# BiocManager::install("remotes", dependencies=T)
# BiocManager::install("hfang-bristol/XGR", dependencies=T)
#XGR Pathway Analysis
library(XGR)
#Data Cleaning and Plotting Tools
library(tidyverse)
#Rmarkdown table creation
library(kableExtra)
#Color sets for Plots
library(RColorBrewer)
#Enrichment Analysis from differential expression experiment
load('allcontrasts.RData')
head(allcontrasts %>%
dplyr::select(trt,gene,Tissue,pvalue,padj,log2FoldChange) %>%
arrange(pvalue)) %>%
kable(booktabs = T, caption = "All Contrasts Dataset") %>%
kable_styling(full_width = T,bootstrap_options = c("striped",'hover'), font_size = 9)
trt | gene | Tissue | pvalue | padj | log2FoldChange |
---|---|---|---|---|---|
O3_Stress_vs_Air_Stress | Pkd2l1 | AD | 0 | 0 | 5.088033 |
O3_Stress_vs_Air_Stress | Tex261 | AD | 0 | 0 | 2.468787 |
O3_SI_vs_Air_SI | Pkd2l1 | AD | 0 | 0 | 5.014162 |
O3_None_vs_Air_None | Pkd2l1 | AD | 0 | 0 | 5.250183 |
O3_None_vs_Air_None | Tex261 | AD | 0 | 0 | 2.438329 |
O3_Stress_vs_Air_Stress | Slc12a3 | liver | 0 | 0 | 4.507781 |
Find all Unique Contrasts by looking at all unique treatments in our experiment
Our default background is all annotated genes, however, we want to limit our background to only genes in our data in order to properly see what pathways are enriched in our data set.
This background gene set falls into the competitive null hypothesis since we are asking if the genes in our set are more differentially expressed then all backgrounds as opposed to a self contained analysis
mycontrasts <- unique(allcontrasts$trt)
mycontrasts
## [1] "Air_SI_vs_Air_None" "Air_Stress_vs_Air_None"
## [3] "O3_SI_vs_O3_None" "O3_Stress_vs_O3_None"
## [5] "O3_Stress_vs_Air_Stress" "O3_SI_vs_Air_SI"
## [7] "O3_None_vs_Air_None" "Air_Stress_vs_Air_SI"
## [9] "O3_Stress_vs_O3_SI"
ht_contrast_data <- allcontrasts %>% dplyr::filter(Tissue == 'HT')
#Turn to upper
ht_background <-unique(toupper(ht_contrast_data$gene))
tail(ht_background)
## [1] "SNOU89" "SNOZ30" "SNOPSI28S-3327" "SNOSNR60_Z15"
## [5] "STE2" "UC_338"
This function takes our DESeq2 output and turns them into the vector of significant genes that XGR needs
However, if you started with just a vector of significant genes and a background this step can be skipped
Simplifies analysis for many contrasts using xEnricherGenes and xEnrichConciser
This function takes our input contrast data, filters it at an FDR level of 0.1 and a log2 Fold Change to what you need for a specific contrast to get our vector of input genes for xEnricherGenes
xEnricherGenes then takes this vector of symbols, compares them to our selected background for our chosen ontology set and returns a list containing the information we need to visualize our enriched pathways
If you turn the tree functionality on you can remove redundant terms but significantly increase run time
xEnrichConciser is used to clarify the results by removing redundant terms.
Generate_XGR_list <- function(data, contrast, mygo , tree = F, l2fc ,background){
mysymbol <- data %>% dplyr::filter(padj < 0.1, abs(log2FoldChange) > l2fc, trt == contrast) %>%
dplyr::select(gene, padj) %>% mutate(symbol = toupper(gene)) %>%
pull(symbol)
myxgr <- xEnricherGenes(data = mysymbol, ontology = mygo, background = background,ontology.algorithm = ifelse(tree == F,"none","lea"))
if(tree == F){myxgr <- try(xEnrichConciser(myxgr))}
}
This lets us compare enriched pathways by contrasts
Repeats the function for all of our contrasts
myxgr_list_hallmark <- list(
Generate_XGR_list(contrast = mycontrasts[1], mygo = "MsigdbH", data = ht_contrast_data, l2fc = 0,background = ht_background),
Generate_XGR_list(contrast = mycontrasts[2], mygo = "MsigdbH", data = ht_contrast_data, l2fc = 0,background = ht_background),
Generate_XGR_list(contrast = mycontrasts[3], mygo = "MsigdbH", data = ht_contrast_data, l2fc = 0,background = ht_background),
Generate_XGR_list(contrast = mycontrasts[4], mygo = "MsigdbH", data = ht_contrast_data, l2fc = 0,background = ht_background),
Generate_XGR_list(contrast = mycontrasts[5], mygo = "MsigdbH", data = ht_contrast_data, l2fc = 0, background = ht_background),
Generate_XGR_list(contrast = mycontrasts[6], mygo = "MsigdbH", data = ht_contrast_data, l2fc = 0,background = ht_background),
Generate_XGR_list(contrast = mycontrasts[7], mygo = "MsigdbH", data = ht_contrast_data, l2fc = 0,background = ht_background),
Generate_XGR_list(contrast = mycontrasts[8], mygo = "MsigdbH", data = ht_contrast_data, l2fc = 0,background = ht_background),
Generate_XGR_list(contrast = mycontrasts[9], mygo = "MsigdbH", data = ht_contrast_data, l2fc = 0,background = ht_background))
names(myxgr_list_hallmark) <- mycontrasts
contrasts_of_interest <- c("O3_Stress_vs_Air_Stress","O3_SI_vs_Air_SI","O3_None_vs_Air_None")
input_contrasts <- myxgr_list_hallmark[contrasts_of_interest]
p <- xEnrichCompare(input_contrasts, displayBy="fdr", FDR.cutoff = 0.1, wrap.width = 45) +
scale_fill_brewer(palette='Set2') +
ggtitle('Hallmark Pathway Enrichments (FDR < 0.1)')
p
one_contrast <- Generate_XGR_list(contrast = mycontrasts[5], mygo = "MsigdbH", data = ht_contrast_data, l2fc = 0,background = ht_background)
one_contrast_plot <- xEnrichBarplot(one_contrast, top_num=10, displayBy="fc")
one_contrast_plot
head(xEnrichViewer(input_contrasts[[1]], top_num = 250, sortBy = "adjp", details = T) %>%
mutate(Contrast = names(input_contrasts)[1]),1) %>%
kable(booktabs = T, caption = "xEnrichViewer Output") %>%
kable_styling(full_width = T,bootstrap_options = c("striped",'hover'), font_size = 9)
name | nAnno | nOverlap | fc | zscore | pvalue | adjp | or | CIl | CIu | distance | namespace | members_Overlap | members_Anno | Contrast |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Genes encoding components of blood coagulation system; also up-regulated in platelets. | 125 | 73 | 1.46 | 4.23 | 2.2e-05 | 0.0011 | 2.12 | 1.47 | 3.1 | H | A2M, ACOX2, ADAM9, ANXA1, APOA1, APOC1, APOC2, APOC3, C1QA, C2, C3, C8A, C8B, C8G, C9, CAPN2, CAPN5, CASP9, CFH, CFI, CLU, CPB2, CPN1, CTSH, CTSK, DCT, DPP4, DUSP6, F10, F11, F12, F13B, F2, F9, FGA, FGG, GNB2, GSN, HMGCS2, HNF4A, HPN, HRG, HTRA1, ISCU, ITIH1, KLKB1, LAMP2, MAFF, MMP14, MMP2, P2RY1, PEF1, PLEK, PLG, PROC, PROS1, PROZ, PRSS23, RAC1, RAPGEF3, RGN, SERPINA1, SERPINC1, SERPING1, SIRT2, TF, TFPI2, THBD, THBS1, TIMP3, TMPRSS6, USP11, VWF | A2M, ACOX2, ADAM9, ANXA1, APOA1, APOC1, APOC2, APOC3, ARF4, BMP1, C1QA, C1R, C1S, C2, C3, C8A, C8B, C8G, C9, CAPN2, CAPN5, CASP9, CD9, CFB, CFD, CFH, CFI, CLU, COMP, CPB2, CPN1, CPQ, CRIP2, CSRP1, CTSB, CTSE, CTSH, CTSK, DCT, DPP4, DUSP14, DUSP6, F10, F11, F12, F13B, F2, F2RL2, F3, F8, F9, FBN1, FGA, FGG, FN1, FURIN, FYN, GDA, GNB2, GNG12, GP1BA, GP9, GSN, HMGCS2, HNF4A, HPN, HRG, HTRA1, ISCU, ITGA2, ITGB3, ITIH1, KLF7, KLKB1, LAMP2, LEFTY2, LGMN, LRP1, LTA4H, MAFF, MASP2, MEP1A, MMP14, MMP15, MMP2, MMP9, MSRB2, MST1, OLR1, P2RY1, PDGFB, PEF1, PF4, PLAT, PLAU, PLEK, PLG, PREP, PROC, PROS1, PROZ, PRSS23, RAC1, RAPGEF3, RGN, S100A1, S100A13, SERPINA1, SERPINB2, SERPINC1, SERPINE1, SERPING1, SH2B2, SIRT2, SPARC, TF, TFPI2, THBD, THBS1, TIMP1, TIMP3, TMPRSS6, USP11, VWF, WDR1 | O3_Stress_vs_Air_Stress |
network_input <- allcontrasts %>%
dplyr::filter(trt == 'O3_Stress_vs_Air_Stress') %>%
mutate(Symbol = toupper(gene)) %>%
dplyr::select(Symbol,padj)
RData.location <- "http://galahad.well.ox.ac.uk/bigdata"
subg_func <- xSubneterGenes(data=network_input, network="STRING_high", subnet.size=75, RData.location=RData.location)
subg <- subg_func
pattern <- -log10(as.numeric(V(subg)$significance))
xVisNet(g=subg, pattern=pattern, vertex.shape="sphere", vertex.label.font=2, newpage=F)
sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] RColorBrewer_1.1-2 kableExtra_1.1.0 forcats_0.4.0 stringr_1.4.0
## [5] dplyr_0.8.3 purrr_0.3.3 readr_1.3.1 tidyr_1.0.0
## [9] tibble_3.0.2 tidyverse_1.3.0 XGR_1.1.7 ggplot2_3.2.1
## [13] dnet_1.1.7 supraHex_1.24.0 hexbin_1.28.0 igraph_1.2.4.2
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-142 bitops_1.0-6 fs_1.3.1
## [4] lubridate_1.7.4 webshot_0.5.2 httr_1.4.1
## [7] GenomeInfoDb_1.22.0 Rgraphviz_2.30.0 tools_3.6.2
## [10] backports_1.1.5 R6_2.4.1 DBI_1.1.0
## [13] lazyeval_0.2.2 BiocGenerics_0.32.0 colorspace_1.4-1
## [16] withr_2.1.2 tidyselect_1.1.0 compiler_3.6.2
## [19] graph_1.64.0 cli_2.0.1 rvest_0.3.5
## [22] xml2_1.2.2 network_1.16.0 labeling_0.3
## [25] scales_1.1.0 RCircos_1.2.1 digest_0.6.23
## [28] rmarkdown_2.0 XVector_0.26.0 pkgconfig_2.0.3
## [31] htmltools_0.4.0 highr_0.8 dbplyr_1.4.2
## [34] rlang_0.4.6 readxl_1.3.1 rstudioapi_0.10
## [37] farver_2.0.1 generics_0.0.2 jsonlite_1.6
## [40] statnet.common_4.3.0 RCurl_1.98-1.1 magrittr_1.5
## [43] ggnetwork_0.5.1 GenomeInfoDbData_1.2.2 Matrix_1.2-18
## [46] fansi_0.4.1 Rcpp_1.0.3 munsell_0.5.0
## [49] S4Vectors_0.24.3 ape_5.3 lifecycle_0.2.0
## [52] stringi_1.4.4 yaml_2.2.0 MASS_7.3-51.4
## [55] zlibbioc_1.32.0 plyr_1.8.5 grid_3.6.2
## [58] parallel_3.6.2 ggrepel_0.8.1 crayon_1.3.4
## [61] lattice_0.20-38 haven_2.2.0 hms_0.5.3
## [64] sna_2.5 knitr_1.26 pillar_1.4.3
## [67] GenomicRanges_1.38.0 reshape2_1.4.3 stats4_3.6.2
## [70] reprex_0.3.0 glue_1.3.1 evaluate_0.14
## [73] modelr_0.1.5 vctrs_0.3.1 cellranger_1.1.0
## [76] gtable_0.3.0 assertthat_0.2.1 xfun_0.11
## [79] broom_0.7.0 coda_0.19-3 viridisLite_0.3.0
## [82] IRanges_2.20.2 ellipsis_0.3.0