Contents

knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
                      dev="png", message=FALSE, error=FALSE, warning=TRUE)

Citation: if you use MAGeCKFlute in published research, please cite: Binbin Wang, Mei Wang, Wubing Zhang. “Integrative analysis of pooled CRISPR genetic screens using MAGeCKFlute.” Nature Protocols (2019), doi: 10.1038/s41596-018-0113-7.

0.1 Install and load the packages

if(!"MAGeCKFlute" %in% installed.packages()) BiocManager::install("MAGeCKFlute")
if(!"ggplot2" %in% installed.packages()) BiocManager::install("ggplot2")
library(MAGeCKFlute)
library(ggplot2)

1 Input data

MAGeCKFlute requires a weighted gene list for enrichment analysis.

file1 = file.path(system.file("extdata", package = "MAGeCKFlute"),
                  "testdata/rra.gene_summary.txt")
gdata = ReadRRA(file1)
genelist = gdata$Score
names(genelist) = gdata$id
genelist = sort(genelist, decreasing = TRUE)
head(genelist)
##    Jak1   Stat1  Ifngr2  Ifngr1     B2m   H2-D1 
## 10.7440  9.0330  5.6097  5.4719  4.3433  4.2110

2 Enrichment analysis methods

2.1 Hypergeometric test

# Alternative functions EnrichAnalyzer and enrich.HGT.
hgtRes1 = EnrichAnalyzer(genelist[1:200], method = "HGT", 
                         type = "Pathway", organism = "mmu")
head(hgtRes1@result)
##                                              ID                  Description
## BIOCARTA_IFNG_PATHWAY     BIOCARTA_IFNG_PATHWAY                 IFNG PATHWAY
## REACTOME_877312                 REACTOME_877312 Regulation of IFNG signaling
## BIOCARTA_IL22BP_PATHWAY BIOCARTA_IL22BP_PATHWAY               IL22BP PATHWAY
## BIOCARTA_IFNA_PATHWAY     BIOCARTA_IFNA_PATHWAY                 IFNA PATHWAY
## BIOCARTA_TID_PATHWAY       BIOCARTA_TID_PATHWAY                  TID PATHWAY
## REACTOME_877300                 REACTOME_877300   Interferon gamma signaling
##                               NES       pvalue     p.adjust GeneRatio BgRatio
## BIOCARTA_IFNG_PATHWAY   18.307949 9.426335e-11 5.061942e-08     5/107  6/9260
## REACTOME_877312          9.714240 2.444382e-05 4.090024e-03     3/107  9/9260
## BIOCARTA_IL22BP_PATHWAY 15.317503 2.444382e-05 4.090024e-03     3/107  9/9260
## BIOCARTA_IFNA_PATHWAY   13.883283 3.046573e-05 4.090024e-03     4/107 18/9260
## BIOCARTA_TID_PATHWAY     9.455281 4.062445e-05 4.363065e-03     4/107 19/9260
## REACTOME_877300          9.714240 9.124919e-05 6.453154e-03     3/107 12/9260
##                                                geneID
## BIOCARTA_IFNG_PATHWAY   16451/16452/20846/15979/15980
## REACTOME_877312                     16452/15979/15980
## BIOCARTA_IL22BP_PATHWAY             16451/16452/20846
## BIOCARTA_IFNA_PATHWAY         16451/15975/15976/20846
## BIOCARTA_TID_PATHWAY          16452/22059/15979/15980
## REACTOME_877300                     16452/15979/15980
##                                              geneName Count
## BIOCARTA_IFNG_PATHWAY   Jak1/Jak2/Stat1/Ifngr1/Ifngr2     5
## REACTOME_877312                    Jak2/Ifngr1/Ifngr2     3
## BIOCARTA_IL22BP_PATHWAY               Jak1/Jak2/Stat1     3
## BIOCARTA_IFNA_PATHWAY        Jak1/Ifnar1/Ifnar2/Stat1     4
## BIOCARTA_TID_PATHWAY         Jak2/Trp53/Ifngr1/Ifngr2     4
## REACTOME_877300                    Jak2/Ifngr1/Ifngr2     3
# hgtRes2 = enrich.HGT(genelist[1:200])
# head(hgtRes2@result)

2.2 Over-representation test

The ORT and GSEA are implemented in the R package clusterProfiler (Yu et al. 2012). So please install it prior to the analysis.

if(!"clusterProfiler" %in% installed.packages()){
  BiocManager::install("clusterProfiler")
}
library(clusterProfiler)
# Alternative functions EnrichAnalyzer and enrich.ORT.
ortRes1 = EnrichAnalyzer(genelist[1:200], method = "ORT", 
                         type = "KEGG", organism = "mmu")
head(ortRes1@result)
##                          ID
## KEGG_mmu05140 KEGG_mmu05140
## KEGG_mmu05235 KEGG_mmu05235
## KEGG_mmu04612 KEGG_mmu04612
## KEGG_mmu05340 KEGG_mmu05340
## KEGG_mmu05212 KEGG_mmu05212
## KEGG_mmu04658 KEGG_mmu04658
##                                                          Description       NES
## KEGG_mmu05140                                          Leishmaniasis 17.491523
## KEGG_mmu05235 PD-L1 expression and PD-1 checkpoint pathway in cancer 17.795521
## KEGG_mmu04612                    Antigen processing and presentation  9.038750
## KEGG_mmu05340                               Primary immunodeficiency  5.717129
## KEGG_mmu05212                                      Pancreatic cancer 13.069760
## KEGG_mmu04658                       Th1 and Th2 cell differentiation 18.307949
##                     pvalue    p.adjust GeneRatio BgRatio
## KEGG_mmu05140 1.530265e-05 0.001254818      7/54 70/4536
## KEGG_mmu05235 5.495082e-04 0.016338254      6/54 88/4536
## KEGG_mmu04612 6.199206e-04 0.016338254      6/54 90/4536
## KEGG_mmu05340 7.969880e-04 0.016338254      4/54 36/4536
## KEGG_mmu05212 1.927944e-03 0.031618287      5/54 76/4536
## KEGG_mmu04658 3.670334e-03 0.044654889      5/54 88/4536
##                                                  geneID
## KEGG_mmu05140 16451/20846/15980/15979/16452/17357/16412
## KEGG_mmu05235       16451/20846/15980/15979/16452/13645
## KEGG_mmu04612       12010/14964/21354/21355/19727/53970
## KEGG_mmu05340                   21354/21355/19727/53970
## KEGG_mmu05212             16451/20846/12575/13645/22059
## KEGG_mmu04658             16451/20846/15980/15979/16452
##                                                   geneName Count
## KEGG_mmu05140 Jak1/Stat1/Ifngr2/Ifngr1/Jak2/Marcksl1/Itgb1     7
## KEGG_mmu05235            Jak1/Stat1/Ifngr2/Ifngr1/Jak2/Egf     6
## KEGG_mmu04612              B2m/H2-D1/Tap1/Tap2/Rfxank/Rfx5     6
## KEGG_mmu05340                        Tap1/Tap2/Rfxank/Rfx5     4
## KEGG_mmu05212                  Jak1/Stat1/Cdkn1a/Egf/Trp53     5
## KEGG_mmu04658                Jak1/Stat1/Ifngr2/Ifngr1/Jak2     5
# ortRes2 = enrich.ORT(genelist[genelist< -1])
# head(ortRes2@result)

2.3 Gene set enrichment analysis

library(clusterProfiler)
# Alternative functions EnrichAnalyzer and enrich.GSE.
gseRes1 = EnrichAnalyzer(genelist, method = "GSEA", type = "Pathway", organism = "mmu")
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (8.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

2.4 Visualize enrichment results.

2.4.1 Barplot

require(ggplot2)
df = hgtRes1@result
df$logFDR = -log10(df$p.adjust)
p = BarView(df[1:5,], "Description", 'logFDR')
p = p + labs(x = NULL) + coord_flip()
p

# Or use function barplot from enrichplot package
barplot(hgtRes1, showCategory = 5)

2.4.2 Dot plot

## top: up-regulated pathways; 
## bottom: down-regulated pathways
EnrichedView(hgtRes1, top = 5, bottom = 0, mode = 1)

EnrichedView(hgtRes1, top = 5, bottom = 0, mode = 2)

dotplot(hgtRes1, showCategory = 5)

2.4.3 Other visualization functions from enrichplot (Yu 2018).

library(enrichplot)
hgtRes1@result$geneID = hgtRes1@result$geneName
cnetplot(hgtRes1, 5)

heatplot(hgtRes1, showCategory = 5, foldChange = genelist)

tmp <- pairwise_termsim(hgtRes1)
emapplot(tmp, showCategory = 5, layout = "kk")

2.4.4 Visulization for GSEA enriched categories

# show GSEA results of one pathway
idx = which(gseRes1$NES>0)[1]
gseaplot(gseRes1, geneSetID = idx, title = gseRes1$Description[idx])

# show GSEA results of multiple pathways
gseaplot2(gseRes1, geneSetID = which(gseRes1$NES>0)[1:3])

2.5 Type of gene sets for enrichment analysis

For enrichment analysis, MAGeCKFlute signifies the public available gene sets, including Pathways (PID, KEGG, REACTOME, BIOCARTA, C2CP), GO terms (GOBP, GOCC, GOMF), Complexes (CORUM) and molecular signature from MsigDB (c1, c2, c3, c4, c5, c6, c7, HALLMARK).

2.5.1 Functional enrichment analysis on GO terms and pathways

Analysis of high-throughput data increasingly relies on pathway annotation and functional information derived from Gene Ontology, which is also useful in the analysis of CRISPR screens.

## combination of the gene sets
enrichComb = EnrichAnalyzer(genelist[1:200], type = "KEGG+REACTOME", organism = "mmu")
EnrichedView(enrichComb, top = 5)
## All pathways
enrich = EnrichAnalyzer(geneList = genelist[1:200], type = "REACTOME", organism = "mmu")
EnrichedView(enrich, top = 5)
## All gene ontology
enrichGo = EnrichAnalyzer(genelist[1:200], type = "GOBP", organism = "mmu")

2.5.2 Protein complex analysis

Functional annotations from the pathways and GO are powerful in the context of network dynamics. However, the approach has limitations in particular for the analysis of CRISPR screenings, in which elements within a protein complex rather than complete pathways might have a strong selection. So we incorporate protein complex resource from CORUM database, which enable the identification of essential protein complexes from the CRISPR screens.

enrichPro = EnrichAnalyzer(genelist[1:200], type = "Complex", organism = "mmu")
EnrichedView(enrichPro, top = 5)

2.5.3 Enrichment analysis on the combination of the gene sets

2.6 Limit the size of gene sets for testing

Usually, only the core genes in a pathway could be selected in a CRISPR screen, so we recommend to perform enrichment analysis on small pathways instead of pathways involving hundreds or even more genes.

enrich = EnrichAnalyzer(genelist[1:200], type = "GOBP", limit = c(2, 80), organism = "mmu")
EnrichedView(enrich, top = 5)

2.7 Remove redundant results using EnrichedFilter.

The EnrichedFilter function tries to eliminate redundant pathways from the enrichment results by calculating the Jaccard index between pathways.

enrich1 = EnrichAnalyzer(genelist[1:200], type = "Pathway", organism = "mmu")
enrich2 = EnrichedFilter(enrich1)
EnrichedView(enrich1, top = 15)

EnrichedView(enrich2, top = 15)

3 Session info

sessionInfo()
## R version 4.2.0 RC (2022-04-19 r82224)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.15-bioc/R/lib/libRblas.so
## LAPACK: /home/biocbuild/bbs-3.15-bioc/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB              LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] enrichplot_1.16.0     ggplot2_3.3.5         clusterProfiler_4.4.0
## [4] MAGeCKFlute_2.0.0     BiocStyle_2.24.0     
## 
## loaded via a namespace (and not attached):
##   [1] shadowtext_0.1.2              depmap_1.9.2                 
##   [3] AnnotationHub_3.4.0           fastmatch_1.1-3              
##   [5] BiocFileCache_2.4.0           plyr_1.8.7                   
##   [7] igraph_1.3.1                  lazyeval_0.2.2               
##   [9] splines_4.2.0                 BiocParallel_1.30.0          
##  [11] pathview_1.36.0               GenomeInfoDb_1.32.0          
##  [13] sva_3.44.0                    digest_0.6.29                
##  [15] yulab.utils_0.0.4             htmltools_0.5.2              
##  [17] GOSemSim_2.22.0               magick_2.7.3                 
##  [19] viridis_0.6.2                 GO.db_3.15.0                 
##  [21] fansi_1.0.3                   magrittr_2.0.3               
##  [23] memoise_2.0.1                 limma_3.52.0                 
##  [25] annotate_1.74.0               Biostrings_2.64.0            
##  [27] graphlayouts_0.8.0            matrixStats_0.62.0           
##  [29] colorspace_2.0-3              blob_1.2.3                   
##  [31] rappdirs_0.3.3                ggrepel_0.9.1                
##  [33] xfun_0.30                     dplyr_1.0.8                  
##  [35] crayon_1.5.1                  RCurl_1.98-1.6               
##  [37] jsonlite_1.8.0                graph_1.74.0                 
##  [39] scatterpie_0.1.7              genefilter_1.78.0            
##  [41] survival_3.3-1                ape_5.6-2                    
##  [43] glue_1.6.2                    polyclip_1.10-0              
##  [45] gtable_0.3.0                  zlibbioc_1.42.0              
##  [47] XVector_0.36.0                Rgraphviz_2.40.0             
##  [49] BiocGenerics_0.42.0           msigdbr_7.5.1                
##  [51] scales_1.2.0                  DOSE_3.22.0                  
##  [53] pheatmap_1.0.12               edgeR_3.38.0                 
##  [55] DBI_1.1.2                     Rcpp_1.0.8.3                 
##  [57] viridisLite_0.4.0             xtable_1.8-4                 
##  [59] gridGraphics_0.5-1            tidytree_0.3.9               
##  [61] bit_4.0.4                     stats4_4.2.0                 
##  [63] httr_1.4.2                    fgsea_1.22.0                 
##  [65] RColorBrewer_1.1-3            ellipsis_0.3.2               
##  [67] pkgconfig_2.0.3               XML_3.99-0.9                 
##  [69] farver_2.1.0                  sass_0.4.1                   
##  [71] dbplyr_2.1.1                  locfit_1.5-9.5               
##  [73] utf8_1.2.2                    labeling_0.4.2               
##  [75] ggplotify_0.1.0               tidyselect_1.1.2             
##  [77] rlang_1.0.2                   reshape2_1.4.4               
##  [79] later_1.3.0                   AnnotationDbi_1.58.0         
##  [81] munsell_0.5.0                 BiocVersion_3.15.2           
##  [83] tools_4.2.0                   cachem_1.0.6                 
##  [85] downloader_0.4                cli_3.3.0                    
##  [87] generics_0.1.2                RSQLite_2.2.12               
##  [89] ExperimentHub_2.4.0           evaluate_0.15                
##  [91] stringr_1.4.0                 fastmap_1.1.0                
##  [93] yaml_2.3.5                    ggtree_3.4.0                 
##  [95] babelgene_22.3                org.Hs.eg.db_3.15.0          
##  [97] knitr_1.38                    bit64_4.0.5                  
##  [99] tidygraph_1.2.1               purrr_0.3.4                  
## [101] KEGGREST_1.36.0               ggraph_2.0.5                 
## [103] nlme_3.1-157                  mime_0.12                    
## [105] KEGGgraph_1.56.0              aplot_0.1.3                  
## [107] DO.db_2.9                     compiler_4.2.0               
## [109] filelock_1.0.2                curl_4.3.2                   
## [111] png_0.1-7                     interactiveDisplayBase_1.34.0
## [113] treeio_1.20.0                 tibble_3.1.6                 
## [115] tweenr_1.0.2                  bslib_0.3.1                  
## [117] stringi_1.7.6                 highr_0.9                    
## [119] lattice_0.20-45               Matrix_1.4-1                 
## [121] vctrs_0.4.1                   pillar_1.7.0                 
## [123] lifecycle_1.0.1               BiocManager_1.30.17          
## [125] jquerylib_0.1.4               data.table_1.14.2            
## [127] bitops_1.0-7                  httpuv_1.6.5                 
## [129] patchwork_1.1.1               qvalue_2.28.0                
## [131] R6_2.5.1                      bookdown_0.26                
## [133] promises_1.2.0.1              gridExtra_2.3                
## [135] IRanges_2.30.0                codetools_0.2-18             
## [137] MASS_7.3-57                   assertthat_0.2.1             
## [139] withr_2.5.0                   S4Vectors_0.34.0             
## [141] GenomeInfoDbData_1.2.8        mgcv_1.8-40                  
## [143] parallel_4.2.0                grid_4.2.0                   
## [145] ggfun_0.0.6                   tidyr_1.2.0                  
## [147] rmarkdown_2.14                ggnewscale_0.4.7             
## [149] ggforce_0.3.3                 Biobase_2.56.0               
## [151] shiny_1.7.1

References

Yu, Guangchuang. 2018. Enrichplot: Visualization of Functional Enrichment Result. https://github.com/GuangchuangYu/enrichplot.

Yu, Guangchuang, Li-Gen Wang, Yanyan Han, and Qing-Yu He. 2012. “ClusterProfiler: An R Package for Comparing Biological Themes Among Gene Clusters.” OMICS: A Journal of Integrative Biology 16 (5): 284–87. https://doi.org/10.1089/omi.2011.0118.