donglin.li

主页/Home 履历/CV 帖文/Posts 深色/Dark

🧬 · ChIP-seq/ChIRP-seq 数据标注和富集分析

Posted on 2025/1/4

    使用ChIPseeker对bedGraph文件进行标注和富集分析

    ChIPseeker文档 https://bioconductor.org/packages/release/bioc/vignettes/ChIPseeker/inst/doc/ChIPseeker.html#functional-enrichment-analysis

    准备

    从GEO下载果蝇roX1的ChIRP-seq文件(GSE97456)

    在R Studio中安装BiocManager和ChIPseeker。过程比较艰难,简言之就是提示缺什么包就下什么包,最终环境如下:

     [1] enrichplot_1.26.5                         
     [2] org.Dm.eg.db_3.20.0                       
     [3] ReactomePA_1.50.0                         
     [4] clusterProfiler_4.14.4                    
     [5] ChIPseeker_1.42.0                         
     [6] TxDb.Dmelanogaster.UCSC.dm6.ensGene_3.12.0
     [7] GenomicFeatures_1.58.0                    
     [8] GenomicRanges_1.58.0                      
     [9] GenomeInfoDb_1.42.1                       
    [10] AnnotationDbi_1.68.0                      
    [11] IRanges_2.40.1                            
    [12] S4Vectors_0.44.0                          
    [13] Biobase_2.66.0                            
    [14] BiocGenerics_0.52.0                       
    [15] BiocManager_1.30.25  

    标注与富集

    执行了以下命令

    # 指定bedGraph文件路径,并标注
    peakFile <- "/Users/lidonglin/Downloads/GSE69208_MEL-roX1.merge.bedGraph"
    peak <- readPeakFile(peakFile)
    txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
    anno <- annotatePeak(peak, TxDb = txdb, tssRegion = c(-3000, 3000))
    geneList <- as.data.frame(anno)$geneId
    
    # Map IDs
    entrezid <- mapIds(org.Dm.eg.db, keys = geneList, keytype = "FLYBASE", column = "ENTREZID")
    uniprotid <- mapIds(org.Dm.eg.db, keys = geneList, keytype = "FLYBASE", column = "UNIPROT")
    
    # 通路富集
    pathway1 <- enrichGO(entrezid, org.Dm.eg.db)
    pathway2 <- enrichPathway(entrezid, organism = "fly") #Reactome
    pathway3 <- enrichKEGG(uniprotid, organism = "dme", keyType = "uniprot")
    
    # 查看 保存结果为csv
    View(pathway1@result)
    write.csv(pathway1@result, file = "pathway1_result.csv", row.names = FALSE)
    
    # 作图(不好使,报错)
    # 	Error in ans[ypos] <- rep(yes, length.out = len)[ypos] : 
    #   	replacement has length zero
    dotplot(pathway1)

    作图

    针对dotplot()不知为啥不好使的问题,有一个曲线救国的方法,以KEGG结果为例:

    library(ggplot2)
    
    kegg_result <- read.csv("/Users/lidonglin/Downloads/roX1 ChIRPseq KEGG enrich.csv")
    
    rownames(kegg_result) <- 1:nrow(kegg_result)
    
    kegg_result$order=factor(rev(as.integer(rownames(kegg_result))),labels = rev(kegg_result$Description))
    
    ggplot(kegg_result,aes(y=order, x=Count))+
    + geom_point(aes(size=Count, color=-1*p.adjust))+
    + scale_color_gradient(low="green",high="red")+
    + labs(color=expression(p.adjust,size="Count"), x="Gene Number",y="Pathways",title="KEGG Pathway Enrichment")+
    + theme_bw()

    Powered by Hexo
    Theme based on Minima by Adi Sakti Jrs
    Published on GitHub