TCseq_time_course_DEGs_clusters

编程语言2024-11-29 18:51:02
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install("TCseq")


if(F){
  ##https://www.jianshu.com/p/26332e4da742
  ##将自己的数据处理成standard input data
  ## 安装
  BiocManager::install("TCseq")
  library(TCseq)
  
  # 位置信息文件
  genomicIntervals <- read.delim('intervial.txt',stringsAsFactors = FALSE)
  genomicIntervals$chr <- as.factor(genomicIntervals$chr)
  
  
  ## 表达量矩阵文件
  count <- read.delim('count.txt',header = T,row.names = 1)
  # as.integer 转换为整数型
  countsTable <- count %>%  mutate(across(where(is.numeric), as.integer)) 
  # or
  #count1 <- apply(count, 2, as.integer)
  rownames(countsTable) <- rownames(count)
  ##矩阵形式
  countsTable <- as.matrix(countsTable)
  
  
  # 分组文件
  experiment <- read.delim('group.txt',header = T,stringsAsFactors = FALSE)
  experiment$sampleid <- as.factor(experiment$sampleid)
}


#以下是官方文档modified
#三个输入文件：
#1）基因表达矩阵文件 **countsTable**
#2）分组文件(样品，时间点，分组三列) **experiment**
#3）基因的位置信息(染色体，起始及终止位点，基因id四列) **genomicIntervals**
library(TCseq)
############
#data preparation
############
str(experiment);class(experiment)
str(genomicIntervals);class(genomicIntervals)
str(countsTable);class(countsTable)

data("genomicIntervals")
head(genomicIntervals)
#Experiment design without BAM file information
data("experiment")
#Counts table
data("countsTable")
##creat a tca object
tca <- TCA(design = experiment, genomicFeature = genomicIntervals,
           counts = countsTable)
tca
##or through **SummarizedExperiment** method
# suppressWarnings(library(SummarizedExperiment))
# se <- SummarizedExperiment(assays=list(counts = countsTable), colData = experiment)
# tca <- TCAFromSummarizedExperiment(se = se, genomicFeature = genomicIntervals)

#####
#differential analysis
#####
tca <- DBanalysis(tca)
#filter :The following step only keeps genomic regions with two or more more samples that have read counts more than 10.
tca <- DBanalysis(tca, filter.type = "raw", filter.value = 10, samplePassfilter = 2)

DBres <- DBresult(tca, group1 = "0h", group2 = c("24h","40h","72h"))
str(DBres, strict.width =  "cut")
head(DBres$`24hvs0h`)

DBres.sig <- DBresult(tca, group1 = "0h", group2 = c("24h","40h","72h"), top.sig = TRUE) #top.sig = TRUE: abs(log2-fold > 2)&adjust p-value<0.05
str(DBres.sig, strict.width =  "cut")


###################################################
###time Course analysis
###################################################
# values are logFC of all time points compared to the initial time point
#filter = TRUE: , the time course table will filter out all genomic regions with no significant changes between any two time points.
tca <- timecourseTable(tca, value = "FC", norm.method = "rpkm", filter = TRUE)

# values are normalized read counts
tca <- timecourseTable(tca, value = "expression", norm.method = "rpkm", filter = TRUE)

t <- tcTable(tca)
head(t)


###################################################
### cluster analysis
###################################################
tca <- timeclust(tca, algo = "cm", k = 6, standardize = TRUE)
p <- timeclustplot(tca, value = "z-score(PRKM)", cols = 3)
#plot cluster 1:
## print(p[[1]])

###################################################
### extract interested clusters
###################################################

a <- as.data.frame(tca@clusterRes@cluster) # interger with names transform to df
names(a) <- 'Cluster'
## 查看各个聚类中的基因数目
table(a)
##筛选 
Cluster1 <- subset(a,Cluster == 2)
查看全文
https://www.xamrdz.com/lan/5kd2016491.html
相关文章：