# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("TCseq")
if(F){
##https://www.jianshu.com/p/26332e4da742
##将自己的数据处理成standard input data
## 安装
BiocManager::install("TCseq")
library(TCseq)
# 位置信息文件
genomicIntervals <- read.delim('intervial.txt',stringsAsFactors = FALSE)
genomicIntervals$chr <- as.factor(genomicIntervals$chr)
## 表达量矩阵文件
count <- read.delim('count.txt',header = T,row.names = 1)
# as.integer 转换为整数型
countsTable <- count %>% mutate(across(where(is.numeric), as.integer))
# or
#count1 <- apply(count, 2, as.integer)
rownames(countsTable) <- rownames(count)
##矩阵形式
countsTable <- as.matrix(countsTable)
# 分组文件
experiment <- read.delim('group.txt',header = T,stringsAsFactors = FALSE)
experiment$sampleid <- as.factor(experiment$sampleid)
}
#以下是官方文档modified
#三个输入文件:
#1)基因表达矩阵文件 **countsTable**
#2)分组文件(样品,时间点,分组三列) **experiment**
#3)基因的位置信息(染色体,起始及终止位点,基因id四列) **genomicIntervals**
library(TCseq)
############
#data preparation
############
str(experiment);class(experiment)
str(genomicIntervals);class(genomicIntervals)
str(countsTable);class(countsTable)
data("genomicIntervals")
head(genomicIntervals)
#Experiment design without BAM file information
data("experiment")
#Counts table
data("countsTable")
##creat a tca object
tca <- TCA(design = experiment, genomicFeature = genomicIntervals,
counts = countsTable)
tca
##or through **SummarizedExperiment** method
# suppressWarnings(library(SummarizedExperiment))
# se <- SummarizedExperiment(assays=list(counts = countsTable), colData = experiment)
# tca <- TCAFromSummarizedExperiment(se = se, genomicFeature = genomicIntervals)
#####
#differential analysis
#####
tca <- DBanalysis(tca)
#filter :The following step only keeps genomic regions with two or more more samples that have read counts more than 10.
tca <- DBanalysis(tca, filter.type = "raw", filter.value = 10, samplePassfilter = 2)
DBres <- DBresult(tca, group1 = "0h", group2 = c("24h","40h","72h"))
str(DBres, strict.width = "cut")
head(DBres$`24hvs0h`)
DBres.sig <- DBresult(tca, group1 = "0h", group2 = c("24h","40h","72h"), top.sig = TRUE) #top.sig = TRUE: abs(log2-fold > 2)&adjust p-value<0.05
str(DBres.sig, strict.width = "cut")
###################################################
###time Course analysis
###################################################
# values are logFC of all time points compared to the initial time point
#filter = TRUE: , the time course table will filter out all genomic regions with no significant changes between any two time points.
tca <- timecourseTable(tca, value = "FC", norm.method = "rpkm", filter = TRUE)
# values are normalized read counts
tca <- timecourseTable(tca, value = "expression", norm.method = "rpkm", filter = TRUE)
t <- tcTable(tca)
head(t)
###################################################
### cluster analysis
###################################################
tca <- timeclust(tca, algo = "cm", k = 6, standardize = TRUE)
p <- timeclustplot(tca, value = "z-score(PRKM)", cols = 3)
#plot cluster 1:
## print(p[[1]])
###################################################
### extract interested clusters
###################################################
a <- as.data.frame(tca@clusterRes@cluster) # interger with names transform to df
names(a) <- 'Cluster'
## 查看各个聚类中的基因数目
table(a)
##筛选
Cluster1 <- subset(a,Cluster == 2)