library("GenomicFeatures")
library("dplyr")
path1 = setwd("C:/Users/95656/Desktop/hyp/TZX/")
setwd(path1)
# 计算基因length
gffpath = "Gmax_275_Wm82.a2.v1.gene.gff3"
print(gffpath)
txdb <- makeTxDbFromGFF(gffpath,format="gff")
exons_gene <- exonsBy(txdb, by = "gene")
exons_gene_lens <- lapply(exons_gene,function(x){sum(width(reduce(x)))})
length1 = t(as.data.frame(exons_gene_lens))
write.table(length1,"C:/Users/95656/Desktop/hyp/TZX/length.txt", sep = "\t")
# 合并 reads count 数据和length
length1 = read.table("C:/Users/95656/Desktop/hyp/TZX/length.txt", header = T,sep="\t")
countsfile = "C:/Users/95656/Desktop/hyp/TZX/PRJNA238493_BP_raw_count.expression.tsv"
count1 = read.table(countsfile, header = T, sep = "\t")
a = inner_join(count1, length1, by="Locus_Name")
a[is.na(a)] = 0 # 设置 NA值为0
a = a[, -28] # 删除第28列 Locus_Name.1
write.table(a,"C:/Users/95656/Desktop/hyp/TZX/countsdata.txt", sep = "\t", row.names = FALSE)
# 按照对应关系给countdata columns 改名,改好的文件保存为 re_countsdata.txt
# {'SAMN02644800': 'root', 'SAMN02644811': 'leaf3', 'SAMN02644826': 'seed4', 'SAMN02644825': 'seed3', 'SAMN02644822': 'podseed3', 'SAMN02644821': 'podseed2', 'SAMN02644820': 'podseed1', 'SAMN02644803': 'stem1', 'SAMN02644806': 'leafbud1', 'SAMN02644804': 'stem2', 'SAMN02644813': 'flower2', 'SAMN02644814': 'flower3', 'SAMN02644816': 'flower5', 'SAMN02644815': 'flower4', 'SAMN02644817': 'pod1', 'SAMN02644823': 'seed1', 'SAMN02644818': 'pod2', 'SAMN02644819': 'pod3', 'SAMN02644824': 'seed2', 'SAMN02644809': 'leaf1', 'SAMN02644807': 'leafbud2', 'SAMN02644802': 'cotyledon2', 'SAMN02644810': 'leaf2', 'SAMN02644805': 'shoot meristem', 'SAMN02644812': 'flower1', 'SAMN02644827': 'seed5'}
# 计算 TPM 和 FPKM
setwd("C:/Users/95656/Desktop/hyp/TZX/")
countfile = "C:/Users/95656/Desktop/hyp/TZX/re_countsdata.txt"
mycounts<-read.table(countfile, header = T,sep="\t")
head(mycounts)
rownames(mycounts)<-mycounts[,1]
mycounts<-mycounts[,-1]
head(mycounts)
# TPM
kb <- mycounts$length / 1000
countdata <- mycounts[,1:26]
rpk <- countdata / kb
tpm <- t(t(rpk)/colSums(rpk) * 1000000)
head(tpm)
write.table(tpm,file="my_TPM.txt",sep="\t",quote=F)
# FPKM
fpkm <- t(t(rpk)/colSums(countdata) * 10^6)
head(fpkm)
write.table(fpkm,file="my_FPKM.txt",sep="\t",quote=F)
## FPKM转化为TPM
# fpkm_to_tpm = t(t(fpkm)/colSums(fpkm))*10^6
# head(fpkm_to_tpm)