DESeq2 analysis
# Build the design formula string: every '~' in the user-supplied design
# (opt$X_C) is rewritten as '~ 1 +', so the intercept term is spelled out
# and every design variable is preceded by a '+'.
f <- gsub('~', '~ 1 +', opt$X_C)

# Assemble the DESeq2 data set from the count matrix, the sample annotation
# table and the design formula built above.
dds <- DESeqDataSetFromMatrix(
  countData = count_data,
  colData = coldata,
  design = formula(f)
)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# Pre-filtering: keep only the features whose counts, summed over all samples,
# reach at least 10.
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep, ]

# Run DESeq with the test (opt$X_T) and dispersion fit type (opt$X_H) taken
# from the options.
if (opt$X_T == 'LRT') {
  # The likelihood-ratio test compares the full design against a reduced one
  # that lacks the term named by opt$X_D. The term is removed with update()
  # instead of a regular-expression substitution on the formula text: a regex
  # built from the raw variable name also matches variables that merely start
  # with that name (e.g. 'condition' inside 'condition_batch') and treats
  # characters such as '.' as metacharacters, which corrupts the reduced
  # design.
  reduced_f <- update(formula(f), paste('~ . -', opt$X_D))
  dds <- DESeq(dds, test = opt$X_T, fitType = opt$X_H, reduced = reduced_f)
} else {
  dds <- DESeq(dds, test = opt$X_T, fitType = opt$X_H)
}
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## Differential expression test results
# Extract the results table for the contrast given by the options:
# opt$X_D is the design variable, opt$X_E and opt$X_F are the two levels
# passed (in that order) to the contrast; opt$X_I is the alpha passed to
# results().
res <- results(
  dds,
  contrast = c(opt$X_D, opt$X_E, opt$X_F),
  alpha = opt$X_I
)
# Show the results table in the report.
res
## log2 fold change (MLE): condition treated vs untreated
## Wald test p-value: condition treated vs untreated
## DataFrame with 9921 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## FBgn0000008 95.144292 -0.04055769 0.2200633 -0.1843001 0.8537780
## FBgn0000014 1.056523 -0.08351017 2.0760815 -0.0402249 0.9679138
## FBgn0000017 4352.553569 -0.25605716 0.1122166 -2.2818127 0.0225004
## FBgn0000018 418.610484 -0.06461537 0.1313488 -0.4919373 0.6227636
## FBgn0000024 6.406200 0.30898228 0.7560076 0.4087026 0.6827579
## ... ... ... ... ... ...
## FBgn0261570 3208.388610 0.25898295 0.1051143 2.4638234 0.01374638
## FBgn0261572 6.197188 -0.97545920 0.8074648 -1.2080516 0.22702741
## FBgn0261573 2240.979511 -0.01071335 0.1018940 -0.1051421 0.91626308
## FBgn0261574 4857.680373 0.07193860 0.1211139 0.5939748 0.55252899
## FBgn0261575 10.682520 0.57311354 0.7472629 0.7669503 0.44311106
## padj
## <numeric>
## FBgn0000008 0.9494616
## FBgn0000014 NA
## FBgn0000017 0.1302623
## FBgn0000018 0.8593923
## FBgn0000024 0.8877717
## ... ...
## FBgn0261570 0.09078025
## FBgn0261572 0.55886678
## FBgn0261573 0.97040672
## FBgn0261574 0.82565277
## FBgn0261575 0.75195111
# Write the complete results table, ordered by adjusted p-value, to the tool
# output directory (opt$X_d). Row names are written by write.table's default.
padj_sorted_res <- res[order(res$padj), ]
write.table(
  padj_sorted_res,
  file = paste0(opt$X_d, '/padj-sorted-genes.txt'),
  quote = FALSE
)

# Write only the significant features (adjusted p-value below opt$X_I; rows
# with a missing adjusted p-value are excluded) to a second file.
sig_res <- res[!is.na(res$padj) & (res$padj < opt$X_I), ]
sig_res_sorted <- sig_res[order(sig_res$padj), ]
# Copy the row names into a 'feature_id' column and move that column to the
# front, since this table is written without row names.
sig_res_sorted$feature_id <- rownames(sig_res_sorted)
n_col <- ncol(sig_res_sorted)
sig_res_sorted <- sig_res_sorted[, c(n_col, 1:(n_col - 1))]
write.table(
  sig_res_sorted,
  file = paste0(opt$X_d, '/padj-sorted-significant-genes.txt'),
  quote = FALSE,
  row.names = FALSE
)
MA-plot
# Collect the per-feature values shown in the MA-plot.
log_fold_change <- res$log2FoldChange
base_mean <- res$baseMean

# Flag each feature as significant ('yes') or not ('no') using the same
# adjusted p-value cut-off (opt$X_I) that is passed to results() and used for
# the significant-genes file, so the plot agrees with the saved table.
# Features with a missing adjusted p-value stay NA and are dropped below.
significant <- ifelse(res$padj < opt$X_I, 'yes', 'no')

maplot_df <- data.frame(log_fold_change, base_mean, significant)
maplot_df <- maplot_df[!is.na(maplot_df$significant), ]

# Scatter plot of log fold change against the natural log of the base mean,
# coloured by significance. The breaks are given explicitly so that each
# legend label stays paired with its value even when only one of the two
# classes is present in the data.
p <- ggplot(data = maplot_df) +
  geom_point(
    mapping = aes(log(base_mean), log_fold_change, color = significant),
    size = 0.5
  ) +
  scale_color_manual(
    name = 'Significant',
    breaks = c('no', 'yes'),
    values = c('no' = 'black', 'yes' = 'red'),
    labels = c('No', 'Yes')
  ) +
  xlab('Log base mean') +
  ylab('Log fold change') +
  theme_classic()

# Render the ggplot object as an interactive plotly widget.
plotly::ggplotly(p)