DESeq2 analysis

# Build the design formula: the formula text supplied in opt$X_C gets an
# explicit intercept term ("1 +") inserted directly after the tilde.
f <- gsub('~', '~ 1 +', opt$X_C)
design_formula <- formula(f)

# Assemble the DESeq2 data set from the count matrix and the column table.
dds <- DESeqDataSetFromMatrix(
  countData = count_data,
  colData = coldata,
  design = design_formula
)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# Pre-filtering: retain only the rows whose counts, summed across all
# columns, reach at least 10.
row_totals <- rowSums(counts(dds))
keep <- row_totals >= 10
dds <- dds[keep, ]

# Run DESeq with the test (opt$X_T) and fit type (opt$X_H) chosen by the user.
# The likelihood-ratio test additionally needs a reduced design: the full
# design with the variable of interest (opt$X_D) removed.
if (opt$X_T == 'LRT') {
  # Remove the term with update() instead of deleting text from the formula
  # string: a text substitution treats the variable name as a regular
  # expression, also clips longer names that merely start with opt$X_D, and
  # mangles interaction terms such as a:b.
  reduced_design <- update(formula(f), paste('~ . -', opt$X_D))
  # reduced_f stays a character string holding the reduced formula text.
  reduced_f <- paste(deparse(reduced_design), collapse = ' ')
  dds <- DESeq(dds, test = opt$X_T, fitType = opt$X_H,
               reduced = reduced_design)
} else {
  dds <- DESeq(dds, test = opt$X_T, fitType = opt$X_H)
}
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## Differential expression test results
# Extract the results table for the requested contrast (variable opt$X_D,
# level opt$X_E versus level opt$X_F), using opt$X_I as alpha, and print it.
contrast_spec <- c(opt$X_D, opt$X_E, opt$X_F)
res <- results(dds, contrast = contrast_spec, alpha = opt$X_I)
res
## log2 fold change (MLE): condition treated vs untreated 
## Wald test p-value: condition treated vs untreated 
## DataFrame with 9921 rows and 6 columns
##                baseMean log2FoldChange     lfcSE       stat     pvalue
##               <numeric>      <numeric> <numeric>  <numeric>  <numeric>
## FBgn0000008   95.144292    -0.04055769 0.2200633 -0.1843001  0.8537780
## FBgn0000014    1.056523    -0.08351017 2.0760815 -0.0402249  0.9679138
## FBgn0000017 4352.553569    -0.25605716 0.1122166 -2.2818127  0.0225004
## FBgn0000018  418.610484    -0.06461537 0.1313488 -0.4919373  0.6227636
## FBgn0000024    6.406200     0.30898228 0.7560076  0.4087026  0.6827579
## ...                 ...            ...       ...        ...        ...
## FBgn0261570 3208.388610     0.25898295 0.1051143  2.4638234 0.01374638
## FBgn0261572    6.197188    -0.97545920 0.8074648 -1.2080516 0.22702741
## FBgn0261573 2240.979511    -0.01071335 0.1018940 -0.1051421 0.91626308
## FBgn0261574 4857.680373     0.07193860 0.1211139  0.5939748 0.55252899
## FBgn0261575   10.682520     0.57311354 0.7472629  0.7669503 0.44311106
##                   padj
##              <numeric>
## FBgn0000008  0.9494616
## FBgn0000014         NA
## FBgn0000017  0.1302623
## FBgn0000018  0.8593923
## FBgn0000024  0.8877717
## ...                ...
## FBgn0261570 0.09078025
## FBgn0261572 0.55886678
## FBgn0261573 0.97040672
## FBgn0261574 0.82565277
## FBgn0261575 0.75195111
# Write every row of the results table to the tool output directory, ordered
# by adjusted p-value (order() places rows with NA padj last).
padj_sorted_res <- res[order(res$padj), ]
write.table(
  padj_sorted_res,
  file = paste0(opt$X_d, '/padj-sorted-genes.txt'),
  quote = FALSE
)

# Significant genes: adjusted p-value is defined and below the opt$X_I cutoff.
is_significant <- !is.na(res$padj) & (res$padj < opt$X_I)
sig_res <- res[is_significant, ]
sig_res_sorted <- sig_res[order(sig_res$padj), ]

# Copy the row names into a feature_id column and move it to the front, so the
# table can be written without row names.
sig_res_sorted$feature_id <- rownames(sig_res_sorted)
n_col <- ncol(sig_res_sorted)
sig_res_sorted <- sig_res_sorted[, c(n_col, seq_len(n_col - 1))]
write.table(
  sig_res_sorted,
  file = paste0(opt$X_d, '/padj-sorted-significant-genes.txt'),
  quote = FALSE,
  row.names = FALSE
)

MA-plot

# MA-plot: log2 fold change against the natural log of the base mean, one
# point per row of the results table, coloured by significance.
log_fold_change <- res$log2FoldChange
base_mean <- res$baseMean

# Flag significance with the same adjusted p-value cutoff (opt$X_I) that is
# passed to results() as alpha and used for the significant-genes table,
# rather than a fixed 0.1, so the plot agrees with the written output.
# Rows whose padj is NA stay NA here and are removed below.
significant <- ifelse(res$padj < opt$X_I, 'yes', 'no')

maplot_df <- data.frame(log_fold_change, base_mean, significant)
maplot_df <- maplot_df[!is.na(maplot_df$significant), ]
p <- ggplot(data = maplot_df) +
  geom_point(mapping = aes(log(base_mean), log_fold_change, color = significant),
             size = 0.5) +
  scale_color_manual(name = 'Significant',
                     values = c('no' = 'black', 'yes' = 'red'),
                     labels = c('No', 'Yes')) +
  xlab('Log base mean') +
  ylab('Log fold change') +
  theme_classic()

# Render the ggplot as an interactive plotly widget.
plotly::ggplotly(p)

Heatmap of count matrix

# Heatmap of transformed counts for the rows with the highest mean normalized
# count.
ntd <- normTransform(dds)

# Take at most 20 rows: head() avoids the NA indices that [1:20] would produce
# when fewer than 20 rows remain after filtering.
select <- head(order(rowMeans(counts(dds, normalized = TRUE)),
                     decreasing = TRUE), 20)

# Column annotations: every colData column except the last one.
# drop = FALSE keeps a table (with its sample row names) even when only a
# single annotation column remains.
# NOTE(review): the last colData column is presumably sizeFactor added by
# DESeq() - confirm.
df <- as.data.frame(colData(dds)[, -ncol(colData(dds)), drop = FALSE])
pheatmap(assay(ntd)[select, , drop = FALSE], annotation_col = df)

Principal component analysis plot

# PCA plot of the variance-stabilized data, with samples grouped by the
# variable named in opt$X_D.
vsd <- vst(dds, blind = FALSE)
pca_base <- plotPCA(vsd, intgroup = c(opt$X_D))
p <- pca_base +
  scale_color_discrete(name = 'Group') +
  theme_classic()

# Show the plot as an interactive widget.
ggplotly(p)

Outputs