# Run skewer on the two input datasets listed below.
# Everything the tool prints (stdout and stderr) is redirected into
# skewer.log.txt rather than shown on the console.
# NOTE(review): `-o trim` is presumably the output prefix, since the report
# code that follows reads a file named trim-trimmed.log -- confirm.
# NOTE(review): the remaining flags (-m, -r, -d, -q, -Q, -l, -f) are passed
# as configured; their meaning is not visible here -- see `skewer --help`.
skewer \
/export/galaxy-central/database/files/000/dataset_7.dat \
/export/galaxy-central/database/files/000/dataset_8.dat \
-m head \
-r 0.1 \
-d 0.03 \
-q 0 \
-Q 0 \
-l 18 \
-f auto \
-o trim > skewer.log.txt 2>&1
# Render the read-processing summary from the trimming log as a table.
#
# The log is read from <REPORT_FILES_PATH>/trim-trimmed.log. The summary is the
# run of lines from the "... processed; of these:" line through the
# "untrimmed ... available after processing" line. Each line is split into a
# leading count and the remaining description.
log = readLines(file.path(Sys.getenv('REPORT_FILES_PATH'), 'trim-trimmed.log'))

# Use the first match of each marker; `[1]` yields NA when nothing matched so
# the problem can be reported clearly instead of failing inside `:`.
start_line = grep('read.+processed; of these:', log)[1]
end_line = grep('untrimmed.+available after processing', log)[1]
if (is.na(start_line) || is.na(end_line) || end_line < start_line) {
  stop('Could not locate the read-processing summary in trim-trimmed.log',
       call. = FALSE)
}

# Put a tab after the leading count only (sub, not gsub) so that digits
# appearing later in the description cannot create extra columns.
processing_summary = sub('(\\d+) ', '\\1\t', log[start_line:end_line])
processing_summary_df = do.call(
  rbind,
  strsplit(processing_summary, '\t', fixed = TRUE)
)

# The first summary line supplies the header: its count becomes the name of
# the second column, next to the fixed label 'Total reads:'.
colnames(processing_summary_df) = c('Total reads:', processing_summary_df[1, 1])

# drop = FALSE keeps a matrix even when only one row is left after removing
# the header line.
knitr::kable(processing_summary_df[-1, , drop = FALSE])
Total reads: | 250000 |
---|---|
0 | ( 0.00%) short read pairs filtered out after trimming by size control |
0 | ( 0.00%) empty read pairs filtered out after trimming by size control |
250000 | (100.00%) read pairs available; of these: |
10328 | ( 4.13%) trimmed read pairs available after processing |
239672 | (95.87%) untrimmed read pairs available after processing |
# Parse the length-distribution table at the end of the trimming log into a
# data frame (`len_dist`) whose column names come from the table's header line.
#
# The header is located with a whitespace-agnostic pattern: the fields are
# split on tabs below, so the header words may be separated by tabs rather
# than single spaces.
start_line = grep('length\\s+count\\s+percentage', log)[1]
if (is.na(start_line)) {
  stop('Could not locate the length distribution table in the trimming log',
       call. = FALSE)
}

# Everything from the header line to the end of the log belongs to the table.
len_dist = log[start_line:length(log)]
len_dist = do.call(rbind, strsplit(len_dist, '\t', fixed = TRUE))
columns = len_dist[1, ]

# drop = FALSE keeps the matrix shape when there is a single data row, and
# stringsAsFactors = FALSE keeps the values as character on R < 4.0 so that
# later numeric conversion sees the real numbers rather than factor codes.
len_dist = as.data.frame(len_dist[-1, , drop = FALSE], stringsAsFactors = FALSE)
colnames(len_dist) = columns
library(plotly)
library(ggplot2)
# Plot the read-length distribution as an interactive line chart.
#
# Going through as.character() makes the conversion correct even if `count`
# arrived as a factor (as.numeric() on a factor returns level codes).
len_dist$count = as.numeric(as.character(len_dist$count))

# Keep the original length values as axis labels, then replace the column with
# evenly spaced positions so every row gets its own tick on a continuous axis.
labels = as.character(len_dist$length)
len_dist$length = seq_len(nrow(len_dist))

pp = ggplot(data = len_dist, aes(length, count)) +
  geom_line(color = 'red') +
  scale_x_continuous(name = 'Length',
                     breaks = seq_len(nrow(len_dist)),
                     labels = labels) +
  # Rotate the tick labels so that many length values stay readable.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ylab('Count') +
  ggtitle('Length distribution')
ggplotly(pp)