require-packages
. "guile"
. "guile-dsv" . "guile-libyaml"
import
ice-9 match
yaml
dsv
;; Build the configuration alist for the pipeline.  Defaults come from
;; <srcdir-or-cwd>/etc/settings.yaml; an optional "settings.yaml" in the
;; working directory overrides them (its entries are prepended, so assoc
;; lookups hit them first).
define : load-settings
  ;; Root for locating the shipped default settings.
  define here
    or
      getenv "srcdir"
      getcwd
  define defaults
    file here / "etc" / "settings.yaml"
  unless : file-exists? defaults
    error "Could not find default settings!"
  let*
    :
      ;; Optional user overrides; empty list when no settings.yaml here.
      user-settings
        if : file-exists? "settings.yaml"
          read-yaml-file "settings.yaml"
          list
      settings
        append
          read-yaml-file defaults
          . user-settings
      ;; Extra "locations" entries whose values name environment
      ;; variables (presumably substituted elsewhere — TODO confirm).
      extra-locations
        `
          :
            "prefix" . "PREFIX"
            "exec_prefix" . "EXEC_PREFIX"
            "libexecdir" . "LIBEXECDIR"
            "pkglibexecdir" . "PKGLIBEXECDIR"
            "datarootdir" . "DATAROOTDIR"
            "pkgdatadir" . "PKGDATADIR"
      ;; NOTE(review): new-settings is bound but the body below returns
      ;; plain `settings`, so this computation is unused — confirm
      ;; whether the body should return new-settings instead.
      new-settings
        append
          alist-delete "locations" settings
          append
            get settings "locations"
            . extra-locations
    . settings
;; The merged pipeline configuration used by everything below.
define config
  load-settings
;; Input locations, taken from the "locations" section of the config.
define GTF_FILE
  file (getcwd) /
    get config "locations" "gtf-file"
;; The sample sheet is expected in the current working directory.
define SAMPLE_SHEET_FILE
  . "sample_sheet.csv"
define GENOME_FASTA
  get config "locations" "genome-fasta"
define CDNA_FASTA
  get config "locations" "cdna-fasta"
define READS_DIR
  get config "locations" "reads-dir"
;; All pipeline outputs are written below this directory.
define OUTPUT_DIR
  file (getcwd) /
    get config "locations" "output-dir"
;; Logo used by the HTML reports.  The path differs between a source
;; checkout (PIGX_UNINSTALLED set) and an installed tree.
define LOGO
  if : getenv "PIGX_UNINSTALLED"
    file
      getcwd
      . / "images" / "Logo_PiGx.png"
    file
      getcwd
      . / "Logo_PiGx.png"
;; Directory with the helper R scripts shipped with the pipeline.
define SCRIPTS_DIR
  file
    getcwd
    . / "scripts"
;; Per-stage output directories, all below OUTPUT_DIR.
define TRIMMED_READS_DIR
  file OUTPUT_DIR / "trimmed_reads"
define LOG_DIR
  file OUTPUT_DIR / "logs"
define FASTQC_DIR
  file OUTPUT_DIR / "fastqc"
define MULTIQC_DIR
  file OUTPUT_DIR / "multiqc"
define MAPPED_READS_DIR
  file OUTPUT_DIR / "mapped_reads"
define BIGWIG_DIR
  file OUTPUT_DIR / "bigwig_files"
define COUNTS_DIR
  file OUTPUT_DIR / "feature_counts"
define SALMON_DIR
  file OUTPUT_DIR / "salmon_output"
;; Extra command-line arguments configured for tool NAME under "tools";
;; the empty string when none are configured.
define : toolArgs name
  get config default: "" "tools" name "args"

;; Command string for tool NAME: the configured executable's basename
;; followed by a space and its configured arguments.
define : tool name
  string-append
    basename : get config "tools" name "executable"
    . " "
    toolArgs name
;; Command strings (executable + configured args) for all external tools
;; invoked by the shell snippets below.
define FASTQC_EXEC
  tool "fastqc"
define MULTIQC_EXEC
  tool "multiqc"
define STAR_EXEC_MAP
  tool "star_map"
define STAR_EXEC_INDEX
  tool "star_index"
define SALMON_INDEX_EXEC
  tool "salmon_index"
define SALMON_QUANT_EXEC
  tool "salmon_quant"
define TRIM_GALORE_EXEC
  tool "trim-galore"
define SAMTOOLS_EXEC
  tool "samtools"
define HTSEQ_COUNT_EXEC
  tool "htseq-count"
define GUNZIP_EXEC
  tool "gunzip"
define RSCRIPT_EXEC
  tool "Rscript"
define SED_EXEC
  tool "sed"
;; Per-rule thread counts from the "execution" section of the config.
define STAR_INDEX_THREADS
  get config "execution" "rules" "star_index" "threads"
define SALMON_INDEX_THREADS
  get config "execution" "rules" "salmon_index" "threads"
define STAR_MAP_THREADS
  get config "execution" "rules" "star_map" "threads"
define SALMON_QUANT_THREADS
  get config "execution" "rules" "salmon_quant" "threads"
;; Organism name, passed through to the DESeq report scripts.
define ORGANISM
  get config "organism"
;; Configured differential-expression analyses; empty list when unset.
define DE_ANALYSIS_LIST
  get config default: (list) "DEanalyses"
define : parse-sample-sheet file
  . "Parse FILE as a sample sheet and return a list (rows) of
lists (columns)."
  catch 'dsv-parser-error
    lambda _
      with-input-from-file file
        lambda _
          ;; RFC 4180: comma-separated values with optional quoting.
          dsv->scm format: 'rfc4180
    lambda _
      ;; Turn parser-specific conditions into a user-facing error.
      error "Failed to parse the sample sheet."
;; The parsed sample sheet: one alist per data row, mapping header
;; column names to that row's values.
define SAMPLE_SHEET
  let
    :
    define all-rows
      parse-sample-sheet SAMPLE_SHEET_FILE
    ;; First row of the CSV holds the column names.
    define header
      first all-rows
    define rows
      drop all-rows 1
    map
      lambda : row
        map cons header row
      . rows
;; Names of all samples, in sample sheet order.
define SAMPLES
  on SAMPLE_SHEET map
    lambda : row
      get row "name"
define : lookup column predicate . fields
  . "Convenience function to access fields of sample sheet
columns that match the predicate. The predicate may be a string.
This returns a list of rows matching the predicates, each
consisting of a list of selected fields. If only one field was
selected, return just a list of matching columns."
  ;; A string predicate means: COLUMN's value must equal the string.
  define predicate*
    if : procedure? predicate
      . predicate
      lambda : row
        equal? predicate : assoc-ref row column
  ;; All sample sheet rows satisfying the predicate.
  define records
    filter
      lambda : row
        predicate* row
      . SAMPLE_SHEET
  ;; Projection: a list of field values, or the bare value when only
  ;; one field was requested.
  define return-fields
    match fields
      : one more . rest
        lambda : record
          map
            lambda : column-name
              assoc-ref record column-name
            . fields
      : one
        lambda : record
          assoc-ref record : first fields
  map return-fields records
;; Non-empty read file names ("reads" and "reads2") for SAMPLE.
define : reads-for-sample sample
  remove string-null?
    first
      lookup "name" sample "reads" "reads2"

define : single-end? sample
  . "Return #true if the SAMPLE library is single ended."
  equal? 1
    length : reads-for-sample sample

;; Trimmed FASTQ file(s) for SAMPLE: one "_R" file for single-end
;; libraries, an "_R1"/"_R2" pair for paired-end libraries.
define : trimmed-reads-for-sample sample
  if : single-end? sample
    files TRIMMED_READS_DIR / sample "_R.fastq.gz"
    files TRIMMED_READS_DIR / sample "_R" (list "1" "2") ".fastq.gz"

;; Raw input FASTQ file(s) for SAMPLE below READS_DIR.
define : trim-galore-input sample
  files READS_DIR /
    reads-for-sample sample
;; Convert the sample sheet to the colData.tsv consumed by the R reports.
process translate_sample_sheet_for_report
  inputs
    . sample-sheet: SAMPLE_SHEET_FILE
    . script:
    file SCRIPTS_DIR / "translate_sample_sheet_for_report.R"
  outputs
    file OUTPUT_DIR / "colData.tsv"
  packages
    . "r-minimal"
  # {
    {{RSCRIPT_EXEC}} {{inputs:script}} {{inputs:sample-sheet}} {{outputs}}
  }
;; Adapter/quality trimming for paired-end samples.  Trim Galore writes
;; *_val_1/*_val_2 files which are renamed to the <sample>_R1/_R2 names
;; used downstream.
process trim_galore_pe (with sample)
  inputs
    trim-galore-input sample
  outputs
    . r1:
    file TRIMMED_READS_DIR / sample "_R1.fastq.gz"
    . r2:
    file TRIMMED_READS_DIR / sample "_R2.fastq.gz"
    . log:
    file LOG_DIR / "trim_galore_" sample ".log"
  packages
    . "trim-galore"
    . "coreutils"
  procedure
    define read1
      first
        lookup "name" sample "reads"
    define read2
      first
        lookup "name" sample "reads2"
    ;; TODO: use basename instead? Or is the extension not always the same?
    ;; For example ".fastq.gz" and ".fq.gz"?
    ;; Strip the last two dot-separated components of FILE-NAME.
    define : without-extension file-name
      string-join
        drop-right
          string-split file-name #\.
          . 2
        . "."
    ;; Names Trim Galore gives its outputs before we rename them.
    define tmp1
      file TRIMMED_READS_DIR /
        without-extension read1
        . "_val_1.fq.gz"
    define tmp2
      file TRIMMED_READS_DIR /
        without-extension read2
        . "_val_2.fq.gz"
    # {
      mkdir -p {{LOG_DIR}}
      {{TRIM_GALORE_EXEC}} -o {{TRIMMED_READS_DIR}} \
          --paired {{inputs}} >> {{outputs:log}} 2>&1 && \
      sleep 10 && \
      mv {{tmp1}} {{outputs:r1}} && \
      mv {{tmp2}} {{outputs:r2}}
    }
;; Adapter/quality trimming for single-end samples.  Trim Galore writes
;; *_trimmed.fq.gz which is renamed to the <sample>_R.fastq.gz name used
;; downstream.
process trim_galore_se (with sample)
  inputs
    trim-galore-input sample
  outputs
    . trimmed:
    file TRIMMED_READS_DIR / sample "_R.fastq.gz"
    . log:
    file LOG_DIR / "trim_galore_" sample ".log"
  packages
    . "trim-galore"
    . "coreutils"
  procedure
    define input1
      first inputs
    define read-name
      first
        lookup "name" sample "reads"
    ;; Read file name without its last two extensions (e.g. ".fastq.gz").
    define without-extension
      string-join
        drop-right
          string-split read-name #\.
          . 2
        . "."
    ;; Name Trim Galore gives its output before we rename it.
    define tmp
      file TRIMMED_READS_DIR / without-extension "_trimmed.fq.gz"
    # {
      mkdir -p {{LOG_DIR}}
      {{TRIM_GALORE_EXEC}} -o {{TRIMMED_READS_DIR}} \
          {{input1}} >> {{outputs:log}} 2>&1 && \
      sleep 10 && \
      mv {{tmp}} {{outputs:trimmed}}
    }
;; Build the STAR genome index from the genome FASTA and GTF annotation.
process star_index
  inputs
    . gtf: GTF_FILE
    . genome: GENOME_FASTA
  outputs
    ;; SAindex is the concrete file downstream processes depend on; the
    ;; directory tag is what STAR takes on the command line.
    . star_index_file:
    file OUTPUT_DIR / "star_index" / "SAindex"
    . star_index_dir:
    file OUTPUT_DIR / "star_index"
    . log:
    file LOG_DIR / "star_index.log"
  packages
    . "star"
    . "coreutils"
  # {
    mkdir -p {{LOG_DIR}}
    mkdir -p {{outputs:star_index_dir}}
    {{STAR_EXEC_INDEX}} \
        --runMode genomeGenerate \
        --runThreadN {{STAR_INDEX_THREADS}} \
        --genomeDir {{outputs:star_index_dir}} \
        --genomeFastaFiles {{inputs:genome}} \
        --sjdbGTFfile {{inputs:gtf}} >> {{outputs:log}} 2>&1
  }
;; Map the trimmed reads of SAMPLE with STAR, producing an unsorted BAM.
process star_map (with sample)
  inputs
    ;; Depend on the index file so mapping waits for star_index; the
    ;; directory is what STAR takes via --genomeDir.
    . index_file:
    pick star_index_file:
      process-outputs star_index with-tags:
    . index_dir:
    pick star_index_dir:
      process-outputs star_index with-tags:
    . reads:
    trimmed-reads-for-sample sample
  outputs
    file MAPPED_READS_DIR / sample "_Aligned.out.bam"
    . log:
    file LOG_DIR / "star_map_" sample ".log"
  packages
    . "star"
    . "gzip"
    . "coreutils"
  procedure
    ;; STAR derives its output file names from this prefix.
    define output_prefix
      file MAPPED_READS_DIR / sample "_"
    # {
      mkdir -p {{LOG_DIR}}
      mkdir -p {{MAPPED_READS_DIR}}
      {{STAR_EXEC_MAP}} \
          --runThreadN {{STAR_MAP_THREADS}} \
          --genomeDir {{inputs:index_dir}} \
          --readFilesIn {{inputs::reads}} \
          --readFilesCommand '{{GUNZIP_EXEC}} -c' \
          --outSAMtype BAM Unsorted \
          --outFileNamePrefix {{output_prefix}} >> {{outputs:log}} 2>&1
    }
;; Coordinate-sort the STAR alignment of SAMPLE.
process sort_bam (with sample)
  inputs
    file MAPPED_READS_DIR / sample "_Aligned.out.bam"
  outputs
    . bam:
    file MAPPED_READS_DIR / sample "_Aligned.sortedByCoord.out.bam"
    . log:
    file LOG_DIR / "samtools_sort_" sample ".log"
  packages
    . "samtools"
    . "coreutils"
  # {
    mkdir -p {{LOG_DIR}}
    {{SAMTOOLS_EXEC}} sort -o {{outputs:bam}} {{inputs}} >> {{outputs:log}} 2>&1
  }
;; Index the coordinate-sorted BAM of SAMPLE (.bai).
process index_bam (with sample)
  inputs
    file MAPPED_READS_DIR / sample "_Aligned.sortedByCoord.out.bam"
  outputs
    . bai:
    file MAPPED_READS_DIR / sample "_Aligned.sortedByCoord.out.bam.bai"
    . log:
    file LOG_DIR / "samtools_index_" sample ".log"
  packages
    . "samtools"
    . "coreutils"
  # {
    mkdir -p {{LOG_DIR}}
    {{SAMTOOLS_EXEC}} index {{inputs}} {{outputs:bai}} >> {{outputs:log}} 2>&1
  }
;; Post-mapping quality control on the sorted BAM of SAMPLE with FastQC.
process fastqc (with sample)
  inputs
    file MAPPED_READS_DIR / sample "_Aligned.sortedByCoord.out.bam"
  outputs
    file FASTQC_DIR / sample "_Aligned.sortedByCoord.out_fastqc.zip"
    . log:
    file LOG_DIR / "fastqc_" sample ".log"
  packages
    . "fastqc"
    . "coreutils"
  # {
    mkdir -p {{LOG_DIR}}
    mkdir -p {{FASTQC_DIR}}
    {{FASTQC_EXEC}} -o {{FASTQC_DIR}} -f bam {{inputs}} >> {{outputs:log}} 2>&1
  }
;; Build the SALMON index from the cDNA FASTA.
process salmon_index
  inputs
    . CDNA_FASTA
  outputs
    ;; sa.bin is the concrete file downstream processes depend on.
    . salmon_index_dir:
    file OUTPUT_DIR / "salmon_index"
    . salmon_index_file:
    file OUTPUT_DIR / "salmon_index" / "sa.bin"
    . log:
    file LOG_DIR / "salmon_index.log"
  packages
    . "salmon"
    . "coreutils"
  # {
    mkdir -p {{LOG_DIR}}
    mkdir -p {{outputs:salmon_index_dir}}
    {{SALMON_INDEX_EXEC}} -t {{inputs}} \
        -i {{outputs:salmon_index_dir}} \
        -p {{SALMON_INDEX_THREADS}} >> {{outputs:log}} 2>&1
  }
;; Transcript- and gene-level quantification of SAMPLE with SALMON.
process salmon_quant (with sample)
  inputs
    . gtf: GTF_FILE
    . index_file:
    pick salmon_index_file:
      process-outputs salmon_index with-tags:
    . index_dir:
    pick salmon_index_dir:
      process-outputs salmon_index with-tags:
    . reads:
    trimmed-reads-for-sample sample
  outputs
    file SALMON_DIR / sample / "quant.sf"
    file SALMON_DIR / sample / "quant.genes.sf"
    . outfolder:
    file SALMON_DIR / sample
    . log:
    file LOG_DIR / "salmon_quant_" sample ".log"
  packages
    . "salmon"
    . "coreutils"
  values
    . reads:
    pick * reads: inputs
    . arguments:
    begin
      ;; Single-end libraries are passed via -r, paired-end via -1/-2.
      define single?
        equal? 1
          length reads
      if single?
        string-append "-r " (first reads) " "
        string-append "-1 " (first reads) " -2 " (second reads) " "
  # {
    mkdir -p {{LOG_DIR}}
    {{SALMON_QUANT_EXEC}} \
        -i {{inputs:index_dir}} \
        -l A -p {{SALMON_QUANT_THREADS}} \
        {{values:arguments}} \
        -o {{outputs:outfolder}} \
        --seqBias --gcBias \
        -g {{inputs:gtf}} >> {{outputs:log}} 2>&1
  }
;; Build raw and TPM-normalized count matrices (transcript and gene
;; level) from all per-sample SALMON quantifications.
process counts_from_SALMON
  inputs
    . quantFiles:
    files SALMON_DIR / SAMPLES / "quant.sf"
    . quantGenesFiles:
    files SALMON_DIR / SAMPLES / "quant.genes.sf"
    . colDataFile:
    first
      process-outputs translate_sample_sheet_for_report
    . script:
    file SCRIPTS_DIR / "counts_matrix_from_SALMON.R"
  outputs
    ;; Four matrices (raw/TPM x transcripts/genes), the log, plus the
    ;; two directories the snippet creates up front.
    define types
      list "transcripts" "genes"
    append
      files COUNTS_DIR / "raw_counts" / "counts_from_SALMON." types ".tsv"
      files COUNTS_DIR / "normalized" / "TPM_counts_from_SALMON." types ".tsv"
      list log:
        file LOG_DIR / "salmon_import_counts.log"
      cons directories:
        files COUNTS_DIR / (list "raw_counts" "normalized")
  packages
    . "r-minimal"
    . "r-data-table"
    . "r-tximport"
    . "r-deseq2"
    . "coreutils"
  # {
    mkdir -p {{LOG_DIR}}
    mkdir -p {{outputs::directories}}
    {{RSCRIPT_EXEC}} {{inputs:script}} \
        {{SALMON_DIR}} {{COUNTS_DIR}} {{inputs:colDataFile}} >> {{outputs:log}} 2>&1
  }
;; Per-sample forward/reverse bigwig coverage tracks, scaled with the
;; DESeq size factors produced by norm_counts_deseq.
process genomeCoverage (with sample)
  inputs
    . size_factors_file:
    file COUNTS_DIR / "normalized" / "deseq_size_factors.txt"
    . bam:
    file MAPPED_READS_DIR / sample "_Aligned.sortedByCoord.out.bam"
    . bai:
    file MAPPED_READS_DIR / sample "_Aligned.sortedByCoord.out.bam.bai"
    . script:
    file SCRIPTS_DIR / "export_bigwig.R"
  outputs
    files BIGWIG_DIR / sample (list ".forward" ".reverse") ".bigwig"
    . log:
    file LOG_DIR / "genomeCoverage_" sample ".log"
  packages
    . "r-minimal"
    . "r-s4vectors"
    . "r-genomicalignments"
    . "r-genomicranges"
    . "r-rtracklayer"
    . "coreutils"
  # {
    mkdir -p {{LOG_DIR}}
    mkdir -p {{BIGWIG_DIR}}
    {{RSCRIPT_EXEC}} {{inputs:script}} \
        {{inputs:bam}} {{sample}} {{inputs:size_factors_file}} {{BIGWIG_DIR}} >> {{outputs:log}} 2>&1
  }
;; Aggregate SALMON, STAR and FastQC outputs into one MultiQC report.
;; Inputs are mirrored into a scratch tree via hard links so MultiQC
;; only sees the relevant files.
process multiqc
  inputs
    . salmon_output:
    files SALMON_DIR / SAMPLES / "quant.sf"
    . star_output:
    files MAPPED_READS_DIR / SAMPLES "_Aligned.sortedByCoord.out.bam"
    . fastqc_output:
    files FASTQC_DIR / SAMPLES "_Aligned.sortedByCoord.out_fastqc.zip"
  outputs
    file MULTIQC_DIR / "multiqc_report.html"
    . log:
    file LOG_DIR / "multiqc.log"
  packages
    . "bash"
    . "multiqc"
    . "coreutils"
  # bash {
    mkdir -p {{LOG_DIR}}
    mkdir -p /tmp/multiqc-temp
    # oops: {{inputs}} includes tags!
    # FIX: the loop header was missing "; do" and the dir= assignment had
    # been glued onto the word list, making the snippet invalid bash.
    for file in {{inputs::salmon_output}} {{inputs::star_output}} {{inputs::fastqc_output}}; do
        dir="/tmp/multiqc-temp/$(dirname ${file})"
        mkdir -p ${dir}
        ln -t ${dir} ${file}
    done
    {{MULTIQC_EXEC}} -o {{MULTIQC_DIR}} /tmp/multiqc-temp >> {{outputs:log}} 2>&1
  }
;; Count reads per feature for SAMPLE with summarizeOverlaps (via
;; count_reads.R), using the counting options from the "counting"
;; section of the configuration.
process count_reads (with sample)
  inputs
    . gtf: GTF_FILE
    . bam:
    file MAPPED_READS_DIR / sample "_Aligned.sortedByCoord.out.bam"
    . bai:
    file MAPPED_READS_DIR / sample "_Aligned.sortedByCoord.out.bam.bai"
    . script:
    file SCRIPTS_DIR / "count_reads.R"
  outputs
    file MAPPED_READS_DIR / sample ".read_counts.csv"
    . log:
    file LOG_DIR / sample ".count_reads.log"
  packages
    . "coreutils"
    . "r-minimal"
    . "r-genomicalignments"
    . "r-rsamtools"
    . "r-rtracklayer"
  values
    . sample-prefix: sample
    ;; The R script expects literal "TRUE"/"FALSE" strings.
    . single_end:
    if : single-end? sample
      . "TRUE" "FALSE"
    . mode:
    get config "counting" "counting_mode"
    . nonunique:
    get config "counting" "count_nonunique"
    . strandedness:
    get config "counting" "strandedness"
    . feature:
    get config "counting" "feature"
    . group_by:
    get config "counting" "group_feature_by"
    . yield_size:
    get config "counting" "yield_size"
  # {
    # FIX: create the log directory before appending to the log, as every
    # other process in this file does.
    mkdir -p {{LOG_DIR}}
    {{RSCRIPT_EXEC}} {{inputs:script}} \
        {{values:sample-prefix}} {{inputs:bam}} {{inputs:gtf}} \
        {{values:single_end}} {{values:mode}} {{values:nonunique}} {{values:strandedness}} \
        {{values:feature}} {{values:group_by}} {{values:yield_size}} >> {{outputs:log}} 2>&1
  }
;; Merge the per-sample .read_counts.csv files into one STAR count matrix.
process collate_read_counts
  inputs
    . counts:
    files MAPPED_READS_DIR / SAMPLES ".read_counts.csv"
    . script:
    file SCRIPTS_DIR / "collate_read_counts.R"
  outputs
    file COUNTS_DIR / "raw_counts" / "counts_from_star.tsv"
    . log:
    file LOG_DIR / "collate_read_counts.log"
  packages
    . "r-minimal"
    . "r-data-table"
    . "coreutils"
  # {
    # FIX: ensure the log and output directories exist before touching or
    # redirecting into them (consistent with the other processes).
    mkdir -p {{LOG_DIR}}
    mkdir -p {{COUNTS_DIR}}/raw_counts
    touch {{outputs:log}} # TODO: the script doesn't produce any log!
    {{RSCRIPT_EXEC}} {{inputs:script}} {{MAPPED_READS_DIR}} {{outputs}} >> {{outputs:log}} 2>&1
  }
;; Alternative STAR-based count matrix via htseq-count over all sample
;; BAMs at once; the trailing stats lines (__no_feature etc.) are split
;; off into a separate stats file.
process htseq_count
  inputs
    . gtf: GTF_FILE
    . bams:
    files MAPPED_READS_DIR / SAMPLES "_Aligned.sortedByCoord.out.bam"
  outputs
    . stats_file:
    file COUNTS_DIR / "raw_counts" / "htseq_stats.txt"
    . counts_file:
    file COUNTS_DIR / "raw_counts" / "counts_from_star_htseq-count.txt"
    . log:
    file LOG_DIR / "htseq-count.log"
  packages
    . "sed"
    . "htseq"
    . "coreutils"
  values
    . tmp_file:
    file COUNTS_DIR / "raw_counts" / "htseq_out.txt"
  # {
    # FIX: create the target directories before redirecting into them.
    mkdir -p {{LOG_DIR}}
    mkdir -p {{COUNTS_DIR}}/raw_counts
    echo {{SAMPLES}} | sed 's/ /\t/g' > {{values:tmp_file}}
    # FIX: use the configured executable (HTSEQ_COUNT_EXEC was defined but
    # unused; the snippet hard-coded "htseq-count", dropping any args).
    {{HTSEQ_COUNT_EXEC}} {{inputs::bams}} {{inputs:gtf}} 1>> {{values:tmp_file}} 2>> {{outputs:log}}
    # move feature count stats (e.g. __no_feature etc) to another file
    echo {{SAMPLES}} > {{outputs:stats_file}}
    tail -n 5 {{values:tmp_file}} >> {{outputs:stats_file}}
    # only keep feature counts in the counts table (remove stats)
    head -n -5 {{values:tmp_file}} > {{outputs:counts_file}}
    # remove temp file
    rm {{values:tmp_file}}
  }
;; DESeq2 normalization of the STAR count matrix; also produces the size
;; factors consumed by genomeCoverage.
process norm_counts_deseq
  inputs
    . counts_file:
    file COUNTS_DIR / "raw_counts" / "counts_from_star.tsv"
    . colDataFile:
    first
      process-outputs translate_sample_sheet_for_report
    . script:
    file SCRIPTS_DIR / "norm_counts_deseq.R"
  outputs
    . outdir:
    file COUNTS_DIR / "normalized"
    . size_factors:
    file COUNTS_DIR / "normalized" / "deseq_size_factors.txt"
    . norm_counts:
    file COUNTS_DIR / "normalized" / "deseq_normalized_counts.tsv"
    . log:
    file LOG_DIR / "norm_counts_deseq.log"
  packages
    . "coreutils"
    . "r-minimal"
    . "r-deseq2"
  # {
    # FIX: ensure the log and output directories exist before writing
    # (consistent with the other processes; nothing else guarantees them).
    mkdir -p {{LOG_DIR}}
    mkdir -p {{outputs:outdir}}
    {{RSCRIPT_EXEC}} {{inputs:script}} \
        {{inputs:counts_file}} \
        {{inputs:colDataFile}} \
        {{outputs:outdir}} >> {{outputs:log}} 2>&1
  }
;; Per-analysis DESeq2 HTML report based on the STAR count matrix.
process report1 (with analysis)
  inputs
    . logo: LOGO
    . gtf: GTF_FILE
    . counts:
    file COUNTS_DIR / "raw_counts" / "counts_from_star.tsv"
    . coldata:
    first
      process-outputs translate_sample_sheet_for_report
    . reportR:
    file SCRIPTS_DIR / "runDeseqReport.R"
    . reportRmd:
    file SCRIPTS_DIR / "deseqReport.Rmd"
  outputs
    . outdir:
    file OUTPUT_DIR / "report"
    file OUTPUT_DIR / "report" / analysis ".star.deseq.report.html"
    . log:
    file LOG_DIR / analysis ".report.star.log"
  packages
    . "coreutils"
    . "r-minimal"
    . "r-corrplot"
    . "r-crosstalk"
    . "r-deseq2"
    . "r-dt"
    . "r-ggplot2"
    . "r-ggrepel"
    . "r-gprofiler"
    . "r-knitr"
    . "r-pheatmap"
    . "r-plotly"
    . "r-reshape2"
    . "r-rmarkdown"
    . "r-rtracklayer"
    . "r-scales"
    . "r-summarizedexperiment"
  values
    . analysis-name: analysis
    . case:
    get DE_ANALYSIS_LIST analysis "case_sample_groups"
    . control:
    get DE_ANALYSIS_LIST analysis "control_sample_groups"
    . covariates:
    get DE_ANALYSIS_LIST analysis "covariates"
  # {
    # FIX: create the log and report directories; no process in this file
    # creates {{outputs:outdir}} before it is used as --workdir.
    mkdir -p {{LOG_DIR}}
    mkdir -p {{outputs:outdir}}
    {{RSCRIPT_EXEC}} {{inputs:reportR}} \
        --logo={{inputs:logo}} \
        --prefix='{{values:analysis-name}}.star' \
        --reportFile={{inputs:reportRmd}} \
        --countDataFile={{inputs:counts}} \
        --colDataFile={{inputs:coldata}} \
        --gtfFile={{inputs:gtf}} \
        --caseSampleGroups='{{values:case}}' \
        --controlSampleGroups='{{values:control}}' \
        --covariates='{{values:covariates}}' \
        --workdir={{outputs:outdir}} \
        --organism='{{ORGANISM}}' >> {{outputs:log}} 2>&1
  }
;; Per-analysis DESeq2 HTML report based on SALMON transcript counts.
process report2 (with analysis)
  inputs
    . logo: LOGO
    . gtf: GTF_FILE
    . counts:
    file COUNTS_DIR / "raw_counts" / "counts_from_SALMON.transcripts.tsv"
    . coldata:
    first
      process-outputs translate_sample_sheet_for_report
    . reportR:
    file SCRIPTS_DIR / "runDeseqReport.R"
    . reportRmd:
    file SCRIPTS_DIR / "deseqReport.Rmd"
  outputs
    . outdir:
    file OUTPUT_DIR / "report"
    file OUTPUT_DIR / "report" / analysis ".salmon.transcripts.deseq.report.html"
    . log:
    file LOG_DIR / analysis ".report.salmon.transcripts.log"
  packages
    . "coreutils"
    . "r-minimal"
    . "r-corrplot"
    . "r-crosstalk"
    . "r-deseq2"
    . "r-dt"
    . "r-ggplot2"
    . "r-ggrepel"
    . "r-gprofiler"
    . "r-knitr"
    . "r-pheatmap"
    . "r-plotly"
    . "r-reshape2"
    . "r-rmarkdown"
    . "r-rtracklayer"
    . "r-scales"
    . "r-summarizedexperiment"
  values
    . analysis-name: analysis
    . case:
    get DE_ANALYSIS_LIST analysis "case_sample_groups"
    . control:
    get DE_ANALYSIS_LIST analysis "control_sample_groups"
    . covariates:
    get DE_ANALYSIS_LIST analysis "covariates"
  # {
    # FIX: create the log and report directories; no process in this file
    # creates {{outputs:outdir}} before it is used as --workdir.
    mkdir -p {{LOG_DIR}}
    mkdir -p {{outputs:outdir}}
    {{RSCRIPT_EXEC}} {{inputs:reportR}} \
        --logo={{inputs:logo}} \
        --prefix='{{values:analysis-name}}.salmon.transcripts' \
        --reportFile={{inputs:reportRmd}} \
        --countDataFile={{inputs:counts}} \
        --colDataFile={{inputs:coldata}} \
        --gtfFile={{inputs:gtf}} \
        --caseSampleGroups='{{values:case}}' \
        --controlSampleGroups='{{values:control}}' \
        --covariates='{{values:covariates}}' \
        --workdir={{outputs:outdir}} \
        --organism='{{ORGANISM}}' >> {{outputs:log}} 2>&1
  }
;; Per-analysis DESeq2 HTML report based on SALMON gene counts.
process report3 (with analysis)
  inputs
    . logo: LOGO
    . gtf: GTF_FILE
    . counts:
    file COUNTS_DIR / "raw_counts" / "counts_from_SALMON.genes.tsv"
    . coldata:
    first
      process-outputs translate_sample_sheet_for_report
    . reportR:
    file SCRIPTS_DIR / "runDeseqReport.R"
    . reportRmd:
    file SCRIPTS_DIR / "deseqReport.Rmd"
  outputs
    . outdir:
    file OUTPUT_DIR / "report"
    file OUTPUT_DIR / "report" / analysis ".salmon.genes.deseq.report.html"
    . log:
    file LOG_DIR / analysis ".report.salmon.genes.log"
  packages
    . "coreutils"
    . "r-minimal"
    . "r-corrplot"
    . "r-crosstalk"
    . "r-deseq2"
    . "r-dt"
    . "r-ggplot2"
    . "r-ggrepel"
    . "r-gprofiler"
    . "r-knitr"
    . "r-pheatmap"
    . "r-plotly"
    . "r-reshape2"
    . "r-rmarkdown"
    . "r-rtracklayer"
    . "r-scales"
    . "r-summarizedexperiment"
  values
    . analysis-name: analysis
    . case:
    get DE_ANALYSIS_LIST analysis "case_sample_groups"
    . control:
    get DE_ANALYSIS_LIST analysis "control_sample_groups"
    . covariates:
    get DE_ANALYSIS_LIST analysis "covariates"
  # {
    # FIX: create the log and report directories; no process in this file
    # creates {{outputs:outdir}} before it is used as --workdir.
    mkdir -p {{LOG_DIR}}
    mkdir -p {{outputs:outdir}}
    {{RSCRIPT_EXEC}} {{inputs:reportR}} \
        --logo={{inputs:logo}} \
        --prefix='{{values:analysis-name}}.salmon.genes' \
        --reportFile={{inputs:reportRmd}} \
        --countDataFile={{inputs:counts}} \
        --colDataFile={{inputs:coldata}} \
        --gtfFile={{inputs:gtf}} \
        --caseSampleGroups='{{values:case}}' \
        --controlSampleGroups='{{values:control}}' \
        --covariates='{{values:covariates}}' \
        --workdir={{outputs:outdir}} \
        --organism='{{ORGANISM}}' >> {{outputs:log}} 2>&1
  }
;; Names of the configured differential-expression analyses.
define ANALYSES
  map first DE_ANALYSIS_LIST
;; Default umbrella target: depends on every final pipeline artifact.
process final_report
  synopsis "Produce a comprehensive report. This is the default target."
  inputs
    file OUTPUT_DIR / "star_index" / "SAindex"
    file OUTPUT_DIR / "salmon_index" / "sa.bin"
    file MULTIQC_DIR / "multiqc_report.html"
    file COUNTS_DIR / "raw_counts" / "counts_from_SALMON.transcripts.tsv"
    file COUNTS_DIR / "raw_counts" / "counts_from_SALMON.genes.tsv"
    file COUNTS_DIR / "normalized" / "TPM_counts_from_SALMON.transcripts.tsv"
    file COUNTS_DIR / "normalized" / "TPM_counts_from_SALMON.genes.tsv"
    file COUNTS_DIR / "raw_counts" / "counts_from_star.tsv"
    file COUNTS_DIR / "normalized" / "deseq_normalized_counts.tsv"
    file COUNTS_DIR / "normalized" / "deseq_size_factors.txt"
    files BIGWIG_DIR / SAMPLES ".forward.bigwig"
    files BIGWIG_DIR / SAMPLES ".reverse.bigwig"
    ;; One report of each flavour per analysis.
    files OUTPUT_DIR / "report" / ANALYSES
      list ".star.deseq.report.html"
        . ".salmon.transcripts.deseq.report.html"
        . ".salmon.genes.deseq.report.html"
  procedure
    ' display "Final reports have been generated."
;; Convenience targets selecting one report flavour each.
process deseq_report_star
  synopsis "Produce one HTML report for each analysis based on STAR results."
  inputs
    files OUTPUT_DIR / "report" / ANALYSES ".star.deseq.report.html"
  procedure
    ' display "STAR reports have been generated."
process deseq_report_salmon_transcripts
  synopsis "Produce one HTML report for each analysis based on SALMON results at transcript level."
  inputs
    files OUTPUT_DIR / "report" / ANALYSES ".salmon.transcripts.deseq.report.html"
  procedure
    ' display "SALMON transcript reports have been generated."
process deseq_report_salmon_genes
  synopsis "Produce one HTML report for each analysis based on SALMON results at gene level."
  inputs
    files OUTPUT_DIR / "report" / ANALYSES ".salmon.genes.deseq.report.html"
  procedure
    ' display "SALMON genes reports have been generated."
;; Convenience targets for individual pipeline stages; each merely
;; depends on that stage's outputs and prints a confirmation.
process star-map-done
  synopsis "Produce a STAR mapping results in BAM file format."
  inputs
    files MAPPED_READS_DIR / SAMPLES "_Aligned.sortedByCoord.out.bam"
  procedure
    ' display "STAR mappings have been generated."
process star_counts
  synopsis "Get count matrix from STAR mapping results using summarizeOverlaps."
  inputs
    file COUNTS_DIR / "raw_counts" / "counts_from_star.tsv"
  procedure
    ' display "STAR count matrix has been generated."
process genome_coverage
  synopsis "Compute genome coverage values from BAM files - save in bigwig format"
  inputs
    files BIGWIG_DIR / SAMPLES (list ".forward" ".reverse") ".bigwig"
  procedure
    ' display "Genome coverage has been computed."
process fastqc-done
  synopsis "post-mapping quality control by FASTQC."
  inputs
    files FASTQC_DIR / SAMPLES "_Aligned.sortedByCoord.out_fastqc.zip"
  procedure
    ' display "FastQC quality control completed."
process salmon-index-done
  synopsis "Create SALMON index file."
  inputs
    file OUTPUT_DIR / "salmon_index" / "sa.bin"
  procedure
    ' display "SALMON index has been generated."
process salmon-quant-done
  synopsis "Calculate read counts per transcript using SALMON."
  inputs
    files SALMON_DIR / SAMPLES / (list "quant.sf" "quant.genes.sf")
  procedure
    ' display "SALMON read counts has been computed."
process salmon-counts-done
  synopsis "Get count matrix from SALMON quant."
  inputs
    file COUNTS_DIR / "raw_counts" / "counts_from_SALMON.transcripts.tsv"
    file COUNTS_DIR / "raw_counts" / "counts_from_SALMON.genes.tsv"
    file COUNTS_DIR / "normalized" / "TPM_counts_from_SALMON.transcripts.tsv"
    file COUNTS_DIR / "normalized" / "TPM_counts_from_SALMON.genes.tsv"
  procedure
    ' display "SALMON count matrix has been computed."
process multiqc-done
  synopsis "Get multiQC report based on STAR alignments and fastQC reports."
  inputs
    file MULTIQC_DIR / "multiqc_report.html"
  procedure
    ' display "multiQC report completed."
;; Top-level workflow: instantiate the per-sample and per-analysis
;; process templates, add the singleton and convenience processes, and
;; let auto-connect wire them together by matching inputs to outputs.
workflow rnaseq
  processes
    apply auto-connect
      append
        ;; Per-sample processes; the trimming step depends on whether
        ;; the sample's library is single- or paired-end.
        feed SAMPLES to
          lambda : sample
            if : single-end? sample
              trim_galore_se sample
              trim_galore_pe sample
          . star_map
          . sort_bam
          . index_bam
          . fastqc
          . salmon_quant
          . genomeCoverage
          . count_reads
        ;; One report of each flavour per configured analysis.
        feed ANALYSES to
          . report1
          . report2
          . report3
        ;; Singleton processes.
        list
          . translate_sample_sheet_for_report
          . star_index
          . salmon_index
          . counts_from_SALMON
          . multiqc
          . collate_read_counts
          . htseq_count
          . norm_counts_deseq
        ;; Aggregate convenience targets.
        list
          . final_report
          . deseq_report_star
          . deseq_report_salmon_transcripts
          . deseq_report_salmon_genes
          . star-map-done
          . star_counts
          . genome_coverage
          . fastqc-done
          . salmon-index-done
          . salmon-quant-done
          . salmon-counts-done
          . multiqc-done