;; pigx-rnaseq.w

;; PiGx RNAseq Pipeline.
;; 
;; Copyright © 2017, 2018 Bora Uyar <bora.uyar@mdc-berlin.de>
;; Copyright © 2017, 2018 Jonathan Ronen <yablee@gmail.com>
;; Copyright © 2017-2021 Ricardo Wurmus <ricardo.wurmus@mdc-berlin.de>
;; 
;; This file is part of the PiGx RNAseq Pipeline.
;; 
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; 
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;; 
;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <http://www.gnu.org/licenses/>.

;; TODO: validate configuration file!
;; include: string-append (config['locations']['pkglibexecdir'], 'scripts/validate_input.py')
;; validate_config(config)


;;; Locations

;; Input and output locations, read verbatim from the "locations"
;; section of the configuration.
define GTF_FILE : get config "locations" "gtf-file"

define SAMPLE_SHEET_FILE : get config "locations" "sample-sheet"

define GENOME_FASTA : get config "locations" "genome-fasta"

define CDNA_FASTA : get config "locations" "cdna-fasta"

define READS_DIR : get config "locations" "reads-dir"

define OUTPUT_DIR : get config "locations" "output-dir"

;; Path of the PiGx logo below the configured "pkgdatadir".  When the
;; environment variable PIGX_UNINSTALLED is set the logo is looked up
;; in the "images" sub-directory, otherwise directly in "pkgdatadir".
define LOGO
  string-append
    get config "locations" "pkgdatadir"
    if : getenv "PIGX_UNINSTALLED"
      ;; Literal values on their own line need a leading dot in Wisp;
      ;; otherwise the line is read as a procedure application.
      . "/images/Logo_PiGx.png"
      . "/Logo_PiGx.png"

;; Directory holding the helper scripts, below the configured
;; "pkglibexecdir".  The suffix is kept on the same line as the call so
;; that Wisp does not read a lone string line as an application.
define SCRIPTS_DIR
  string-append (get config "locations" "pkglibexecdir") "/scripts"

;; Sub-directories of OUTPUT_DIR used by the individual processes.
define TRIMMED_READS_DIR : string-append OUTPUT_DIR "/trimmed_reads"

define LOG_DIR : string-append OUTPUT_DIR "/logs"

define FASTQC_DIR : string-append OUTPUT_DIR "/fastqc"

define MULTIQC_DIR : string-append OUTPUT_DIR "/multiqc"

define MAPPED_READS_DIR : string-append OUTPUT_DIR "/mapped_reads"

define BIGWIG_DIR : string-append OUTPUT_DIR "/bigwig_files"

define COUNTS_DIR : string-append OUTPUT_DIR "/feature_counts"

define SALMON_DIR : string-append OUTPUT_DIR "/salmon_output"


;;; Tools

define : toolArgs name
  ;; Return the "args" entry configured for the tool NAME.  When the
  ;; lookup raises an exception or yields #f, return the empty string.
  or
    false-if-exception
      get config "tools" name "args"
    ;; A lone literal needs the leading dot; without it Wisp wraps the
    ;; line in parentheses and tries to apply the string.
    . ""

define : tool name
  ;; Return the command prefix for the tool NAME: its configured
  ;; "executable", a single space, and its configured arguments (see
  ;; toolArgs; possibly the empty string).
  string-append
    get config "tools" name "executable"
    ;; Dot-prefixed so that Wisp treats the separator as a literal
    ;; argument rather than as an application.
    . " "
    toolArgs name

;; Command prefixes (executable plus configured arguments) for every
;; external tool, as assembled by the procedure tool.
define FASTQC_EXEC : tool "fastqc"
define MULTIQC_EXEC : tool "multiqc"
define STAR_EXEC_MAP : tool "star_map"
define STAR_EXEC_INDEX : tool "star_index"
define SALMON_INDEX_EXEC : tool "salmon_index"
define SALMON_QUANT_EXEC : tool "salmon_quant"
define TRIM_GALORE_EXEC : tool "trim-galore"
define SAMTOOLS_EXEC : tool "samtools"
define HTSEQ_COUNT_EXEC : tool "htseq-count"
define GUNZIP_EXEC : tool "gunzip"
define RSCRIPT_EXEC : tool "Rscript"
define SED_EXEC : tool "sed"


;;; Configurations

;; Thread counts configured per rule under "execution" / "rules".
define STAR_INDEX_THREADS : get config "execution" "rules" "star_index" "threads"

define SALMON_INDEX_THREADS : get config "execution" "rules" "salmon_index" "threads"

define STAR_MAP_THREADS : get config "execution" "rules" "star_map" "threads"

define SALMON_QUANT_THREADS : get config "execution" "rules" "salmon_quant" "threads"

;; Value of the top-level "organism" configuration entry.
define ORGANISM : get config "organism"

;; The configured "DEanalyses" entry; the empty list when the lookup
;; raises an exception or yields #f.
define DE_ANALYSIS_LIST
  or (false-if-exception (get config "DEanalyses")) (list)

;; Load sample sheet
;;
;; SAMPLE_SHEET is a list of association lists, one per data row of the
;; comma-separated file SAMPLE_SHEET_FILE.  Each association list maps
;; the column names of the header row (strings) to the values of that
;; row (strings).  An empty file yields the empty list.
;;
;; NOTE(review): this replaces the untranslated Python csv.reader code.
;; Fields are split on every comma, so quoted fields that contain
;; commas are not supported.
define SAMPLE_SHEET
  let*
    :
      text
        call-with-input-file SAMPLE_SHEET_FILE
          @ (ice-9 textual-ports) get-string-all
      ;; Drop a trailing carriage return (CRLF files) and blank lines.
      lines
        remove string-null?
          map
            lambda : line
              string-trim-right line #\return
            string-split text #\newline
      rows
        map
          lambda : line
            string-split line #\,
          . lines
    if : null? rows
      list
      map
        lambda : row
          ;; Pair every header field with the value in the same column.
          map cons (first rows) row
        cdr rows

;; The values of the "name" column, in sample sheet order.
define SAMPLES
  map
    lambda : record
      assoc-ref record "name"
    . SAMPLE_SHEET

define : lookup column predicate . fields
  . "Return the values of FIELDS for every record of SAMPLE_SHEET whose
value in COLUMN satisfies PREDICATE.  PREDICATE is either a procedure of
one argument or a value that is compared with equal?.  The result is a
list with one element per matching record; each element is the list of
that record's values for FIELDS, in the order given."
  ;; Turn a plain value into a one-argument equality test.
  define matches?
    if : procedure? predicate
      . predicate
      lambda : value
        equal? predicate value
  define records
    filter
      lambda : record
        ;; assoc-ref takes the association list first, then the key.
        matches? : assoc-ref record column
      . SAMPLE_SHEET
  ;; Return only the selected fields
  map
    lambda : record
      map
        lambda : field
          assoc-ref record field
        . fields
    . records

define : reads-for-sample sample
  ;; Return the non-empty "reads" and "reads2" values of all sample
  ;; sheet records named SAMPLE as one flat list of strings.
  ;;
  ;; lookup returns one list of field values per matching record, so
  ;; the result is flattened before empty strings are removed.
  remove string-null?
    concatenate
      lookup "name" sample "reads" "reads2"

define : single-end? sample
  ;; The docstring carries a leading dot so that Wisp reads it as a
  ;; literal instead of wrapping the lone string in parentheses.
  . "Return #true if the SAMPLE library is single ended, i.e. if exactly
one reads file is recorded for SAMPLE."
  equal? 1
    length : reads-for-sample sample

define : trimmed-reads-for-sample sample
  ;; Return the list of trimmed reads files for SAMPLE inside
  ;; TRIMMED_READS_DIR: one "_R" file for single-end libraries, an
  ;; "_R1"/"_R2" pair otherwise.
  define : trimmed suffix
    string-append TRIMMED_READS_DIR "/" sample suffix
  if : single-end? sample
    list : trimmed "_R.fastq.gz"
    list
      trimmed "_R1.fastq.gz"
      trimmed "_R2.fastq.gz"

define : trim-galore-input sample
  ;; Expand READS_DIR, "/" and the reads file names recorded for SAMPLE
  ;; with expand.
  expand READS_DIR "/" (reads-for-sample sample)


;;; Processes

;; Run translate_sample_sheet_for_report.R on the sample sheet.  The
;; declared output is "colData.tsv" in the current working directory.
;; NOTE(review): the output path is not passed to the script;
;; presumably the script writes colData.tsv to its working directory --
;; confirm against the script.
process translate_sample_sheet_for_report
  inputs
    . SAMPLE_SHEET_FILE
  outputs
    ;; getcwd is Guile's procedure for the current working directory;
    ;; the suffix stays on the same line so that it is read as an
    ;; argument and not as an application.
    string-append (getcwd) "/colData.tsv"
  packages
    . "r-minimal"
  # {
    {{RSCRIPT_EXEC}} {{SCRIPTS_DIR}}/translate_sample_sheet_for_report.R {{inputs}}
  }

;; Trim the paired-end reads of SAMPLE with Trim Galore and move the
;; results to the canonical "<sample>_R1/_R2.fastq.gz" names.
process trim_galore_pe (with sample)
  inputs
    trim-galore-input sample
  outputs
    . r1:
    string-append TRIMMED_READS_DIR "/" sample "_R1.fastq.gz"
    . r2:
    string-append TRIMMED_READS_DIR "/" sample "_R2.fastq.gz"
    . log:
    string-append LOG_DIR "/trim_galore_" sample ".log"
  packages
    . "trim-galore"
  procedure
    ;; lookup returns one list of field values per matching record, so
    ;; two applications of first are needed to get the file name.
    define read1
      first
        first
          lookup "name" sample "reads"
    define read2
      first
        first
          lookup "name" sample "reads2"
    ;; TODO: use basename instead?  Or is the extension not always the same?
    ;; For example ".fastq.gz" and ".fq.gz"?
    ;; Strip the last two dot-separated components of FILE-NAME.
    define : without-extension file-name
      string-join
        drop-right
          string-split file-name #\.
          ;; Literals on their own line need the leading dot.
          . 2
        . "."
    ;; Names of the files that the command below moves to the declared
    ;; outputs.
    define tmp1
      string-append TRIMMED_READS_DIR "/"
        without-extension read1
        . "_val_1.fq.gz"
    define tmp2
      string-append TRIMMED_READS_DIR "/"
        without-extension read2
        . "_val_2.fq.gz"
    # {
      {{TRIM_GALORE_EXEC}} -o {{TRIMMED_READS_DIR}} \
        --paired {{inputs}} >> {{outputs:log}} 2>&1 && \
        sleep 10 && \
        mv {{tmp1}} {{outputs:r1}} && \
        mv {{tmp2}} {{outputs:r2}}
    }

;; Trim the single-end reads of SAMPLE with Trim Galore and move the
;; result to the canonical "<sample>_R.fastq.gz" name.
process trim_galore_se (with sample)
  inputs
    trim-galore-input sample
  outputs
    . trimmed:
    string-append TRIMMED_READS_DIR "/" sample "_R.fastq.gz"
    . log:
    string-append LOG_DIR "/trim_galore_" sample ".log"
  packages
    . "trim-galore"
  procedure
    define input1
      pick first inputs
    ;; lookup returns one list of field values per matching record, so
    ;; two applications of first are needed to get the file name.
    define file
      first
        first
          lookup "name" sample "reads"
    ;; TODO: use basename instead?  Or is the extension not always the same?
    ;; For example ".fastq.gz" and ".fq.gz"?
    ;; FILE without its last two dot-separated components.
    define without-extension
      string-join
        drop-right
          string-split file #\.
          ;; Literals on their own line need the leading dot.
          . 2
        . "."
    ;; Name of the file that the command below moves to the declared
    ;; output.
    define tmp
      string-append TRIMMED_READS_DIR "/" without-extension "_trimmed.fq.gz"
    # {
      {{TRIM_GALORE_EXEC}} -o {{TRIMMED_READS_DIR}} \
        {{input1}} >> {{outputs:log}} 2>&1 && \
        sleep 10 && \
        mv {{tmp}} {{outputs:trimmed}}
    }

;; Generate the STAR genome index below OUTPUT_DIR/star_index from the
;; genome FASTA file and the GTF annotation.
process star_index
  inputs
    . gtf: GTF_FILE
    . genome: GENOME_FASTA
  outputs
    . star_index_file: (string-append OUTPUT_DIR "/star_index/SAindex")
    . star_index_dir: (string-append OUTPUT_DIR "/star_index")
    . log: (string-append LOG_DIR "/star_index.log")
  packages "star"
  # {
    {{STAR_EXEC_INDEX}} \
      --runMode genomeGenerate \
      --runThreadN {{STAR_INDEX_THREADS}} \
      --genomeDir {{outputs:star_index_dir}} \
      --genomeFastaFiles {{inputs:genome}} \
      --sjdbGTFfile {{inputs:gtf}} >> {{outputs:log}} 2>&1
  }

;; Map the trimmed reads of SAMPLE against the STAR index and write an
;; unsorted BAM file with the prefix MAPPED_READS_DIR/<sample>_.
process star_map (with sample)
  inputs
    ;; This process really depends on the whole index directory; both
    ;; the index file and the directory produced by star_index are
    ;; declared as inputs.
    . index_file:
    pick star_index_file:
      process-outputs star_index
    . index_dir:
    pick star_index_dir:
      process-outputs star_index
    . reads:
    trimmed-reads-for-sample sample
  outputs
    string-append MAPPED_READS_DIR "/" sample "_Aligned.out.bam"
    . log:
    string-append LOG_DIR "/star_map_" sample ".log"
  packages
    . "star"
    . "gzip"
  procedure
    define output_prefix
      string-append MAPPED_READS_DIR "/" sample "_"
    ;; {{inputs::reads}} expands to all files tagged "reads:", i.e. one
    ;; file for single-end and two files for paired-end libraries.
    # {
      {{STAR_EXEC_MAP}}                         \
        --runThreadN {{STAR_MAP_THREADS}}       \
        --genomeDir {{inputs:index_dir}}        \
        --readFilesIn {{inputs::reads}}         \
        --readFilesCommand '{{GUNZIP_EXEC}} -c' \
        --outSAMtype BAM Unsorted               \
        --outFileNamePrefix {{output_prefix}} >> {{outputs:log}} 2>&1
    }

;; Sort the STAR alignment of SAMPLE with "samtools sort".
process sort_bam (with sample)
  inputs
    string-append MAPPED_READS_DIR "/" sample "_Aligned.out.bam"
  outputs
    . bam: (string-append MAPPED_READS_DIR "/" sample "_Aligned.sortedByCoord.out.bam")
    . log: (string-append LOG_DIR "/samtools_sort_" sample ".log")
  packages "samtools"
  # {
    {{SAMTOOLS_EXEC}} sort -o {{outputs:bam}} {{inputs}} >> {{outputs:log}} 2>&1
  }

;; Index the sorted BAM file of SAMPLE with "samtools index".
process index_bam (with sample)
  inputs
    string-append MAPPED_READS_DIR "/" sample "_Aligned.sortedByCoord.out.bam"
  outputs
    . bai: (string-append MAPPED_READS_DIR  "/" sample "_Aligned.sortedByCoord.out.bam.bai")
    . log: (string-append LOG_DIR "/samtools_index_" sample ".log")
  packages "samtools"
  # {
    {{SAMTOOLS_EXEC}} index {{inputs}} {{outputs:bai}} >> {{outputs:log}} 2>&1
  }

;; Run FastQC in BAM mode on the sorted alignment of SAMPLE; results
;; are written to FASTQC_DIR.
process fastqc (with sample)
  inputs
    string-append MAPPED_READS_DIR "/" sample "_Aligned.sortedByCoord.out.bam"
  outputs
    string-append FASTQC_DIR "/" sample "_Aligned.sortedByCoord.out_fastqc.zip"
    . log: (string-append LOG_DIR "/fastqc_" sample ".log")
  packages "fastqc"
  # {
    {{FASTQC_EXEC}} -o {{FASTQC_DIR}} -f bam {{inputs}} >> {{outputs:log}} 2>&1
  }

;; Build the Salmon index below OUTPUT_DIR/salmon_index from the cDNA
;; FASTA file.
process salmon_index
  inputs
    . CDNA_FASTA
  outputs
    . salmon_index_dir: (string-append OUTPUT_DIR "/salmon_index")
    . salmon_index_file: (string-append OUTPUT_DIR "/salmon_index/sa.bin")
    . log: (string-append LOG_DIR "/salmon_index.log")
  packages "salmon"
  # {
    {{SALMON_INDEX_EXEC}} -t {{inputs}} \
      -i {{outputs:salmon_index_dir}}   \
      -p {{SALMON_INDEX_THREADS}} >> {{outputs:log}} 2>&1
  }

;; This process really depends on the whole directory, not just the index_file (see
;; params.index_dir), but we can't register it as an input/output
;; in its own right since Snakemake 5.
process salmon_quant (with sample)
  inputs
    . gtf: GTF_FILE
    . index_file:
    pick salmon_index_file:
      process-outputs salmon_index
    . index_dir:
    pick salmon_index_dir:
      process-outputs salmon_index ; TODO: this is actually "params" instead of "outputs"
    . reads:
    trimmed-reads-for-sample sample
  outputs
    string-append SALMON_DIR "/" sample "/quant.sf"
    string-append SALMON_DIR "/" sample "/quant.genes.sf"
    . outfolder:
    string-append SALMON_DIR "/" sample
    . log:
    string-append LOG_DIR "/salmon_quant_" sample ".log"
  packages
    . "salmon"
  procedure
    ;; All input files tagged "reads:".
    define reads
      pick * reads: inputs
    define single?
      equal? 1
        length reads
    ;; Read file options: "-r" for a single-end library, "-1"/"-2" for
    ;; a paired-end library.  The string-append call and its arguments
    ;; share one line; a line that starts with a string would be read
    ;; by Wisp as an application of that string.
    define arguments
      if single?
        string-append "-r " (first reads) " "
        string-append "-1 " (first reads) " -2 " (second reads) " "
    # {
      {{SALMON_QUANT_EXEC}}              \
        -i {{inputs:index_dir}}          \
        -l A -p {{SALMON_QUANT_THREADS}} \
        {{arguments}}                    \
        -o {{outputs:outfolder}}         \
        --seqBias --gcBias               \
        -g {{inputs:gtf}} >> {{outputs:log}} 2>&1
    }

;; Run counts_matrix_from_SALMON.R on SALMON_DIR, COUNTS_DIR and the
;; column data file.  The declared outputs are the raw and TPM count
;; tables at transcript and gene level.
process counts_from_SALMON
  inputs
    . quantFiles:
    expand SALMON_DIR "/" SAMPLES "/quant.sf"
    . quantGenesFiles:
    expand SALMON_DIR "/" SAMPLES "/quant.genes.sf"
    ;; Tags must end in a colon to be read as keywords.
    . colDataFile:
    pick first
      process-outputs translate_sample_sheet_for_report
    . script:
    string-append SCRIPTS_DIR "/counts_matrix_from_SALMON.R"
  outputs
    string-append COUNTS_DIR "/raw_counts/counts_from_SALMON.transcripts.tsv"
    string-append COUNTS_DIR "/raw_counts/counts_from_SALMON.genes.tsv"
    string-append COUNTS_DIR "/normalized/TPM_counts_from_SALMON.transcripts.tsv"
    string-append COUNTS_DIR "/normalized/TPM_counts_from_SALMON.genes.tsv"
    . log:
    string-append LOG_DIR "/salmon_import_counts.log"
  packages
    . "r-minimal"
  ;; The script is a tagged input, not a variable, so it is referenced
  ;; as {{inputs:script}}.
  # {
    {{RSCRIPT_EXEC}} {{inputs:script}} \
      {{SALMON_DIR}} {{COUNTS_DIR}} {{inputs:colDataFile}} >> {{outputs:log}} 2>&1
  }

;; Run export_bigwig.R on the sorted BAM file of SAMPLE with the DESeq
;; size factors file.  The declared outputs are the ".forward" and
;; ".reverse" bigwig files of SAMPLE in BIGWIG_DIR.
process genomeCoverage (with sample)
  inputs
    . size_factors_file:
    string-append COUNTS_DIR "/normalized/deseq_size_factors.txt"
    . bam:
    string-append MAPPED_READS_DIR "/" sample "_Aligned.sortedByCoord.out.bam"
    . bai:
    string-append MAPPED_READS_DIR "/" sample "_Aligned.sortedByCoord.out.bam.bai"
    . script:
    string-append SCRIPTS_DIR "/export_bigwig.R"
  outputs
    expand
      . BIGWIG_DIR "/" sample
      list ".forward" ".reverse"
      . ".bigwig"
    . log:
    string-append LOG_DIR "/genomeCoverage_" sample ".log"
  packages
    . "r-minimal"
  ;; The script is a tagged input, not a variable, so it is referenced
  ;; as {{inputs:script}}.
  # {
    {{RSCRIPT_EXEC}} {{inputs:script}} \
      {{inputs:bam}} {{sample}} {{inputs:size_factors_file}} {{BIGWIG_DIR}} >> {{outputs:log}} 2>&1
  }

;; Run MultiQC over OUTPUT_DIR and write the report to MULTIQC_DIR.
process multiqc
  inputs
    . salmon_output: (expand SALMON_DIR "/" SAMPLES "/quant.sf")
    . star_output: (expand MAPPED_READS_DIR "/" SAMPLES "_Aligned.sortedByCoord.out.bam")
    . fastqc_output: (expand FASTQC_DIR "/" SAMPLES "_Aligned.sortedByCoord.out_fastqc.zip")
  outputs
    string-append MULTIQC_DIR "/multiqc_report.html"
    . log: (string-append LOG_DIR "/multiqc.log")
  packages "multiqc"
  ;; TODO: what is OUTPUT_DIR?  Why isn't this using the declared inputs?
  # {
    {{MULTIQC_EXEC}} -o {{MULTIQC_DIR}} {{OUTPUT_DIR}} >> {{outputs:log}} 2>&1
  }
    
;; Run count_reads.R on the sorted BAM file of SAMPLE, passing the
;; settings from the "counting" section of the configuration.
process count_reads (with sample)
  inputs
    . gtf: GTF_FILE
    . bam:
    string-append MAPPED_READS_DIR "/" sample "_Aligned.sortedByCoord.out.bam"
    . bai:
    string-append MAPPED_READS_DIR "/" sample "_Aligned.sortedByCoord.out.bam.bai"
    . script:
    string-append SCRIPTS_DIR "/count_reads.R"
  outputs
    string-append MAPPED_READS_DIR "/" sample ".read_counts.csv"
    . log:
    string-append LOG_DIR "/" sample ".count_reads.log"
  ;; Same package as the other processes that run Rscript.
  packages
    . "r-minimal"
  procedure
    ;; NOTE(review): single_end is a Scheme boolean; confirm that its
    ;; rendering in the command line is what count_reads.R expects.
    define single_end
      single-end? sample
    define mode
      get config "counting" "counting_mode"
    define nonunique
      get config "counting" "count_nonunique"
    define strandedness
      get config "counting" "strandedness"
    define feature
      get config "counting" "feature"
    define group_by
      get config "counting" "group_feature_by"
    define yield_size
      get config "counting" "yield_size"
    ;; The script is a tagged input, not a variable, so it is
    ;; referenced as {{inputs:script}}.
    # {
      {{RSCRIPT_EXEC}} {{inputs:script}} \
        {{sample}} {{inputs:bam}} {{inputs:gtf}} \
        {{single_end}} {{mode}} {{nonunique}} {{strandedness}} \
        {{feature}} {{group_by}} {{yield_size}} >> {{outputs:log}} 2>&1
    }

;; Run collate_read_counts.R on MAPPED_READS_DIR to combine the
;; per-sample read count files into one table.
process collate_read_counts
  inputs
    . counts:
    expand MAPPED_READS_DIR "/" SAMPLES ".read_counts.csv"
    . script:
    string-append SCRIPTS_DIR "/collate_read_counts.R"
  outputs
    ;; Tagged so that the command can name the table alone; a bare
    ;; {{outputs}} would also include the log file.
    . counts_file:
    string-append COUNTS_DIR "/raw_counts/counts_from_star.tsv"
    . log:
    string-append LOG_DIR "/collate_read_counts.log"
  packages
    . "r-minimal"
  ;; The script is a tagged input, not a variable, so it is referenced
  ;; as {{inputs:script}}.
  # {
    {{RSCRIPT_EXEC}} {{inputs:script}} {{MAPPED_READS_DIR}} {{outputs:counts_file}} >> {{outputs:log}} 2>&1
  }

;; Count reads per feature with htseq-count over the sorted BAM files
;; of all samples, then split its output into a statistics file (last
;; five lines) and a counts table (everything else).
process htseq_count
  inputs
    . gtf: GTF_FILE
    . bams:
    expand MAPPED_READS_DIR "/" SAMPLES "_Aligned.sortedByCoord.out.bam"
  outputs
    . stats_file:
    string-append COUNTS_DIR "/raw_counts/htseq_stats.txt"
    . counts_file:
    string-append COUNTS_DIR "/raw_counts/counts_from_star_htseq-count.txt"
    . log:
    string-append LOG_DIR "/htseq-count.log"
  packages
    . "sed"
    . "htseq" ; htseq-count
    . "coreutils" ; head, tail, rm
  procedure
    ;; Intermediate file holding the header line followed by the raw
    ;; htseq-count output; removed at the end of the script.
    define tmp_file
      string-append COUNTS_DIR "/raw_counts/htseq_out.txt"
    ;; sed and htseq-count are invoked through the configured
    ;; SED_EXEC and HTSEQ_COUNT_EXEC, like the other tools in this file.
    # {
        echo {{SAMPLES}} | {{SED_EXEC}} 's/ /\t/g' > {{tmp_file}}
        {{HTSEQ_COUNT_EXEC}} {{inputs::bams}} {{inputs:gtf}} 1>> {{tmp_file}} 2>> {{outputs:log}}

        # move feature count stats (e.g. __no_feature etc) to another file
        echo {{SAMPLES}} > {{outputs:stats_file}}
        tail -n 5 {{tmp_file}} >> {{outputs:stats_file}}

        # only keep feature counts in the counts table (remove stats)
        head -n -5 {{tmp_file}} > {{outputs:counts_file}}

        # remove temp file
        rm {{tmp_file}}
    }

;; Create a normalized counts table including all samples using the
;; median-of-ratios normalization procedure of deseq2.
process norm_counts_deseq
  inputs
    . counts_file:
    string-append COUNTS_DIR "/raw_counts/counts_from_star.tsv"
    . colDataFile:
    pick first
      process-outputs translate_sample_sheet_for_report
    . script:
    string-append SCRIPTS_DIR "/norm_counts_deseq.R"
  outputs
    ;; Only the output directory is passed to the script; the other
    ;; two files are declared as results located inside it.
    . outdir:
    string-append COUNTS_DIR "/normalized"
    . size_factors:
    string-append COUNTS_DIR "/normalized/deseq_size_factors.txt"
    . norm_counts:
    string-append COUNTS_DIR "/normalized/deseq_normalized_counts.tsv"
    . log:
    string-append LOG_DIR "/norm_counts_deseq.log"
  packages
    . "r-minimal"
  ;; The script is a tagged input, not a variable, so it is referenced
  ;; as {{inputs:script}}.
  # {
    {{RSCRIPT_EXEC}} {{inputs:script}} \
      {{inputs:counts_file}} \
      {{inputs:colDataFile}} \
      {{outputs:outdir}} >> {{outputs:log}} 2>&1
  }

;; Render the DESeq report for ANALYSIS from the STAR-based count
;; table by running runDeseqReport.R with deseqReport.Rmd.
process report1 (with analysis)
  inputs
    . gtf: GTF_FILE
    . counts: (string-append COUNTS_DIR "/raw_counts/counts_from_star.tsv")
    . coldata: (pick first (process-outputs translate_sample_sheet_for_report))
  outputs
    string-append OUTPUT_DIR "/report/" analysis ".star.deseq.report.html"
    . log: (string-append LOG_DIR "/" analysis ".report.star.log")
  packages "r-minimal"
  procedure
    ;; Working directory handed to the report script.
    define report_dir : string-append OUTPUT_DIR "/report"
    define runner : string-append SCRIPTS_DIR "/runDeseqReport.R"
    define template : string-append SCRIPTS_DIR "/deseqReport.Rmd"
    ;; Settings of this analysis, looked up in DE_ANALYSIS_LIST.
    define case_groups : get DE_ANALYSIS_LIST analysis "case_sample_groups"
    define control_groups : get DE_ANALYSIS_LIST analysis "control_sample_groups"
    define covariate_list : get DE_ANALYSIS_LIST analysis "covariates"
    # {
      {{RSCRIPT_EXEC}} {{runner}} \
        --logo={{LOGO}} \
        --prefix='{{analysis}}.star' \
        --reportFile={{template}} \
        --countDataFile={{inputs:counts}} \
        --colDataFile={{inputs:coldata}} \
        --gtfFile={{inputs:gtf}} \
        --caseSampleGroups='{{case_groups}}' \
        --controlSampleGroups='{{control_groups}}' \
        --covariates='{{covariate_list}}' \
        --workdir={{report_dir}} \
        --organism='{{ORGANISM}}' >> {{outputs:log}} 2>&1
    }

;; Render the DESeq report for ANALYSIS from the Salmon
;; transcript-level count table by running runDeseqReport.R with
;; deseqReport.Rmd.
process report2 (with analysis)
  inputs
    . gtf: GTF_FILE
    . counts: (string-append COUNTS_DIR "/raw_counts/counts_from_SALMON.transcripts.tsv")
    . coldata: (pick first (process-outputs translate_sample_sheet_for_report))
  outputs
    string-append OUTPUT_DIR "/report/" analysis ".salmon.transcripts.deseq.report.html"
    . log: (string-append LOG_DIR "/" analysis ".report.salmon.transcripts.log")
  packages "r-minimal"
  procedure
    ;; Working directory handed to the report script.
    define report_dir : string-append OUTPUT_DIR "/report"
    define runner : string-append SCRIPTS_DIR "/runDeseqReport.R"
    define template : string-append SCRIPTS_DIR "/deseqReport.Rmd"
    ;; Settings of this analysis, looked up in DE_ANALYSIS_LIST.
    define case_groups : get DE_ANALYSIS_LIST analysis "case_sample_groups"
    define control_groups : get DE_ANALYSIS_LIST analysis "control_sample_groups"
    define covariate_list : get DE_ANALYSIS_LIST analysis "covariates"
    # {
      {{RSCRIPT_EXEC}} {{runner}} \
        --logo={{LOGO}} \
        --prefix='{{analysis}}.salmon.transcripts' \
        --reportFile={{template}} \
        --countDataFile={{inputs:counts}} \
        --colDataFile={{inputs:coldata}} \
        --gtfFile={{inputs:gtf}} \
        --caseSampleGroups='{{case_groups}}' \
        --controlSampleGroups='{{control_groups}}' \
        --covariates='{{covariate_list}}' \
        --workdir={{report_dir}} \
        --organism='{{ORGANISM}}' >> {{outputs:log}} 2>&1
    }

;; Render the DESeq report for ANALYSIS from the Salmon gene-level
;; count table by running runDeseqReport.R with deseqReport.Rmd.
process report3 (with analysis)
  inputs
    . gtf: GTF_FILE
    . counts:
    string-append COUNTS_DIR "/raw_counts/counts_from_SALMON.genes.tsv"
    . coldata:
    pick first
      process-outputs translate_sample_sheet_for_report
  outputs
    string-append OUTPUT_DIR "/report/" analysis ".salmon.genes.deseq.report.html"
    . log:
    string-append LOG_DIR "/" analysis ".report.salmon.genes.log"
  packages
    . "r-minimal"
  procedure
    ;; Working directory handed to the report script.
    define outdir
      string-append OUTPUT_DIR "/report"
    define reportR
      string-append SCRIPTS_DIR "/runDeseqReport.R"
    define reportRmd
      string-append SCRIPTS_DIR "/deseqReport.Rmd"
    ;; Settings of this analysis, looked up in DE_ANALYSIS_LIST.
    define case
      get DE_ANALYSIS_LIST analysis "case_sample_groups"
    define control
      get DE_ANALYSIS_LIST analysis "control_sample_groups"
    define covariates
      get DE_ANALYSIS_LIST analysis "covariates"
    # {
      {{RSCRIPT_EXEC}} {{reportR}} \
        --logo={{LOGO}} \
        --prefix='{{analysis}}.salmon.genes' \
        --reportFile={{reportRmd}} \
        --countDataFile={{inputs:counts}} \
        --colDataFile={{inputs:coldata}} \
        --gtfFile={{inputs:gtf}} \
        --caseSampleGroups='{{case}}' \
        --controlSampleGroups='{{control}}' \
        --covariates='{{covariates}}' \
        --workdir={{outdir}} \
        --organism='{{ORGANISM}}' >> {{outputs:log}} 2>&1
    }


;; Aggregate target listing every file of a complete pipeline run.  The
;; paths are spelled exactly like the outputs of the processes above.
;; NOTE(review): these files are produced by other processes; declaring
;; them as inputs of this process may be the better fit -- confirm.
process final-report
  synopsis "Produce a comprehensive report.  This is the default target."
  outputs
    let
      :
        ;; NOTE(review): assumes DE_ANALYSIS_LIST is an association
        ;; list keyed by analysis name (a string) -- confirm.
        analyses : map car DE_ANALYSIS_LIST
      append
        list
          string-append OUTPUT_DIR "/star_index/SAindex"
          string-append OUTPUT_DIR "/salmon_index/sa.bin"
          string-append MULTIQC_DIR "/multiqc_report.html"
          string-append COUNTS_DIR "/raw_counts/counts_from_SALMON.transcripts.tsv"
          string-append COUNTS_DIR "/raw_counts/counts_from_SALMON.genes.tsv"
          string-append COUNTS_DIR "/normalized/TPM_counts_from_SALMON.transcripts.tsv"
          string-append COUNTS_DIR "/normalized/TPM_counts_from_SALMON.genes.tsv"
          string-append COUNTS_DIR "/raw_counts/counts_from_star.tsv"
          string-append COUNTS_DIR "/normalized/deseq_normalized_counts.tsv"
          string-append COUNTS_DIR "/normalized/deseq_size_factors.txt"
        expand BIGWIG_DIR "/" SAMPLES ".forward.bigwig"
        expand BIGWIG_DIR "/" SAMPLES ".reverse.bigwig"
        expand OUTPUT_DIR "/report/" analyses ".star.deseq.report.html"
        expand OUTPUT_DIR "/report/" analyses ".salmon.transcripts.deseq.report.html"
        expand OUTPUT_DIR "/report/" analyses ".salmon.genes.deseq.report.html"

;; Named targets.  Each entry is a list whose first element is the
;; target name and whose remaining elements form an association list
;; with the keys "description" and "files".
;; NOTE(review): translated from the former Python dictionary; how this
;; table is consumed is not visible in this file -- confirm the shape.
define targets
  let
    :
      ;; NOTE(review): assumes DE_ANALYSIS_LIST is an association list
      ;; keyed by analysis name (a string) -- confirm.
      analyses : map car DE_ANALYSIS_LIST
    list
      list "final-report"
        cons "description" "Produce a comprehensive report.  This is the default target."
        cons "files"
          process-outputs final-report
      list "deseq_report_star"
        cons "description" "Produce one HTML report for each analysis based on STAR results."
        cons "files"
          expand OUTPUT_DIR "/report/" analyses ".star.deseq.report.html"
      list "deseq_report_salmon_transcripts"
        cons "description" "Produce one HTML report for each analysis based on SALMON results at transcript level."
        cons "files"
          expand OUTPUT_DIR "/report/" analyses ".salmon.transcripts.deseq.report.html"
      list "deseq_report_salmon_genes"
        cons "description" "Produce one HTML report for each analysis based on SALMON results at gene level."
        cons "files"
          expand OUTPUT_DIR "/report/" analyses ".salmon.genes.deseq.report.html"
      list "star_map"
        cons "description" "Produce a STAR mapping results in BAM file format."
        cons "files"
          expand MAPPED_READS_DIR "/" SAMPLES "_Aligned.sortedByCoord.out.bam"
      list "star_counts"
        cons "description" "Get count matrix from STAR mapping results using summarizeOverlaps."
        cons "files"
          list
            string-append COUNTS_DIR "/raw_counts/counts_from_star.tsv"
      list "genome_coverage"
        cons "description" "Compute genome coverage values from BAM files - save in bigwig format"
        cons "files"
          append
            expand BIGWIG_DIR "/" SAMPLES ".forward.bigwig"
            expand BIGWIG_DIR "/" SAMPLES ".reverse.bigwig"
      list "fastqc"
        cons "description" "post-mapping quality control by FASTQC."
        cons "files"
          expand FASTQC_DIR "/" SAMPLES "_Aligned.sortedByCoord.out_fastqc.zip"
      list "salmon_index"
        cons "description" "Create SALMON index file."
        cons "files"
          list
            string-append OUTPUT_DIR "/salmon_index/sa.bin"
      list "salmon_quant"
        cons "description" "Calculate read counts per transcript using SALMON."
        cons "files"
          append
            expand SALMON_DIR "/" SAMPLES "/quant.sf"
            expand SALMON_DIR "/" SAMPLES "/quant.genes.sf"
      list "salmon_counts"
        cons "description" "Get count matrix from SALMON quant."
        cons "files"
          list
            string-append COUNTS_DIR "/raw_counts/counts_from_SALMON.transcripts.tsv"
            string-append COUNTS_DIR "/raw_counts/counts_from_SALMON.genes.tsv"
            string-append COUNTS_DIR "/normalized/TPM_counts_from_SALMON.transcripts.tsv"
            string-append COUNTS_DIR "/normalized/TPM_counts_from_SALMON.genes.tsv"
      list "multiqc"
        cons "description" "Get multiQC report based on STAR alignments and fastQC reports."
        cons "files"
          list
            string-append MULTIQC_DIR "/multiqc_report.html"

;; Selected output files from the above set: the configured
;; "execution" / "target" value, or the default "final-report" when the
;; lookup raises an exception or yields #f or the empty list.
define selected_targets
  let
    :
      configured
        false-if-exception
          get config "execution" "target"
    if (and configured (not (null? configured)))
      . configured
      list "final-report"
;; Generated by Ricardo Wurmus using scpaste at Fri Jan 15 15:28:52 2021. CET. (original)