diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..4af8fe5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+assets/databases/emu_database/species_taxid.fasta
+assets/databases/emu_database/taxonomy.tsv
+assets/databases/krona/taxonomy/images.dmp
+assets/databases/krona/taxonomy/taxonomy.tab
diff --git a/CITATIONS.md b/CITATIONS.md
index 432b20f..1e26ee4 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -20,6 +20,10 @@ This pipeline uses code and infrastructure developed and maintained by the [nf-c
 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
 
   > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
 
+- [Cutadapt](https://journal.embnet.org/index.php/embnetjournal/article/view/200/479)
+
+  > Martin M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal. 2011;17(1):10-12. doi: 10.14806/ej.17.1.200.
+
 ## Software packaging/containerisation tools
 
 - [Anaconda](https://anaconda.com)
diff --git a/README.md b/README.md
index 507ad34..60186d9 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ Longfilt, EMU, and Krona. EMU is the tool that does the taxonomic profiling of
 ensures portability and reproducibility across different computational
 infrastructures. It has been tested on Linux and on mac M1 (not recommended,
 quite slow). FastQC and Nanoplot performs quality control, Porechop_ABI trims
-adapters (optional)), Longfilt filters the fastq-files such that only reads
+adapters (optional), Longfilt filters the fastq-files such that only reads
 that are close to 1500 bp are used (optional), EMU assigns taxonomic
 classifications, and Krona visualises the result table from EMU. The pipeline
 enables microbial community analysis, offering insights into the diversity in
@@ -35,9 +35,9 @@ and update software dependencies.
 
 ![Pipeline overview image](docs/images/gms_16s_20240415.png)
 
-Roadmap/workflow. Only the NanoPore flow is available. Minor testing has been
-done for PacBio and it seems to work. short read has no support yet. MultiQC
-collects only info from FastQC and some information about software versions and
+The Nanopore and short-read workflows are available.
+Minor testing has been done for PacBio and it seems to work.
+MultiQC collects only info from FastQC and some information about software versions and
 pipeline info.
 
 ![Krona plot](https://github.com/genomic-medicine-sweden/gms_16S/assets/115690981/dcdd5da4-135c-48c4-b64f-82f0452b5520)
@@ -111,12 +111,45 @@ nextflow run main.nf \
     --barcodes_samplesheet /[absolute path to barcode sample sheet]/sample_sheet_merge.csv
 ```
 
+## Runs with short reads
+
+When running gms_16s with short reads, primer sequences are trimmed with Cutadapt by default.
+The primers can be provided in the samplesheet or passed as parameters (`--FW_primer`, `--RV_primer`). Primer trimming can be skipped with `--skip_cutadapt`.
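+
+Under the hood, the CUTADAPT module assembles its command line from `conf/modules.config`. With the default settings, the effective call for a paired-end sample is roughly the sketch below (sample name, file names, and primer sequences are placeholders; see `conf/modules.config` and `modules/nf-core/cutadapt/main.nf` for the exact invocation). The samplesheet and run commands that follow show the two ways of supplying the primers.
+
+```bash
+# Rough sketch of the Cutadapt call built by the pipeline (placeholder names).
+# -O and -e come from --cutadapt_min_overlap and --cutadapt_max_error_rate,
+# -g/-G are the forward/reverse primers (samplesheet columns or --FW_primer/--RV_primer),
+# and --discard-untrimmed is dropped when --retain_untrimmed is set.
+cutadapt \
+    --minimum-length 1 \
+    -O 3 \
+    -e 0.1 \
+    -g GTGCCAGCMGCCGCGGTAA \
+    -G GGACTACNVGGGTWTCTAAT \
+    --discard-untrimmed \
+    -o SAMPLE.trimmed_1.trim.fastq.gz \
+    -p SAMPLE.trimmed_2.trim.fastq.gz \
+    SAMPLE_R1_001.fastq.gz SAMPLE_R2_001.fastq.gz \
+    > SAMPLE.trimmed.cutadapt.log
+```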
+
+```bash
+sample,fastq_1,fastq_2,FW_primer,RV_primer
+SAMPLE,/absolute_path/gms_16s/Sample_R1_001.fastq.gz,/absolute_path/gms_16s/Sample_R2_001.fastq.gz,GTGCCAGCMGCCGCGGTAA,GGACTACNVGGGTWTCTAAT
+```
+
+
+```bash
+nextflow run main.nf \
+    --input sample_sheet.csv \
+    --outdir [absolute path]/gms_16S/results \
+    --db /[absolute path]/gms_16S/assets/databases/emu_database \
+    --seqtype sr \
+    -profile singularity
+```
+
+```bash
+nextflow run main.nf \
+    --input sample_sheet.csv \
+    --outdir [absolute path]/gms_16S/results \
+    --db /[absolute path]/gms_16S/assets/databases/emu_database \
+    --seqtype sr \
+    -profile singularity \
+    --FW_primer AGCTGNCCTG \
+    --RV_primer TGCATNCTGA
+```
+
+
 
 ## Sample sheets
 
 There are two types of sample sheets that can be used: 1) If the fastq files
 are already concatenated/merged i.e., the fastq-files in Nanopore barcode
 directories have been concataned already, the `--input` can be used.
-`--input` expects a `.csv` sample sheet with 3 columns (note the header
+`--input` expects a `.csv` sample sheet with 4 columns (note the header
 names). It looks like this (See also the `examples` directory):
 
 ```csv
diff --git a/conf/modules.config b/conf/modules.config
index ccd622a..620ce14 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -92,6 +92,32 @@ process {
 
     }
 
+    withName: CUTADAPT {
+        ext.args = { [
+            "--minimum-length 1",
+            "-O ${params.cutadapt_min_overlap}",
+            "-e ${params.cutadapt_max_error_rate}",
+            // Use primers from the samplesheet if available, otherwise fall back to params
+            meta.fw_primer ? "-g ${meta.fw_primer}" : (params.FW_primer ? "-g ${params.FW_primer}" : ''),
+            meta.rv_primer ? "-G ${meta.rv_primer}" : (params.RV_primer ? "-G ${params.RV_primer}" : ''),
+            params.retain_untrimmed ? '' : "--discard-untrimmed"
+        ].findAll { it }.join(' ').trim() } // Remove empty strings and join arguments
+
+        ext.prefix = { "${meta.id}.trimmed" }
+
+        publishDir = [
+            [ path: { "${params.outdir}/cutadapt" },
+              mode: params.publish_dir_mode,
+              pattern: "*.log"
+            ],
+            [ path: { "${params.outdir}/cutadapt/trimmed_reads" },
+              mode: params.publish_dir_mode,
+              pattern: "*.trim.fastq.gz",
+              enabled: params.save_intermediates
+            ]
+        ]
+    }
+
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {
diff --git a/modules/nf-core/cutadapt/main.nf b/modules/nf-core/cutadapt/main.nf
new file mode 100755
index 0000000..8d168ef
--- /dev/null
+++ b/modules/nf-core/cutadapt/main.nf
@@ -0,0 +1,51 @@
+process CUTADAPT {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/cutadapt:4.6--py39hf95cd2a_1' :
+        'quay.io/biocontainers/cutadapt:4.6--py39hf95cd2a_1' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path('*.trim.fastq.gz'), emit: reads
+    tuple val(meta), path('*.log')          , emit: log
+    path "versions.yml"                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def trimmed = meta.single_end ?
"-o ${prefix}.trim.fastq.gz" : "-o ${prefix}_1.trim.fastq.gz -p ${prefix}_2.trim.fastq.gz" + """ + cutadapt \\ + -Z \\ + --cores $task.cpus \\ + $args \\ + $trimmed \\ + $reads \\ + > ${prefix}.cutadapt.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? "${prefix}.trim.fastq.gz" : "${prefix}_1.trim.fastq.gz ${prefix}_2.trim.fastq.gz" + """ + touch ${prefix}.cutadapt.log + touch ${trimmed} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cutadapt/meta.yml b/modules/nf-core/cutadapt/meta.yml new file mode 100755 index 0000000..c6f736c --- /dev/null +++ b/modules/nf-core/cutadapt/meta.yml @@ -0,0 +1,58 @@ +me: cutadapt +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - adapter trimming + - adapters + - quality trimming +tools: + - cuatadapt: + description: | + Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads. + documentation: https://cutadapt.readthedocs.io/en/stable/index.html + doi: 10.14806/ej.17.1.200 + licence: ["MIT"] + identifier: biotools:cutadapt +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.trim.fastq.gz": + type: file + description: The trimmed/modified fastq reads + pattern: "*fastq.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: cuatadapt log file + pattern: "*cutadapt.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/nextflow.config b/nextflow.config index cc1bdf4..d7a7485 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,8 +22,15 @@ params { keep_files = false output_unclassified = true +//cutadapt + FW_primer = null + RV_primer = null + cutadapt_min_overlap = 3 + cutadapt_max_error_rate = 0.1 + retain_untrimmed = false + skip_cutadapt = false + save_intermediates = false - // // porechop_abi adapter_trimming = false diff --git a/nextflow_schema.json b/nextflow_schema.json index fd960b1..92b30bb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -134,6 +134,50 @@ "description": "minimum mean quality threshold" } } + }, + "cutadapt_options": { + "title": "Cutadapt options", + "type": "object", + "description": "Options for cutadapt which is used for removing adapter sequences", + "default": "", + "properties": { + "FW_primer": { + "type": "string", + "description": "Forward primer" + }, + "RV_primer": { + "type": "string", + "description": "Reverse primer" + }, + "cutadapt_max_error_rate": { + "type": "number", + "default": 0.1, + "description": "Sets the maximum error rate for valid matches of primer sequences with reads for cutadapt (-e)." 
+ }, + + "cutadapt_min_overlap": { + "type": "integer", + "default": 3, + "description": "Minimum overlap for valid matches of primer sequences with reads for cutadapt (-O)." + }, + + "retain_untrimmed": { + "type": "boolean", + "description": "Cutadapt will retain untrimmed reads, choose only if input reads are not expected to contain primer sequences.", + "default": true + }, + "save_intermediates": { + "type": "boolean", + "default": false, + "description": "Save trimmed files from cutadapt " + }, + + "skip_cutadapt": { + "type": "boolean", + "default": false, + "description": "Skip primer trimming with cutadapt" + } + } }, "krona_options": { "title": "krona_options", diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 0aecf87..454a51a 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -22,12 +22,14 @@ workflow INPUT_CHECK { // Function to get list of [ meta, [ fastq_1, fastq_2 ] ] def create_fastq_channel(LinkedHashMap row) { - // create meta map + // Create meta map def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() + meta.id = row.sample + meta.single_end = row.single_end.toBoolean() + meta.fw_primer = row.FW_primer + meta.rv_primer = row.RV_primer - // add path(s) of the fastq file(s) to the meta map + // Add path(s) of the fastq file(s) to the meta map def fastq_meta = [] if (!file(row.fastq_1).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" diff --git a/workflows/gmsemu.nf b/workflows/gmsemu.nf index 022e2e5..d9b3e49 100644 --- a/workflows/gmsemu.nf +++ b/workflows/gmsemu.nf @@ -9,12 +9,7 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) // Validate input parameters WorkflowGmsemu.initialise(params, log) -// TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist - -// def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] -// for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - def checkPathParamList = [] if (!params.merge_fastq_pass) { checkPathParamList += params.input @@ -23,14 +18,13 @@ checkPathParamList += [params.multiqc_config, params.fasta] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters -// if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.input) { ch_input = file(params.input) - } else if (params.merge_fastq_pass) { - // do nothing. - } else { - exit 1, 'Input samplesheet not specified. Unless '--merge_fastq_pass' is used, a sample_sheet.csv must be defined!' - } +} else if (params.merge_fastq_pass) { + // Do nothing. +} else { + exit 1, "Input samplesheet not specified. Unless '--merge_fastq_pass' is used, a sample_sheet.csv must be defined!" +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -39,8 +33,8 @@ if (params.input) { */ ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) /* @@ -49,9 +43,6 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// include { INPUT_CHECK } from '../subworkflows/local/input_check' /* @@ -60,87 +51,46 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// MODULE: Installed directly from nf-core/modules -// include { MERGE_BARCODES } from '../modules/local/merge_barcodes/main.nf' include { MERGE_BARCODES_SAMPLESHEET } from '../modules/local/merge_barcodes_samplesheet/main.nf' include { GENERATE_INPUT } from '../modules/local/generate_input/main.nf' -//include { FALCO } from '../modules/nf-core/falco/main.nf' -include { NANOPLOT as NANOPLOT1 } from '../modules/nf-core/nanoplot/main.nf' -include { NANOPLOT as NANOPLOT2 } from '../modules/nf-core/nanoplot/main.nf' -include { PORECHOP_ABI } from '../modules/nf-core/porechop/abi/main.nf' -include { FILTLONG } from '../modules/nf-core/filtlong/main.nf' include { EMU_ABUNDANCE } from '../modules/local/emu/abundance/main.nf' include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main.nf' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { FASTQC } from '../modules/nf-core/fastqc/main' +include { CUTADAPT } from '../modules/nf-core/cutadapt/main.nf' +include { NANOPLOT as NANOPLOT1 } from '../modules/nf-core/nanoplot/main.nf' +include { NANOPLOT as NANOPLOT2 } from '../modules/nf-core/nanoplot/main.nf' +include { PORECHOP_ABI } from '../modules/nf-core/porechop/abi/main.nf' +include { FILTLONG } from '../modules/nf-core/filtlong/main.nf' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow GMSEMU { - ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - - if ( params.merge_fastq_pass && !params.barcodes_samplesheet) { - MERGE_BARCODES (params.merge_fastq_pass) - //GENERATE_INPUT(file("${params.outdir}/fastq_pass_merged")) + // Merge fastq and generate input based on seqtype + if (params.merge_fastq_pass && !params.barcodes_samplesheet) { + MERGE_BARCODES(params.merge_fastq_pass) GENERATE_INPUT(MERGE_BARCODES.out.fastq_dir_merged) - // ch_input = file(params.outdir + 'samplesheet_merged.csv') ch_input = GENERATE_INPUT.out.sample_sheet_merged - } else if ( params.merge_fastq_pass && params.barcodes_samplesheet) { - MERGE_BARCODES_SAMPLESHEET (params.barcodes_samplesheet, params.merge_fastq_pass) -// merged_files = (params.outdir + '/fastq_pass_merged') - GENERATE_INPUT (MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged) + } else if (params.merge_fastq_pass && params.barcodes_samplesheet) { + 
MERGE_BARCODES_SAMPLESHEET(params.barcodes_samplesheet, params.merge_fastq_pass) + GENERATE_INPUT(MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged) ch_input = GENERATE_INPUT.out.sample_sheet_merged } - - - - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - ch_input - ) + // Validate and stage input files + INPUT_CHECK(ch_input) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - - - // - // MODULE: Run Falco - // FALCO ( - // INPUT_CHECK.out.reads - // ) - - - - // - // MODULE: Run Nanoplot1 - NANOPLOT1 ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(NANOPLOT1.out.versions.first()) - - - - // NANOPLOT2 ( - // INPUT_CHECK.out.reads - // ) - - - // // MODULE: Run FastQC // @@ -149,79 +99,72 @@ workflow GMSEMU { ) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + if (params.seqtype == "map-ont") { + NANOPLOT1(INPUT_CHECK.out.reads) + ch_versions = ch_versions.mix(NANOPLOT1.out.versions.first()) + if (params.adapter_trimming && !params.quality_filtering) { + PORECHOP_ABI(INPUT_CHECK.out.reads) + ch_processed_reads = PORECHOP_ABI.out.reads + .map { meta, reads -> [meta + [single_end: 1], reads] } - // MODULE: Run PORECHOP_ABI and filtering - // - if ( params.adapter_trimming && !params.quality_filtering) { - PORECHOP_ABI ( INPUT_CHECK.out.reads ) - - ch_processed_reads = PORECHOP_ABI.out.reads - .map { meta, reads -> [ meta + [single_end: 1], reads ] } + ch_versions = ch_versions.mix(PORECHOP_ABI.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(PORECHOP_ABI.out.log) - ch_versions = ch_versions.mix(PORECHOP_ABI.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_ABI.out.log ) + } else if (!params.adapter_trimming && params.quality_filtering) { + ch_processed_reads = FILTLONG( + INPUT_CHECK.out.reads.map { meta, reads -> [meta, [], reads] } + ).reads - } else if ( !params.adapter_trimming && params.quality_filtering) { + ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(FILTLONG.out.log) - ch_processed_reads = FILTLONG ( INPUT_CHECK.out.reads.map { meta, reads -> [meta, [], reads ] } ).reads - ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) + } else if (params.adapter_trimming && params.quality_filtering) { + // Both adapter trimming and quality filtering + PORECHOP_ABI(INPUT_CHECK.out.reads) - } else if ( !params.adapter_trimming && !params.quality_filtering) { + ch_clipped_reads = PORECHOP_ABI.out.reads + .map { meta, reads -> [meta + [single_end: 1], reads] } - ch_processed_reads = INPUT_CHECK.out.reads + ch_processed_reads = FILTLONG( + ch_clipped_reads.map { meta, reads -> [meta, [], reads] } + ).reads - } else { - PORECHOP_ABI ( INPUT_CHECK.out.reads ) - ch_clipped_reads = PORECHOP_ABI.out.reads - .map { meta, reads -> [ meta + [single_end: 1], reads ] } + ch_versions = ch_versions.mix(PORECHOP_ABI.out.versions.first()) + ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(PORECHOP_ABI.out.log) + ch_multiqc_files = ch_multiqc_files.mix(FILTLONG.out.log) - ch_processed_reads = FILTLONG ( ch_clipped_reads.map { meta, reads -> [ meta, [], reads ] } ).reads + } else { + ch_processed_reads = INPUT_CHECK.out.reads + } - ch_versions = ch_versions.mix(PORECHOP_ABI.out.versions.first()) - ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_ABI.out.log 
) - ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) + } else if (params.seqtype == "sr") { + // Short-read processing + if (!params.skip_cutadapt) { + CUTADAPT(INPUT_CHECK.out.reads) + ch_processed_reads = CUTADAPT.out.reads + ch_versions = ch_versions.mix(CUTADAPT.out.versions.first()) + } else { + ch_processed_reads = INPUT_CHECK.out.reads + } + } else { + error "Invalid seqtype. Please specify either 'map-ont' or 'sr'." } -// PORECHOP_ABI (INPUT_CHECK.out.reads) -// ch_processed_reads = PORECHOP_ABI.out.reads -// .map { meta, reads -> [ meta + [single_end: 1], reads ]} - -// ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - -// CUSTOM_DUMPSOFTWAREVERSIONS ( -// ch_versions.unique().collectFile(name: 'collated_versions.yml') -// ) - - - NANOPLOT2 ( - ch_processed_reads - ) - - - // MODULE: Run EMU_ABUNDANCE - EMU_ABUNDANCE ( - ch_processed_reads - ) + // Run EMU_ABUNDANCE + EMU_ABUNDANCE(ch_processed_reads) ch_versions = ch_versions.mix(EMU_ABUNDANCE.out.versions.first()) - - - if ( params.run_krona ) { - // MODULE: Run KRONA_KTIMPORTTAXONOMY - KRONA_KTIMPORTTAXONOMY (EMU_ABUNDANCE.out.report , file(params.krona_taxonomy_tab, checkExists: true) ) - ch_versions = ch_versions.mix( KRONA_KTIMPORTTAXONOMY.out.versions.first() ) + if (params.run_krona) { + KRONA_KTIMPORTTAXONOMY(EMU_ABUNDANCE.out.report, file(params.krona_taxonomy_tab, checkExists: true)) + ch_versions = ch_versions.mix(KRONA_KTIMPORTTAXONOMY.out.versions.first()) } - - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + CUSTOM_DUMPSOFTWAREVERSIONS(ch_versions.unique().collectFile(name: 'collated_versions.yml')) // // MODULE: MultiQC Preproccessed @@ -237,9 +180,11 @@ workflow GMSEMU { ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - // testing other tools - ch_multiqc_files = ch_multiqc_files.mix(NANOPLOT1.out.txt.collect{it[1]}.ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(NANOPLOT2.out.txt.collect{it[1]}.ifEmpty([])) + + if (params.seqtype == "sr" && !params.skip_cutadapt) { + ch_multiqc_files = ch_multiqc_files.mix(CUTADAPT.out.log.collect { it[1] }) + } + MULTIQC ( ch_multiqc_files.collect(), @@ -251,9 +196,6 @@ workflow GMSEMU { } - - - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPLETION EMAIL AND SUMMARY @@ -275,3 +217,4 @@ workflow.onComplete { THE END ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +