From 323cd4bce1702bf4b5444c3264136e0ec1b80486 Mon Sep 17 00:00:00 2001 From: Andersson Olivia Date: Mon, 18 Nov 2024 17:30:40 +0100 Subject: [PATCH 1/7] Added separate workflow for short reads with adapter trimming using cutadapt --- .gitignore | 4 + CITATIONS.md | 4 + conf/modules.config | 28 +++- main.nf | 12 +- modules/nf-core/cutadapt/main.nf | 51 ++++++++ modules/nf-core/cutadapt/meta.yml | 58 +++++++++ nextflow.config | 9 +- nextflow_schema.json | 44 +++++++ workflows/gmsemu_sr.nf | 204 ++++++++++++++++++++++++++++++ 9 files changed, 409 insertions(+), 5 deletions(-) create mode 100755 .gitignore create mode 100755 modules/nf-core/cutadapt/main.nf create mode 100755 modules/nf-core/cutadapt/meta.yml create mode 100644 workflows/gmsemu_sr.nf diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..4af8fe5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +assets/databases/emu_database/species_taxid.fasta +assets/databases/emu_database/taxonomy.tsv +assets/databases/krona/taxonomy/images.dmp +assets/databases/krona/taxonomy/taxonomy.tab diff --git a/CITATIONS.md b/CITATIONS.md index 432b20f..1e26ee4 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -20,6 +20,10 @@ This pipeline uses code and infrastructure developed and maintained by the [nf-c - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [Cutadapt](https://journal.embnet.org/index.php/embnetjournal/article/view/200/479) + + > Marcel, M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet. journal 17.1 (2011): pp-10. doi: 10.14806/ej.17.1.200. 
+ ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/conf/modules.config b/conf/modules.config index ccd622a..c0df13f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -25,8 +25,31 @@ process { pattern: 'fastq_pass_merged' ] } +withName: CUTADAPT { + ext.args = { [ + "--minimum-length 1", + "-O ${params.cutadapt_min_overlap}", + "-e ${params.cutadapt_max_error_rate}", + "-g ${params.FW_primer} -G ${params.RV_primer}", + params.retain_untrimmed ? '' : "--discard-untrimmed" + ].join(' ').trim() } + + ext.prefix = { "${meta.id}.trimmed" } + + publishDir = [ + [ path: { "${params.outdir}/cutadapt" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ path: { "${params.outdir}/cutadapt/trimmed_reads" }, + mode: params.publish_dir_mode, + pattern: "*.trim.fastq.gz", + enabled: params.save_intermediates + ] +] - + } +// withName: MERGE_BARCODES_SAMPLESHEET { publishDir = [ path: { "${params.outdir}/fastq_pass_merged" }, @@ -176,7 +199,8 @@ process { ] ] } - + } + diff --git a/main.nf b/main.nf index 02b3e7e..54282ea 100644 --- a/main.nf +++ b/main.nf @@ -33,12 +33,19 @@ WorkflowMain.initialise(workflow, params, log) */ include { GMSEMU } from './workflows/gmsemu' - +include { GMSEMU_sr } from './workflows/gmsemu_sr' // // WORKFLOW: Run main gms_16S analysis pipeline // + workflow GMS_EMU { - GMSEMU () + if (params.seqtype == "map-ont") { + GMSEMU() + } else if (params.seqtype == "sr") { + GMSEMU_sr() + } else { + error "Invalid seqtype. Please specify either 'map-ont' or 'sr'." 
+ } } /* @@ -59,3 +66,4 @@ workflow { THE END ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + diff --git a/modules/nf-core/cutadapt/main.nf b/modules/nf-core/cutadapt/main.nf new file mode 100755 index 0000000..3d3e571 --- /dev/null +++ b/modules/nf-core/cutadapt/main.nf @@ -0,0 +1,51 @@ +process CUTADAPT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cutadapt:4.6--py39hf95cd2a_1' : + 'biocontainers/cutadapt:4.6--py39hf95cd2a_1' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path('*.trim.fastq.gz'), emit: reads + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? "-o ${prefix}.trim.fastq.gz" : "-o ${prefix}_1.trim.fastq.gz -p ${prefix}_2.trim.fastq.gz" + """ + cutadapt \\ + -Z \\ + --cores $task.cpus \\ + $args \\ + $trimmed \\ + $reads \\ + > ${prefix}.cutadapt.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? 
"${prefix}.trim.fastq.gz" : "${prefix}_1.trim.fastq.gz ${prefix}_2.trim.fastq.gz" + """ + touch ${prefix}.cutadapt.log + touch ${trimmed} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cutadapt/meta.yml b/modules/nf-core/cutadapt/meta.yml new file mode 100755 index 0000000..c6f736c --- /dev/null +++ b/modules/nf-core/cutadapt/meta.yml @@ -0,0 +1,58 @@ +me: cutadapt +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - adapter trimming + - adapters + - quality trimming +tools: + - cuatadapt: + description: | + Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads. + documentation: https://cutadapt.readthedocs.io/en/stable/index.html + doi: 10.14806/ej.17.1.200 + licence: ["MIT"] + identifier: biotools:cutadapt +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.trim.fastq.gz": + type: file + description: The trimmed/modified fastq reads + pattern: "*fastq.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.log": + type: file + description: cuatadapt log file + pattern: "*cutadapt.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/nextflow.config b/nextflow.config index cc1bdf4..d7a7485 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,8 +22,15 @@ params { keep_files = false output_unclassified = true +//cutadapt + FW_primer = null + RV_primer = null + cutadapt_min_overlap = 3 + cutadapt_max_error_rate = 0.1 + retain_untrimmed = false + skip_cutadapt = false + save_intermediates = false - // // porechop_abi adapter_trimming = false diff --git a/nextflow_schema.json b/nextflow_schema.json index fd960b1..92b30bb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -134,6 +134,50 @@ "description": "minimum mean quality threshold" } } + }, + "cutadapt_options": { + "title": "Cutadapt options", + "type": "object", + "description": "Options for cutadapt which is used for removing adapter sequences", + "default": "", + "properties": { + "FW_primer": { + "type": "string", + "description": "Forward primer" + }, + "RV_primer": { + "type": "string", + "description": "Reverse primer" + }, + "cutadapt_max_error_rate": { + "type": "number", + "default": 0.1, + "description": "Sets the maximum error rate for valid matches of primer sequences with reads for cutadapt (-e)." + }, + + "cutadapt_min_overlap": { + "type": "integer", + "default": 3, + "description": "Minimum overlap for valid matches of primer sequences with reads for cutadapt (-O)." 
+ }, + + "retain_untrimmed": { + "type": "boolean", + "description": "Cutadapt will retain untrimmed reads, choose only if input reads are not expected to contain primer sequences.", + "default": true + }, + "save_intermediates": { + "type": "boolean", + "default": false, + "description": "Save trimmed files from cutadapt " + }, + + "skip_cutadapt": { + "type": "boolean", + "default": false, + "description": "Skip primer trimming with cutadapt" + } + } }, "krona_options": { "title": "krona_options", diff --git a/workflows/gmsemu_sr.nf b/workflows/gmsemu_sr.nf new file mode 100644 index 0000000..cef7e30 --- /dev/null +++ b/workflows/gmsemu_sr.nf @@ -0,0 +1,204 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowGmsemu.initialise(params, log) + +// TODO nf-core: Add all file path parameters for the pipeline to the list below +// Check input path parameters to see if they exist + +// def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] +// for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +def checkPathParamList = [] +if (!params.merge_fastq_pass) { + checkPathParamList += params.input +} +checkPathParamList += [params.multiqc_config, params.fasta] +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +// if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +if (params.input) { + ch_input = file(params.input) + } else if (params.merge_fastq_pass) { + // do nothing. + } else { + exit 1, 'Input samplesheet not specified. Unless '--merge_fastq_pass' is used, a sample_sheet.csv must be defined!' 
+ } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// +include { INPUT_CHECK } from '../subworkflows/local/input_check' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { MERGE_BARCODES } from '../modules/local/merge_barcodes/main.nf' +include { MERGE_BARCODES_SAMPLESHEET } from '../modules/local/merge_barcodes_samplesheet/main.nf' +include { GENERATE_INPUT } from '../modules/local/generate_input/main.nf' +include { EMU_ABUNDANCE } from '../modules/local/emu/abundance/main.nf' +include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main.nf' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { MULTIQC } from 
'../modules/nf-core/multiqc/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { CUTADAPT } from '../modules/nf-core/cutadapt/main.nf' //added + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary +def multiqc_report = [] + +workflow GMSEMU_sr { + + + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + + if ( params.merge_fastq_pass && !params.barcodes_samplesheet) { + MERGE_BARCODES (params.merge_fastq_pass) + //GENERATE_INPUT(file("${params.outdir}/fastq_pass_merged")) + GENERATE_INPUT(MERGE_BARCODES.out.fastq_dir_merged) + // ch_input = file(params.outdir + 'samplesheet_merged.csv') + ch_input = GENERATE_INPUT.out.sample_sheet_merged + } else if ( params.merge_fastq_pass && params.barcodes_samplesheet) { + MERGE_BARCODES_SAMPLESHEET (params.barcodes_samplesheet, params.merge_fastq_pass) +// merged_files = (params.outdir + '/fastq_pass_merged') + GENERATE_INPUT (MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged) + ch_input = GENERATE_INPUT.out.sample_sheet_merged + } + + + + + // + // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // + INPUT_CHECK ( + ch_input + ) + ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + + + // + // MODULE: Run FastQC + // + FASTQC ( + INPUT_CHECK.out.reads + ) + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + +// CUTADAPT +if (!params.skip_cutadapt) { + // Run cutadapt for sequencing + CUTADAPT (INPUT_CHECK.out.reads) + ch_processed_reads = CUTADAPT.out.reads + + // Track version information + ch_versions = ch_versions.mix(CUTADAPT.out.versions.first()) +} else { + // If --skip_cutadapt is provided, proceed with INPUT_CHECK.out.reads directly + ch_processed_reads = INPUT_CHECK.out.reads +} + + // MODULE: Run EMU_ABUNDANCE + EMU_ABUNDANCE ( + 
ch_processed_reads + ) + ch_versions = ch_versions.mix(EMU_ABUNDANCE.out.versions.first()) + + + + if ( params.run_krona ) { + // MODULE: Run KRONA_KTIMPORTTAXONOMY + KRONA_KTIMPORTTAXONOMY (EMU_ABUNDANCE.out.report , file(params.krona_taxonomy_tab, checkExists: true) ) + ch_versions = ch_versions.mix( KRONA_KTIMPORTTAXONOMY.out.versions.first() ) + } + + + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + // + // MODULE: MultiQC Preproccessed + // + workflow_summary = WorkflowGmsemu.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) + + methods_description = WorkflowGmsemu.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + ch_methods_description = Channel.value(methods_description) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + multiqc_report = MULTIQC.out.report.toList() + + +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) + } + NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, 
log) + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + From 2e230c5b267bfe6b94861580716120e7cb11fa9e Mon Sep 17 00:00:00 2001 From: Andersson Olivia Date: Wed, 20 Nov 2024 16:30:30 +0100 Subject: [PATCH 2/7] Added cutadapt summary to MultiQC report --- workflows/gmsemu_sr.nf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/workflows/gmsemu_sr.nf b/workflows/gmsemu_sr.nf index cef7e30..52e06cb 100644 --- a/workflows/gmsemu_sr.nf +++ b/workflows/gmsemu_sr.nf @@ -169,6 +169,11 @@ if (!params.skip_cutadapt) { ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + if (!params.skip_cutadapt) { + ch_multiqc_files = ch_multiqc_files.mix(CUTADAPT.out.log.collect { it[1] }) + } + + MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_config.toList(), From f68279ad1a79ecd6f0b94d50d31236c0ce084db2 Mon Sep 17 00:00:00 2001 From: Andersson Olivia Date: Mon, 2 Dec 2024 10:21:31 +0100 Subject: [PATCH 3/7] Added option to specify primer in samplesheet --- README.md | 42 +++++++++++++++++++++++----- bin/check_samplesheet.py | 46 +++++++++++++++---------------- conf/modules.config | 17 ++++++++---- subworkflows/local/input_check.nf | 11 +++++--- 4 files changed, 76 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 507ad34..9ed8243 100644 --- a/README.md +++ b/README.md @@ -35,9 +35,9 @@ and update software dependencies. ![Pipeline overview image](docs/images/gms_16s_20240415.png) -Roadmap/workflow. Only the NanoPore flow is available. Minor testing has been -done for PacBio and it seems to work. short read has no support yet. MultiQC -collects only info from FastQC and some information about software versions and + The Nanopore and shortread workflow is available. 
+Minor testing has been done for PacBio and it seems to work. +MultiQC collects only info from FastQC and some information about software versions and pipeline info. ![Krona plot](https://github.com/genomic-medicine-sweden/gms_16S/assets/115690981/dcdd5da4-135c-48c4-b64f-82f0452b5520) @@ -111,18 +111,46 @@ nextflow run main.nf \ --barcodes_samplesheet /[absolute path to barcode sample sheet]/sample_sheet_merge.csv ``` +## Runs with shortreads + +When running gms_16s with short reads, the primer sequences are trimmed using cutadapt by default using the provided primer sequences. +The primer sequences can be provided in the samplesheet or passed as arguments (FW_primer, RV_primer). Primer trimming with cutadapt can be skipped with --skip_cutadapt. + + +```bash +nextflow run main.nf \ + --input sample_sheet.csv + --outdir [absolute path]/gms_16S/results \ + --db /[absolute path]/gms_16S/assets/databases/emu_database \ + --seqtype sr \ + -profile singularity \ + --quality_filtering \ +``` + +```bash +nextflow run main.nf \ + --input sample_sheet.csv + --outdir [absolute path]/gms_16S/results \ + --db /[absolute path]/gms_16S/assets/databases/emu_database \ + --seqtype sr \ + -profile singularity \ + --quality_filtering \ + --FW_primer AGCTGNCCTG\ + --RV_primer TGCATNCTGA +``` + ## Sample sheets There are two types of sample sheets that can be used: 1) If the fastq files are already concatenated/merged i.e., the fastq-files in Nanopore barcode directories have been concataned already, the `--input` can be used. -`--input` expects a `.csv` sample sheet with 3 columns (note the header +`--input` expects a `.csv` sample sheet with 4 columns (note the header names). 
It looks like this (See also the `examples` directory): ```csv -sample,fastq_1,fastq_2 -SAMPLE_1,/absolute_path/gms_16S/assets/test_assets/medium_Mock_dil_1_2_BC1.fastq.gz, -SAMPLE_2,/absolute_path/gms_16S/assets/test_assets/medium_Mock_dil_1_2_BC3.fastq.gz, +sample,instrument_platform,fastq_1,fastq_2 +SAMPLE_1,NANOPORE,/absolute_path/gms_16S/assets/test_assets/medium_Mock_dil_1_2_BC1.fastq.gz, +SAMPLE_2,NANOPORE,/absolute_path/gms_16S/assets/test_assets/medium_Mock_dil_1_2_BC3.fastq.gz, ``` 2) If the fastq files are separated in their respective barcode folder i.e., you diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 11b1557..9f8da11 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -1,9 +1,7 @@ #!/usr/bin/env python - """Provide a command line tool to validate and transform tabular samplesheets.""" - import argparse import csv import logging @@ -24,14 +22,13 @@ class RowChecker: """ - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) + VALID_FORMATS = (".fq.gz", ".fastq.gz") + ALLOWED_PLATFORMS = {"ILLUMINA", "NANOPORE","MAP-ONT"} def __init__( self, sample_col="sample", + platform_col="instrument_platform", first_col="fastq_1", second_col="fastq_2", single_col="single_end", @@ -43,6 +40,8 @@ def __init__( Args: sample_col (str): The name of the column that contains the sample name (default "sample"). + platform_col (str): The name of the column that contains the sequencing + platform (default "instrument_platform"). first_col (str): The name of the column that contains the first (or only) FASTQ file path (default "fastq_1"). 
second_col (str): The name of the column that contains the second (if any) @@ -54,6 +53,7 @@ def __init__( """ super().__init__(**kwargs) self._sample_col = sample_col + self._platform_col = platform_col self._first_col = first_col self._second_col = second_col self._single_col = single_col @@ -67,9 +67,9 @@ def validate_and_transform(self, row): Args: row (dict): A mapping from column headers (keys) to elements of that row (values). - """ self._validate_sample(row) + self._validate_platform(row) self._validate_first(row) self._validate_second(row) self._validate_pair(row) @@ -83,6 +83,15 @@ def _validate_sample(self, row): # Sanitize samples slightly. row[self._sample_col] = row[self._sample_col].replace(" ", "_") + def _validate_platform(self, row): + """Assert that the instrument platform exists and is valid.""" + platform = row.get(self._platform_col, "").strip().upper() + if platform not in self.ALLOWED_PLATFORMS: + allowed = ", ".join(self.ALLOWED_PLATFORMS) + raise AssertionError( + f"Instrument platform '{platform}' is invalid. Allowed values: {allowed}." + ) + def _validate_first(self, row): """Assert that the first FASTQ entry is non-empty and has the right format.""" if len(row[self._first_col]) <= 0: @@ -118,8 +127,7 @@ def validate_unique_samples(self): Assert that the combination of sample name and FASTQ filename is unique. In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - + number of times the same sample exists, but with different FASTQ files, e.g., multiple runs per experiment. """ if len(self._seen) != len(self.modified): raise AssertionError("The pair of sample name and FASTQ must be unique.") @@ -179,28 +187,21 @@ def check_samplesheet(file_in, file_out): be created; always in CSV format. 
Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, + This function checks that the samplesheet follows the following structure: - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv + sample,instrument_platform,fastq_1,fastq_2 + SAMPLE_PE,ILLUMINA,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz + SAMPLE_PE,ILLUMINA,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz + SAMPLE_SE,NANOPORE,SAMPLE_SE_RUN1_1.fastq.gz, """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. + required_columns = {"sample", "instrument_platform", "fastq_1", "fastq_2"} with file_in.open(newline="") as in_handle: reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. if not required_columns.issubset(reader.fieldnames): req_cols = ", ".join(required_columns) logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") sys.exit(1) - # Validate each row. checker = RowChecker() for i, row in enumerate(reader): try: @@ -211,7 +212,6 @@ def check_samplesheet(file_in, file_out): checker.validate_unique_samples() header = list(reader.fieldnames) header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 
with file_out.open(mode="w", newline="") as out_handle: writer = csv.DictWriter(out_handle, header, delimiter=",") writer.writeheader() diff --git a/conf/modules.config b/conf/modules.config index c0df13f..6b85e19 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -25,19 +25,22 @@ process { pattern: 'fastq_pass_merged' ] } + withName: CUTADAPT { ext.args = { [ "--minimum-length 1", "-O ${params.cutadapt_min_overlap}", "-e ${params.cutadapt_max_error_rate}", - "-g ${params.FW_primer} -G ${params.RV_primer}", + // Use primers from the samplesheet if available, otherwise fall back to params + meta.fw_primer ? "-g ${meta.fw_primer}" : (params.FW_primer ? "-g ${params.FW_primer}" : ''), + meta.rv_primer ? "-G ${meta.rv_primer}" : (params.RV_primer ? "-G ${params.RV_primer}" : ''), params.retain_untrimmed ? '' : "--discard-untrimmed" - ].join(' ').trim() } + ].findAll { it }.join(' ').trim() } // Remove empty strings and join arguments ext.prefix = { "${meta.id}.trimmed" } - publishDir = [ - [ path: { "${params.outdir}/cutadapt" }, + publishDir = [ + [ path: { "${params.outdir}/cutadapt" }, mode: params.publish_dir_mode, pattern: "*.log" ], @@ -46,9 +49,11 @@ withName: CUTADAPT { pattern: "*.trim.fastq.gz", enabled: params.save_intermediates ] -] + ] +} + + - } // withName: MERGE_BARCODES_SAMPLESHEET { publishDir = [ diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 0aecf87..4d183fa 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -22,12 +22,15 @@ workflow INPUT_CHECK { // Function to get list of [ meta, [ fastq_1, fastq_2 ] ] def create_fastq_channel(LinkedHashMap row) { - // create meta map + // Create meta map def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() + meta.id = row.sample + meta.single_end = row.single_end.toBoolean() + meta.instrument_platform = row.instrument_platform + meta.fw_primer = row.FW_primer + meta.rv_primer = row.RV_primer 
- // add path(s) of the fastq file(s) to the meta map + // Add path(s) of the fastq file(s) to the meta map def fastq_meta = [] if (!file(row.fastq_1).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" From 0fd08885c4f295906636f05cd31415f757bc7fa9 Mon Sep 17 00:00:00 2001 From: Andersson Olivia Date: Fri, 10 Jan 2025 13:12:04 +0100 Subject: [PATCH 4/7] . --- README.md | 15 ++- bin/check_samplesheet.py | 46 +++---- main.nf | 12 +- subworkflows/local/input_check.nf | 1 - workflows/gmsemu.nf | 202 +++++++++-------------------- workflows/gmsemu_sr.nf | 209 ------------------------------ 6 files changed, 98 insertions(+), 387 deletions(-) delete mode 100644 workflows/gmsemu_sr.nf diff --git a/README.md b/README.md index 9ed8243..6e6cf16 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Longfilt, EMU, and Krona. EMU is the tool that does the taxonomic profiling of ensures portability and reproducibility across different computational infrastructures. It has been tested on Linux and on mac M1 (not recommended, quite slow). FastQC and Nanoplot performs quality control, Porechop_ABI trims -adapters (optional)), Longfilt filters the fastq-files such that only reads +adapters (optional), Longfilt filters the fastq-files such that only reads that are close to 1500 bp are used (optional), EMU assigns taxonomic classifications, and Krona visualises the result table from EMU. The pipeline enables microbial community analysis, offering insights into the diversity in @@ -116,6 +116,11 @@ nextflow run main.nf \ When running gms_16s with short reads, the primer sequences are trimmed using cutadapt by default using the provided primer sequences. The primer sequences can be provided in the samplesheet or passed as arguments (FW_primer, RV_primer). Primer trimming with cutadapt can be skipped with --skip_cutadapt. 
+```bash +sample,fastq_1,fastq_2,FW_primer,RV_primer +SAMPLE,/absolute_path/gms_16s/Sample_R1_001.fastq.gz,/absolute_path/gms_16s/Sample_R2_001.fastq.gz,GTGCCAGCMGCCGCGGTAA,GGACTACNVGGGTWTCTAAT +``` + ```bash nextflow run main.nf \ @@ -139,6 +144,8 @@ nextflow run main.nf \ --RV_primer TGCATNCTGA ``` + + ## Sample sheets There are two types of sample sheets that can be used: 1) If the fastq files @@ -148,9 +155,9 @@ directories have been concataned already, the `--input` can be used. names). It looks like this (See also the `examples` directory): ```csv -sample,instrument_platform,fastq_1,fastq_2 -SAMPLE_1,NANOPORE,/absolute_path/gms_16S/assets/test_assets/medium_Mock_dil_1_2_BC1.fastq.gz, -SAMPLE_2,NANOPORE,/absolute_path/gms_16S/assets/test_assets/medium_Mock_dil_1_2_BC3.fastq.gz, +sample,fastq_1,fastq_2 +SAMPLE_1,/absolute_path/gms_16S/assets/test_assets/medium_Mock_dil_1_2_BC1.fastq.gz, +SAMPLE_2,/absolute_path/gms_16S/assets/test_assets/medium_Mock_dil_1_2_BC3.fastq.gz, ``` 2) If the fastq files are separated in their respective barcode folder i.e., you diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 9f8da11..11b1557 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -1,7 +1,9 @@ #!/usr/bin/env python + """Provide a command line tool to validate and transform tabular samplesheets.""" + import argparse import csv import logging @@ -22,13 +24,14 @@ class RowChecker: """ - VALID_FORMATS = (".fq.gz", ".fastq.gz") - ALLOWED_PLATFORMS = {"ILLUMINA", "NANOPORE","MAP-ONT"} + VALID_FORMATS = ( + ".fq.gz", + ".fastq.gz", + ) def __init__( self, sample_col="sample", - platform_col="instrument_platform", first_col="fastq_1", second_col="fastq_2", single_col="single_end", @@ -40,8 +43,6 @@ def __init__( Args: sample_col (str): The name of the column that contains the sample name (default "sample"). - platform_col (str): The name of the column that contains the sequencing - platform (default "instrument_platform"). 
first_col (str): The name of the column that contains the first (or only) FASTQ file path (default "fastq_1"). second_col (str): The name of the column that contains the second (if any) @@ -53,7 +54,6 @@ def __init__( """ super().__init__(**kwargs) self._sample_col = sample_col - self._platform_col = platform_col self._first_col = first_col self._second_col = second_col self._single_col = single_col @@ -67,9 +67,9 @@ def validate_and_transform(self, row): Args: row (dict): A mapping from column headers (keys) to elements of that row (values). + """ self._validate_sample(row) - self._validate_platform(row) self._validate_first(row) self._validate_second(row) self._validate_pair(row) @@ -83,15 +83,6 @@ def _validate_sample(self, row): # Sanitize samples slightly. row[self._sample_col] = row[self._sample_col].replace(" ", "_") - def _validate_platform(self, row): - """Assert that the instrument platform exists and is valid.""" - platform = row.get(self._platform_col, "").strip().upper() - if platform not in self.ALLOWED_PLATFORMS: - allowed = ", ".join(self.ALLOWED_PLATFORMS) - raise AssertionError( - f"Instrument platform '{platform}' is invalid. Allowed values: {allowed}." - ) - def _validate_first(self, row): """Assert that the first FASTQ entry is non-empty and has the right format.""" if len(row[self._first_col]) <= 0: @@ -127,7 +118,8 @@ def validate_unique_samples(self): Assert that the combination of sample name and FASTQ filename is unique. In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exists, but with different FASTQ files, e.g., multiple runs per experiment. + number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. 
+ """ if len(self._seen) != len(self.modified): raise AssertionError("The pair of sample name and FASTQ must be unique.") @@ -187,21 +179,28 @@ def check_samplesheet(file_in, file_out): be created; always in CSV format. Example: - This function checks that the samplesheet follows the following structure: + This function checks that the samplesheet follows the following structure, + see also the `viral recon samplesheet`_:: + + sample,fastq_1,fastq_2 + SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz + SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz + SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - sample,instrument_platform,fastq_1,fastq_2 - SAMPLE_PE,ILLUMINA,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,ILLUMINA,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,NANOPORE,SAMPLE_SE_RUN1_1.fastq.gz, + .. _viral recon samplesheet: + https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv """ - required_columns = {"sample", "instrument_platform", "fastq_1", "fastq_2"} + required_columns = {"sample", "fastq_1", "fastq_2"} + # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. with file_in.open(newline="") as in_handle: reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) + # Validate the existence of the expected header columns. if not required_columns.issubset(reader.fieldnames): req_cols = ", ".join(required_columns) logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") sys.exit(1) + # Validate each row. checker = RowChecker() for i, row in enumerate(reader): try: @@ -212,6 +211,7 @@ def check_samplesheet(file_in, file_out): checker.validate_unique_samples() header = list(reader.fieldnames) header.insert(1, "single_end") + # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 
with file_out.open(mode="w", newline="") as out_handle: writer = csv.DictWriter(out_handle, header, delimiter=",") writer.writeheader() diff --git a/main.nf b/main.nf index 54282ea..02b3e7e 100644 --- a/main.nf +++ b/main.nf @@ -33,19 +33,12 @@ WorkflowMain.initialise(workflow, params, log) */ include { GMSEMU } from './workflows/gmsemu' -include { GMSEMU_sr } from './workflows/gmsemu_sr' + // // WORKFLOW: Run main gms_16S analysis pipeline // - workflow GMS_EMU { - if (params.seqtype == "map-ont") { - GMSEMU() - } else if (params.seqtype == "sr") { - GMSEMU_sr() - } else { - error "Invalid seqtype. Please specify either 'map-ont' or 'sr'." - } + GMSEMU () } /* @@ -66,4 +59,3 @@ workflow { THE END ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 4d183fa..454a51a 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -26,7 +26,6 @@ def create_fastq_channel(LinkedHashMap row) { def meta = [:] meta.id = row.sample meta.single_end = row.single_end.toBoolean() - meta.instrument_platform = row.instrument_platform meta.fw_primer = row.FW_primer meta.rv_primer = row.RV_primer diff --git a/workflows/gmsemu.nf b/workflows/gmsemu.nf index 022e2e5..e55dad9 100644 --- a/workflows/gmsemu.nf +++ b/workflows/gmsemu.nf @@ -9,12 +9,7 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) // Validate input parameters WorkflowGmsemu.initialise(params, log) -// TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist - -// def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] -// for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - def checkPathParamList = [] if (!params.merge_fastq_pass) { checkPathParamList += params.input @@ -23,14 +18,13 @@ checkPathParamList += 
[params.multiqc_config, params.fasta] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters -// if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.input) { ch_input = file(params.input) - } else if (params.merge_fastq_pass) { - // do nothing. - } else { - exit 1, 'Input samplesheet not specified. Unless '--merge_fastq_pass' is used, a sample_sheet.csv must be defined!' - } +} else if (params.merge_fastq_pass) { + // Do nothing. +} else { + exit 1, "Input samplesheet not specified. Unless '--merge_fastq_pass' is used, a sample_sheet.csv must be defined!" +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -39,8 +33,8 @@ if (params.input) { */ ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) /* @@ -49,9 +43,6 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// include { INPUT_CHECK } from '../subworkflows/local/input_check' /* @@ -60,87 +51,46 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// MODULE: Installed directly from nf-core/modules -// include { MERGE_BARCODES } from '../modules/local/merge_barcodes/main.nf' include { MERGE_BARCODES_SAMPLESHEET } from '../modules/local/merge_barcodes_samplesheet/main.nf' include { GENERATE_INPUT } from '../modules/local/generate_input/main.nf' -//include { FALCO } from '../modules/nf-core/falco/main.nf' -include { NANOPLOT as NANOPLOT1 } from '../modules/nf-core/nanoplot/main.nf' -include { NANOPLOT as NANOPLOT2 } from '../modules/nf-core/nanoplot/main.nf' -include { PORECHOP_ABI } from '../modules/nf-core/porechop/abi/main.nf' -include { FILTLONG } from '../modules/nf-core/filtlong/main.nf' include { EMU_ABUNDANCE } from '../modules/local/emu/abundance/main.nf' include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main.nf' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { FASTQC } from '../modules/nf-core/fastqc/main' +include { CUTADAPT } from '../modules/nf-core/cutadapt/main.nf' +include { NANOPLOT as NANOPLOT1 } from '../modules/nf-core/nanoplot/main.nf' +include { NANOPLOT as NANOPLOT2 } from '../modules/nf-core/nanoplot/main.nf' +include { PORECHOP_ABI } from '../modules/nf-core/porechop/abi/main.nf' +include { FILTLONG } from '../modules/nf-core/filtlong/main.nf' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow GMSEMU { - ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - - if ( params.merge_fastq_pass && !params.barcodes_samplesheet) { - MERGE_BARCODES (params.merge_fastq_pass) - //GENERATE_INPUT(file("${params.outdir}/fastq_pass_merged")) + // Merge fastq and generate input based on seqtype + if (params.merge_fastq_pass && !params.barcodes_samplesheet) { + MERGE_BARCODES(params.merge_fastq_pass) GENERATE_INPUT(MERGE_BARCODES.out.fastq_dir_merged) - // ch_input = file(params.outdir + 'samplesheet_merged.csv') ch_input = GENERATE_INPUT.out.sample_sheet_merged - } else if ( params.merge_fastq_pass && params.barcodes_samplesheet) { - MERGE_BARCODES_SAMPLESHEET (params.barcodes_samplesheet, params.merge_fastq_pass) -// merged_files = (params.outdir + '/fastq_pass_merged') - GENERATE_INPUT (MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged) + } else if (params.merge_fastq_pass && params.barcodes_samplesheet) { + MERGE_BARCODES_SAMPLESHEET(params.barcodes_samplesheet, params.merge_fastq_pass) + GENERATE_INPUT(MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged) ch_input = GENERATE_INPUT.out.sample_sheet_merged } - - - - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - ch_input - ) + // Validate and stage input files + INPUT_CHECK(ch_input) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - - - // - // MODULE: Run Falco - // FALCO ( - // INPUT_CHECK.out.reads - // ) - - - - // - // MODULE: Run Nanoplot1 - NANOPLOT1 ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(NANOPLOT1.out.versions.first()) - - - - // NANOPLOT2 ( - // INPUT_CHECK.out.reads - // ) - - - // // MODULE: Run FastQC // @@ -151,77 +101,50 @@ workflow GMSEMU { - - // MODULE: Run PORECHOP_ABI and filtering - // - if ( params.adapter_trimming && 
!params.quality_filtering) { - PORECHOP_ABI ( INPUT_CHECK.out.reads ) - - ch_processed_reads = PORECHOP_ABI.out.reads - .map { meta, reads -> [ meta + [single_end: 1], reads ] } - - ch_versions = ch_versions.mix(PORECHOP_ABI.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_ABI.out.log ) - - } else if ( !params.adapter_trimming && params.quality_filtering) { - - ch_processed_reads = FILTLONG ( INPUT_CHECK.out.reads.map { meta, reads -> [meta, [], reads ] } ).reads - ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) - - } else if ( !params.adapter_trimming && !params.quality_filtering) { - - ch_processed_reads = INPUT_CHECK.out.reads - + // Processing based on seqtype + if (params.seqtype == "map-ont") { + // Long-read processing + NANOPLOT1(INPUT_CHECK.out.reads) + ch_versions = ch_versions.mix(NANOPLOT1.out.versions.first()) + + if (params.adapter_trimming) { + PORECHOP_ABI(INPUT_CHECK.out.reads) + ch_clipped_reads = PORECHOP_ABI.out.reads.map { meta, reads -> [meta + [single_end: 1], reads] } + ch_processed_reads = FILTLONG(ch_clipped_reads.map { meta, reads -> [meta, [], reads] }).reads + + ch_versions = ch_versions.mix(PORECHOP_ABI.out.versions.first()) + ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + } else { + ch_processed_reads = INPUT_CHECK.out.reads + } + + NANOPLOT2(ch_processed_reads) + ch_versions = ch_versions.mix(NANOPLOT2.out.versions.first()) + + } else if (params.seqtype == "sr") { + // Short-read processing + if (!params.skip_cutadapt) { + CUTADAPT(INPUT_CHECK.out.reads) + ch_processed_reads = CUTADAPT.out.reads + ch_versions = ch_versions.mix(CUTADAPT.out.versions.first()) + } else { + ch_processed_reads = INPUT_CHECK.out.reads + } } else { - PORECHOP_ABI ( INPUT_CHECK.out.reads ) - ch_clipped_reads = PORECHOP_ABI.out.reads - .map { meta, reads -> [ meta + [single_end: 1], reads ] } - - ch_processed_reads = FILTLONG ( 
ch_clipped_reads.map { meta, reads -> [ meta, [], reads ] } ).reads - - ch_versions = ch_versions.mix(PORECHOP_ABI.out.versions.first()) - ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_ABI.out.log ) - ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) + error "Invalid seqtype. Please specify either 'map-ont' or 'sr'." } -// PORECHOP_ABI (INPUT_CHECK.out.reads) -// ch_processed_reads = PORECHOP_ABI.out.reads -// .map { meta, reads -> [ meta + [single_end: 1], reads ]} - -// ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - -// CUSTOM_DUMPSOFTWAREVERSIONS ( -// ch_versions.unique().collectFile(name: 'collated_versions.yml') -// ) - - - - NANOPLOT2 ( - ch_processed_reads - ) - - - // MODULE: Run EMU_ABUNDANCE - EMU_ABUNDANCE ( - ch_processed_reads - ) + // Run EMU_ABUNDANCE + EMU_ABUNDANCE(ch_processed_reads) ch_versions = ch_versions.mix(EMU_ABUNDANCE.out.versions.first()) - - - if ( params.run_krona ) { - // MODULE: Run KRONA_KTIMPORTTAXONOMY - KRONA_KTIMPORTTAXONOMY (EMU_ABUNDANCE.out.report , file(params.krona_taxonomy_tab, checkExists: true) ) - ch_versions = ch_versions.mix( KRONA_KTIMPORTTAXONOMY.out.versions.first() ) + if (params.run_krona) { + KRONA_KTIMPORTTAXONOMY(EMU_ABUNDANCE.out.report, file(params.krona_taxonomy_tab, checkExists: true)) + ch_versions = ch_versions.mix(KRONA_KTIMPORTTAXONOMY.out.versions.first()) } - - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + CUSTOM_DUMPSOFTWAREVERSIONS(ch_versions.unique().collectFile(name: 'collated_versions.yml')) // // MODULE: MultiQC Preproccessed @@ -237,9 +160,11 @@ workflow GMSEMU { ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - 
// testing other tools - ch_multiqc_files = ch_multiqc_files.mix(NANOPLOT1.out.txt.collect{it[1]}.ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(NANOPLOT2.out.txt.collect{it[1]}.ifEmpty([])) + + if (params.seqtype == "sr" && !params.skip_cutadapt) { + ch_multiqc_files = ch_multiqc_files.mix(CUTADAPT.out.log.collect { it[1] }) + } + MULTIQC ( ch_multiqc_files.collect(), @@ -251,9 +176,6 @@ workflow GMSEMU { } - - - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPLETION EMAIL AND SUMMARY diff --git a/workflows/gmsemu_sr.nf b/workflows/gmsemu_sr.nf deleted file mode 100644 index 52e06cb..0000000 --- a/workflows/gmsemu_sr.nf +++ /dev/null @@ -1,209 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowGmsemu.initialise(params, log) - -// TODO nf-core: Add all file path parameters for the pipeline to the list below -// Check input path parameters to see if they exist - -// def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] -// for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -def checkPathParamList = [] -if (!params.merge_fastq_pass) { - checkPathParamList += params.input -} -checkPathParamList += [params.multiqc_config, params.fasta] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -// if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } -if (params.input) { - ch_input = file(params.input) - } else if (params.merge_fastq_pass) { - // do nothing. - } else { - exit 1, 'Input samplesheet not specified. 
Unless '--merge_fastq_pass' is used, a sample_sheet.csv must be defined!' - } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { MERGE_BARCODES } from '../modules/local/merge_barcodes/main.nf' -include { MERGE_BARCODES_SAMPLESHEET } from '../modules/local/merge_barcodes_samplesheet/main.nf' -include { GENERATE_INPUT } from '../modules/local/generate_input/main.nf' -include { EMU_ABUNDANCE } from '../modules/local/emu/abundance/main.nf' -include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main.nf' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from 
'../modules/nf-core/custom/dumpsoftwareversions/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { CUTADAPT } from '../modules/nf-core/cutadapt/main.nf' //added - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Info required for completion email and summary -def multiqc_report = [] - -workflow GMSEMU_sr { - - - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - - - if ( params.merge_fastq_pass && !params.barcodes_samplesheet) { - MERGE_BARCODES (params.merge_fastq_pass) - //GENERATE_INPUT(file("${params.outdir}/fastq_pass_merged")) - GENERATE_INPUT(MERGE_BARCODES.out.fastq_dir_merged) - // ch_input = file(params.outdir + 'samplesheet_merged.csv') - ch_input = GENERATE_INPUT.out.sample_sheet_merged - } else if ( params.merge_fastq_pass && params.barcodes_samplesheet) { - MERGE_BARCODES_SAMPLESHEET (params.barcodes_samplesheet, params.merge_fastq_pass) -// merged_files = (params.outdir + '/fastq_pass_merged') - GENERATE_INPUT (MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged) - ch_input = GENERATE_INPUT.out.sample_sheet_merged - } - - - - - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - ch_input - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - - - // - // MODULE: Run FastQC - // - FASTQC ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - -// CUTADAPT -if (!params.skip_cutadapt) { - // Run cutadapt for sequencing - CUTADAPT (INPUT_CHECK.out.reads) - ch_processed_reads = CUTADAPT.out.reads - - // Track version information - ch_versions = ch_versions.mix(CUTADAPT.out.versions.first()) -} else { - // If --skip_cutadapt is provided, proceed with INPUT_CHECK.out.reads directly - ch_processed_reads = 
INPUT_CHECK.out.reads -} - - // MODULE: Run EMU_ABUNDANCE - EMU_ABUNDANCE ( - ch_processed_reads - ) - ch_versions = ch_versions.mix(EMU_ABUNDANCE.out.versions.first()) - - - - if ( params.run_krona ) { - // MODULE: Run KRONA_KTIMPORTTAXONOMY - KRONA_KTIMPORTTAXONOMY (EMU_ABUNDANCE.out.report , file(params.krona_taxonomy_tab, checkExists: true) ) - ch_versions = ch_versions.mix( KRONA_KTIMPORTTAXONOMY.out.versions.first() ) - } - - - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) - - // - // MODULE: MultiQC Preproccessed - // - workflow_summary = WorkflowGmsemu.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowGmsemu.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) - ch_methods_description = Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - - if (!params.skip_cutadapt) { - ch_multiqc_files = ch_multiqc_files.mix(CUTADAPT.out.log.collect { it[1] }) - } - - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - multiqc_report = MULTIQC.out.report.toList() - - -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, 
summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - From 490256664f646e425b1f31886da8925200b71ab0 Mon Sep 17 00:00:00 2001 From: Andersson Olivia Date: Fri, 10 Jan 2025 13:38:38 +0100 Subject: [PATCH 5/7] Require quality_filtering to be off for the adapter-trimming branch --- workflows/gmsemu.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/gmsemu.nf b/workflows/gmsemu.nf index e55dad9..e7b8931 100644 --- a/workflows/gmsemu.nf +++ b/workflows/gmsemu.nf @@ -107,7 +107,7 @@ workflow GMSEMU { NANOPLOT1(INPUT_CHECK.out.reads) ch_versions = ch_versions.mix(NANOPLOT1.out.versions.first()) - if (params.adapter_trimming) { + if (params.adapter_trimming && !params.quality_filtering ) { PORECHOP_ABI(INPUT_CHECK.out.reads) ch_clipped_reads = PORECHOP_ABI.out.reads.map { meta, reads -> [meta + [single_end: 1], reads] } ch_processed_reads = FILTLONG(ch_clipped_reads.map { meta, reads -> [meta, [], reads] }).reads @@ -197,3 +197,4 @@ workflow.onComplete { THE END ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + From fada7164ff9a00b15109d39de0f913d54f8b4a13 Mon Sep 17 00:00:00 2001 From: Andersson Olivia Date: Fri, 24 Jan 2025 14:00:33 +0100 Subject: [PATCH 6/7] added quay.io in the module-file --- README.md | 4 +--- modules/nf-core/cutadapt/main.nf | 2 +- workflows/gmsemu.nf | 6 ++++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 6e6cf16..60186d9 100644 --- a/README.md +++ b/README.md @@ -128,8 +128,7 @@ nextflow run main.nf \ --outdir [absolute path]/gms_16S/results \ --db /[absolute path]/gms_16S/assets/databases/emu_database \ --seqtype sr \ - -profile
singularity \ - --quality_filtering \ + -profile singularity ``` ```bash @@ -139,7 +138,6 @@ nextflow run main.nf \ --db /[absolute path]/gms_16S/assets/databases/emu_database \ --seqtype sr \ -profile singularity \ - --quality_filtering \ --FW_primer AGCTGNCCTG\ --RV_primer TGCATNCTGA ``` diff --git a/modules/nf-core/cutadapt/main.nf b/modules/nf-core/cutadapt/main.nf index 3d3e571..8d168ef 100755 --- a/modules/nf-core/cutadapt/main.nf +++ b/modules/nf-core/cutadapt/main.nf @@ -5,7 +5,7 @@ process CUTADAPT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/cutadapt:4.6--py39hf95cd2a_1' : - 'biocontainers/cutadapt:4.6--py39hf95cd2a_1' }" + 'quay.io/biocontainers/cutadapt:4.6--py39hf95cd2a_1' }" input: tuple val(meta), path(reads) diff --git a/workflows/gmsemu.nf b/workflows/gmsemu.nf index e7b8931..902da04 100644 --- a/workflows/gmsemu.nf +++ b/workflows/gmsemu.nf @@ -107,6 +107,10 @@ workflow GMSEMU { NANOPLOT1(INPUT_CHECK.out.reads) ch_versions = ch_versions.mix(NANOPLOT1.out.versions.first()) + // NANOPLOT2 ( + // INPUT_CHECK.out.reads + // ) + if (params.adapter_trimming && !params.quality_filtering ) { PORECHOP_ABI(INPUT_CHECK.out.reads) ch_clipped_reads = PORECHOP_ABI.out.reads.map { meta, reads -> [meta + [single_end: 1], reads] } @@ -118,8 +122,6 @@ workflow GMSEMU { ch_processed_reads = INPUT_CHECK.out.reads } - NANOPLOT2(ch_processed_reads) - ch_versions = ch_versions.mix(NANOPLOT2.out.versions.first()) } else if (params.seqtype == "sr") { // Short-read processing From 46dffc5d81007468c6f6403f7cddd015ba3e8bfd Mon Sep 17 00:00:00 2001 From: Andersson Olivia Date: Fri, 24 Jan 2025 15:43:27 +0100 Subject: [PATCH 7/7] Fixed requested changes --- conf/modules.config | 57 +++++++++++++++++++++------------------------ workflows/gmsemu.nf | 40 ++++++++++++++++++++++--------- 2 files changed, 56 insertions(+), 41 deletions(-) 
diff --git a/conf/modules.config b/conf/modules.config index 6b85e19..620ce14 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -26,35 +26,7 @@ process { ] } -withName: CUTADAPT { - ext.args = { [ - "--minimum-length 1", - "-O ${params.cutadapt_min_overlap}", - "-e ${params.cutadapt_max_error_rate}", - // Use primers from the samplesheet if available, otherwise fall back to params - meta.fw_primer ? "-g ${meta.fw_primer}" : (params.FW_primer ? "-g ${params.FW_primer}" : ''), - meta.rv_primer ? "-G ${meta.rv_primer}" : (params.RV_primer ? "-G ${params.RV_primer}" : ''), - params.retain_untrimmed ? '' : "--discard-untrimmed" - ].findAll { it }.join(' ').trim() } // Remove empty strings and join arguments - - ext.prefix = { "${meta.id}.trimmed" } - - publishDir = [ - [ path: { "${params.outdir}/cutadapt" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ], - [ path: { "${params.outdir}/cutadapt/trimmed_reads" }, - mode: params.publish_dir_mode, - pattern: "*.trim.fastq.gz", - enabled: params.save_intermediates - ] - ] -} - - -// withName: MERGE_BARCODES_SAMPLESHEET { publishDir = [ path: { "${params.outdir}/fastq_pass_merged" }, @@ -120,6 +92,32 @@ withName: CUTADAPT { } + withName: CUTADAPT { + ext.args = { [ + "--minimum-length 1", + "-O ${params.cutadapt_min_overlap}", + "-e ${params.cutadapt_max_error_rate}", + // Use primers from the samplesheet if available, otherwise fall back to params + meta.fw_primer ? "-g ${meta.fw_primer}" : (params.FW_primer ? "-g ${params.FW_primer}" : ''), + meta.rv_primer ? "-G ${meta.rv_primer}" : (params.RV_primer ? "-G ${params.RV_primer}" : ''), + params.retain_untrimmed ? 
'' : "--discard-untrimmed" + ].findAll { it }.join(' ').trim() } // Remove empty strings and join arguments + + ext.prefix = { "${meta.id}.trimmed" } + + publishDir = [ + [ path: { "${params.outdir}/cutadapt" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ path: { "${params.outdir}/cutadapt/trimmed_reads" }, + mode: params.publish_dir_mode, + pattern: "*.trim.fastq.gz", + enabled: params.save_intermediates + ] + ] + } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { @@ -204,8 +202,7 @@ withName: CUTADAPT { ] ] } - + } - diff --git a/workflows/gmsemu.nf b/workflows/gmsemu.nf index 902da04..d9b3e49 100644 --- a/workflows/gmsemu.nf +++ b/workflows/gmsemu.nf @@ -99,30 +99,47 @@ workflow GMSEMU { ) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - - - // Processing based on seqtype if (params.seqtype == "map-ont") { - // Long-read processing NANOPLOT1(INPUT_CHECK.out.reads) ch_versions = ch_versions.mix(NANOPLOT1.out.versions.first()) - // NANOPLOT2 ( - // INPUT_CHECK.out.reads - // ) + if (params.adapter_trimming && !params.quality_filtering) { + PORECHOP_ABI(INPUT_CHECK.out.reads) + + ch_processed_reads = PORECHOP_ABI.out.reads + .map { meta, reads -> [meta + [single_end: 1], reads] } + + ch_versions = ch_versions.mix(PORECHOP_ABI.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(PORECHOP_ABI.out.log) + + } else if (!params.adapter_trimming && params.quality_filtering) { + ch_processed_reads = FILTLONG( + INPUT_CHECK.out.reads.map { meta, reads -> [meta, [], reads] } + ).reads - if (params.adapter_trimming && !params.quality_filtering ) { + ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(FILTLONG.out.log) + + } else if (params.adapter_trimming && params.quality_filtering) { + // Both adapter trimming and quality filtering PORECHOP_ABI(INPUT_CHECK.out.reads) - ch_clipped_reads = PORECHOP_ABI.out.reads.map { meta, reads -> [meta + [single_end: 1], reads] } - ch_processed_reads = 
FILTLONG(ch_clipped_reads.map { meta, reads -> [meta, [], reads] }).reads + + ch_clipped_reads = PORECHOP_ABI.out.reads + .map { meta, reads -> [meta + [single_end: 1], reads] } + + ch_processed_reads = FILTLONG( + ch_clipped_reads.map { meta, reads -> [meta, [], reads] } + ).reads ch_versions = ch_versions.mix(PORECHOP_ABI.out.versions.first()) ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(PORECHOP_ABI.out.log) + ch_multiqc_files = ch_multiqc_files.mix(FILTLONG.out.log) + } else { ch_processed_reads = INPUT_CHECK.out.reads } - } else if (params.seqtype == "sr") { // Short-read processing if (!params.skip_cutadapt) { @@ -137,6 +154,7 @@ workflow GMSEMU { } + // Run EMU_ABUNDANCE EMU_ABUNDANCE(ch_processed_reads) ch_versions = ch_versions.mix(EMU_ABUNDANCE.out.versions.first())