diff --git a/README.md b/README.md
index 0964e70c..ddffa516 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,8 @@ It also performs basic QC and coverage analysis.
The pipeline is built using Nextflow, a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible.
+The pipeline also supports Unique Molecular Identifier (UMI) data. Set the `umi_type` column of a samplesheet row to `seq` (UMI is part of the read sequence) or `readname` (UMI is part of the read name) and UMI-aware preprocessing is enabled automatically for that sample. Rows with `umi_type` set to `none`, or with no `umi_type` specified, will be processed as non-UMI sequencing data.
+
Steps inlcude:
1. Demultiplexing using [`BCLconvert`](https://emea.support.illumina.com/sequencing/sequencing_software/bcl-convert.html)
@@ -24,7 +26,13 @@ Steps inlcude:
6. Alignment QC using [`samtools flagstat`](http://www.htslib.org/doc/samtools-flagstat.html), [`samtools stats`](http://www.htslib.org/doc/samtools-stats.html), [`samtools idxstats`](http://www.htslib.org/doc/samtools-idxstats.html) and [`picard CollecHsMetrics`](https://broadinstitute.github.io/picard/command-line-overview.html#CollectHsMetrics), [`picard CollectWgsMetrics`](https://broadinstitute.github.io/picard/command-line-overview.html#CollectWgsMetrics), [`picard CollectMultipleMetrics`](https://broadinstitute.github.io/picard/command-line-overview.html#CollectMultipleMetrics)
7. QC aggregation using [`multiqc`](https://multiqc.info/)
-
+
+
+UMI processing (only for rows whose `umi_type` is `seq` or `readname`):
+- Extract UMI from read sequence (`seq`) or read name (`readname`)
+- Group reads by UMI (fgbio GroupReadsByUmi)
+- Call molecular consensus (fgbio CallMolecularConsensusReads) and filter (fgbio FilterConsensusReads)
+- Re-align filtered consensus reads with BWA-MEM (`-Y`), then sort/index
## Usage
@@ -41,6 +49,12 @@ First, prepare a samplesheet with your input data that looks as follows:
id,samplename,organism,library,fastq_1,fastq_2
sample1,sample1,Homo sapiens,Library_Name,reads1.fq.gz,reads2.fq.gz
```
+`samplesheet.csv` for fastq inputs with UMI:
+
+```csv
+id,samplename,organism,library,umi_type,fastq_1,fastq_2
+umi_sample1,umi_sample1,Homo sapiens,Library_Name,seq,reads1.fq.gz,reads2.fq.gz
+```
`samplesheet.csv` for flowcell inputs:
diff --git a/assets/schema_input.json b/assets/schema_input.json
index beaa2eb6..73e9e721 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -72,6 +72,13 @@
"pattern": "^[a-zA-Z0-9_]+.bed$",
"default": null
},
+ "umi_type": {
+ "meta": ["umi_type"],
+ "type": "string",
+ "description": "Where the UMI is stored for UMI samples (readname or seq); use none for non-UMI samples",
+ "enum": ["readname", "seq", "none"],
+ "default": "none"
+ },
"lane": {
"type": "integer",
"meta": ["lane"],
@@ -110,13 +117,13 @@
},
"anyOf": [
{
- "required": ["id", "samplename", "organism", "tag", "fastq_1", "fastq_2"]
+ "required": ["id", "samplename", "organism", "tag", "fastq_1", "fastq_2"]
},
{
- "required": ["id", "samplename", "genome", "tag", "fastq_1", "fastq_2"]
+ "required": ["id", "samplename", "genome", "tag", "fastq_1", "fastq_2"]
},
{
- "required": ["id", "samplesheet", "sample_info", "flowcell"]
+ "required": ["id", "samplesheet", "sample_info", "flowcell"]
}
]
},
diff --git a/docs/images/metro_mapumi.png b/docs/images/metro_mapumi.png
new file mode 100644
index 00000000..56b43b75
Binary files /dev/null and b/docs/images/metro_mapumi.png differ
diff --git a/docs/images/metro_mapumi.svg b/docs/images/metro_mapumi.svg
new file mode 100644
index 00000000..73f54ac2
--- /dev/null
+++ b/docs/images/metro_mapumi.svg
@@ -0,0 +1,1351 @@
+
+
+
+
diff --git a/modules.json b/modules.json
index d860bfc6..adcf495d 100644
--- a/modules.json
+++ b/modules.json
@@ -51,6 +51,55 @@
"installed_by": ["modules"],
"patch": "modules/nf-core/fastp/fastp.diff"
},
+ "fgbio/callduplexconsensusreads": {
+ "branch": "master",
+ "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+ "installed_by": ["modules"]
+ },
+ "fgbio/callmolecularconsensusreads": {
+ "branch": "master",
+ "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+ "installed_by": ["modules"]
+ },
+ "fgbio/collectduplexseqmetrics": {
+ "branch": "master",
+ "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+ "installed_by": ["modules"]
+ },
+ "fgbio/copyumifromreadname": {
+ "branch": "master",
+ "git_sha": "47dbfc0fbcd8e4e3b73d843f4659069441ca8692",
+ "installed_by": ["modules"],
+ "patch": "modules/nf-core/fgbio/copyumifromreadname/fgbio-copyumifromreadname.diff"
+ },
+ "fgbio/fastqtobam": {
+ "branch": "master",
+ "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+ "installed_by": ["modules"],
+ "patch": "modules/nf-core/fgbio/fastqtobam/fgbio-fastqtobam.diff"
+ },
+ "fgbio/filterconsensusreads": {
+ "branch": "master",
+ "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+ "installed_by": ["modules"],
+ "patch": "modules/nf-core/fgbio/filterconsensusreads/fgbio-filterconsensusreads.diff"
+ },
+ "fgbio/groupreadsbyumi": {
+ "branch": "master",
+ "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+ "installed_by": ["modules"]
+ },
+ "fgbio/sortbam": {
+ "branch": "master",
+ "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+ "installed_by": ["modules"]
+ },
+ "fgbio/zipperbams": {
+ "branch": "master",
+ "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+ "installed_by": ["modules"],
+ "patch": "modules/nf-core/fgbio/zipperbams/fgbio-zipperbams.diff"
+ },
"md5sum": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
@@ -103,6 +152,11 @@
"installed_by": ["modules"],
"patch": "modules/nf-core/samtools/coverage/samtools-coverage.diff"
},
+ "samtools/fastq": {
+ "branch": "master",
+ "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+ "installed_by": ["modules"]
+ },
"samtools/flagstat": {
"branch": "master",
"git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540",
@@ -118,6 +172,11 @@
"git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540",
"installed_by": ["modules"]
},
+ "samtools/index": {
+ "branch": "master",
+ "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+ "installed_by": ["modules"]
+ },
"samtools/sormadup": {
"branch": "master",
"git_sha": "38f3b0200093498b70ac2d63a83eac5642e3c873",
diff --git a/modules/nf-core/fgbio/callduplexconsensusreads/environment.yml b/modules/nf-core/fgbio/callduplexconsensusreads/environment.yml
new file mode 100644
index 00000000..4dbb6856
--- /dev/null
+++ b/modules/nf-core/fgbio/callduplexconsensusreads/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::fgbio=2.5.21
diff --git a/modules/nf-core/fgbio/callduplexconsensusreads/main.nf b/modules/nf-core/fgbio/callduplexconsensusreads/main.nf
new file mode 100644
index 00000000..be6fc97a
--- /dev/null
+++ b/modules/nf-core/fgbio/callduplexconsensusreads/main.nf
@@ -0,0 +1,70 @@
+process FGBIO_CALLDUPLEXCONSENSUSREADS {
+ tag "$meta.id"
+ label 'process_single'
+ // Runs `fgbio CallDuplexConsensusReads` on a grouped BAM and emits `${prefix}.bam` plus versions.yml.
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b4047e3e517b57fae311eab139a12f0887d898b7da5fceeb2a1029c73b9e3904/data' :
+ 'community.wave.seqera.io/library/fgbio:2.5.21--368dab1b4f308243' }"
+ // Inputs: [meta, grouped BAM], then the values forwarded to --min-reads and --min-input-base-quality.
+ input:
+ tuple val(meta), path(grouped_bam)
+ val min_reads
+ val min_baseq
+
+ output:
+ tuple val(meta), path("${prefix}.bam"), emit: bam
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped" // no `def`: the output block above references ${prefix}
+ // JVM heap (-Xmx) in GB: 8 by default; lowered to 1 or (task memory - 1) only when the task has less than 8 GB.
+ def mem_gb = 8
+ if (!task.memory) {
+ log.info '[fgbio CallDuplexConsensusReads] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
+ } else if (mem_gb > task.memory.giga) {
+ if (task.memory.giga < 2) {
+ mem_gb = 1
+ } else {
+ mem_gb = task.memory.giga - 1
+ }
+ }
+ if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ // Any extra fgbio options from task.ext.args are appended after the fixed options below.
+ """
+ fgbio \\
+ -Xmx${mem_gb}g \\
+ --tmp-dir=. \\
+ --async-io=true \\
+ --compression=1 \\
+ CallDuplexConsensusReads \\
+ --input $grouped_bam \\
+ --output ${prefix}.bam \\
+ --min-reads ${min_reads} \\
+ --min-input-base-quality ${min_baseq} \\
+ --threads ${task.cpus} \\
+ $args
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+ END_VERSIONS
+ """
+ // Stub: creates an empty ${prefix}.bam and the same versions.yml without running the consensus caller.
+ stub:
+ prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
+ if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ touch ${prefix}.bam
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+ END_VERSIONS
+ """
+
+}
diff --git a/modules/nf-core/fgbio/callduplexconsensusreads/meta.yml b/modules/nf-core/fgbio/callduplexconsensusreads/meta.yml
new file mode 100644
index 00000000..3b615eda
--- /dev/null
+++ b/modules/nf-core/fgbio/callduplexconsensusreads/meta.yml
@@ -0,0 +1,57 @@
+name: "fgbio_callduplexconsensusreads"
+description: Uses FGBIO CallDuplexConsensusReads to call duplex consensus sequences
+ from reads generated from the same double-stranded source molecule.
+keywords:
+ - umi
+ - duplex
+ - fgbio
+tools:
+ - "fgbio":
+ description: "A set of tools for working with genomic and high throughput sequencing
+ data, including UMIs"
+ homepage: http://fulcrumgenomics.github.io/fgbio/
+ documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/CallDuplexConsensusReads.html
+ tool_dev_url: https://github.com/fulcrumgenomics/fgbio
+ licence: ["MIT"]
+ identifier: biotools:fgbio
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - grouped_bam:
+ type: file
+ description: Grouped BAM file
+ pattern: "*.bam"
+ ontologies: []
+ - min_reads:
+ type: string
+ description: Minimum number of raw/original reads to build each consensus read. Can
+ be a space delimited list of 1-3 values. See fgbio documentation for more details.
+ - min_baseq:
+ type: integer
+ description: Ignore bases in raw reads that have Q below this value
+output:
+ bam:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}.bam:
+ type: file
+ description: consensus BAM file
+ pattern: "*.bam"
+ ontologies: []
+ versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ ontologies:
+ - edam: http://edamontology.org/format_3750 # YAML
+authors:
+ - "@lescai"
+maintainers:
+ - "@lescai"
diff --git a/modules/nf-core/fgbio/callduplexconsensusreads/tests/main.nf.test b/modules/nf-core/fgbio/callduplexconsensusreads/tests/main.nf.test
new file mode 100644
index 00000000..0144e0ea
--- /dev/null
+++ b/modules/nf-core/fgbio/callduplexconsensusreads/tests/main.nf.test
@@ -0,0 +1,62 @@
+nextflow_process {
+
+ name "Test Process FGBIO_CALLDUPLEXCONSENSUSREADS"
+ script "../main.nf"
+ process "FGBIO_CALLDUPLEXCONSENSUSREADS"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "fgbio"
+ tag "fgbio/callduplexconsensusreads"
+ // Real run on the duplex-UMI grouped test BAM with min_reads = 3 and min_baseq = 20; all outputs are snapshotted.
+ test("homo_sapiens - bam") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam', checkIfExists: true)
+ ]
+ input[1] = 3
+ input[2] = 20
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+ // Same inputs executed with `-stub`, so only the stub section of the process is exercised.
+ test("homo_sapiens - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam', checkIfExists: true)
+ ]
+ input[1] = 3
+ input[2] = 20
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/fgbio/callduplexconsensusreads/tests/main.nf.test.snap b/modules/nf-core/fgbio/callduplexconsensusreads/tests/main.nf.test.snap
new file mode 100644
index 00000000..dcc7f9c8
--- /dev/null
+++ b/modules/nf-core/fgbio/callduplexconsensusreads/tests/main.nf.test.snap
@@ -0,0 +1,72 @@
+{
+ "homo_sapiens - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_consensus_unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,337eebefcdf12475174a668e31bb4245"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_consensus_unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,337eebefcdf12475174a668e31bb4245"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-06T16:32:47.930923"
+ },
+ "homo_sapiens - bam": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_consensus_unmapped.bam:md5,4f0e87feb7601d06617c9f29d7aec352"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,337eebefcdf12475174a668e31bb4245"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_consensus_unmapped.bam:md5,4f0e87feb7601d06617c9f29d7aec352"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,337eebefcdf12475174a668e31bb4245"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-08T13:04:50.447095"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/environment.yml b/modules/nf-core/fgbio/callmolecularconsensusreads/environment.yml
new file mode 100644
index 00000000..4dbb6856
--- /dev/null
+++ b/modules/nf-core/fgbio/callmolecularconsensusreads/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::fgbio=2.5.21
diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/main.nf b/modules/nf-core/fgbio/callmolecularconsensusreads/main.nf
new file mode 100644
index 00000000..e1d869b3
--- /dev/null
+++ b/modules/nf-core/fgbio/callmolecularconsensusreads/main.nf
@@ -0,0 +1,64 @@
+process FGBIO_CALLMOLECULARCONSENSUSREADS {
+ tag "$meta.id"
+ label 'process_medium'
+ // Runs `fgbio CallMolecularConsensusReads` on a UMI-grouped BAM and emits the consensus BAM plus versions.yml.
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b4047e3e517b57fae311eab139a12f0887d898b7da5fceeb2a1029c73b9e3904/data' :
+ 'community.wave.seqera.io/library/fgbio:2.5.21--368dab1b4f308243' }"
+ // Inputs: [meta, grouped BAM], then the values forwarded to --min-reads and --min-input-base-quality.
+ input:
+ tuple val(meta), path(grouped_bam)
+ val min_reads
+ val min_baseq
+
+ output:
+ tuple val(meta), path("*.bam"), emit: bam
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
+ def mem_gb = 8 // JVM heap (-Xmx) in GB used when task.memory is not set
+ if (!task.memory) {
+ log.info '[fgbio CallMolecularConsensusReads] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
+ } else {
+ mem_gb = task.memory.giga // NOTE(review): heap equals the whole task memory, leaving no headroom for non-heap JVM usage - confirm this cannot cause out-of-memory kills
+ }
+ if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ fgbio \\
+ -Xmx${mem_gb}g \\
+ --tmp-dir=. \\
+ --async-io=true \\
+ --compression=1 \\
+ CallMolecularConsensusReads \\
+ --input $grouped_bam \\
+ --output ${prefix}.bam \\
+ --min-reads ${min_reads} \\
+ --min-input-base-quality ${min_baseq} \\
+ --threads ${task.cpus} \\
+ $args;
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+ END_VERSIONS
+ """
+ // Stub: creates an empty ${prefix}.bam and the same versions.yml without running the consensus caller.
+ stub:
+ prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
+ if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ touch ${prefix}.bam
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+ END_VERSIONS
+ """
+
+}
diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml b/modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml
new file mode 100644
index 00000000..c7b75eb7
--- /dev/null
+++ b/modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml
@@ -0,0 +1,55 @@
+name: fgbio_callmolecularconsensusreads
+description: Calls consensus sequences from reads with the same unique molecular tag.
+keywords:
+ - UMIs
+ - consensus sequence
+ - bam
+tools:
+ - fgbio:
+ description: Tools for working with genomic and high throughput sequencing data.
+ homepage: https://github.com/fulcrumgenomics/fgbio
+ documentation: http://fulcrumgenomics.github.io/fgbio/
+ licence: ["MIT"]
+ identifier: biotools:fgbio
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false, collapse:false ]
+ - grouped_bam:
+ type: file
+ description: |
+ The input SAM or BAM file, grouped by UMIs
+ pattern: "*.{bam,sam}"
+ ontologies: []
+ - min_reads:
+ type: integer
+ description: Minimum number of original reads to build each consensus read.
+ - min_baseq:
+ type: integer
+ description: Ignore bases in raw reads that have Q below this value.
+output:
+ bam:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.bam":
+ type: file
+ description: |
+ Output SAM or BAM file to write consensus reads.
+ pattern: "*.{bam,sam}"
+ ontologies: []
+ versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ ontologies:
+ - edam: http://edamontology.org/format_3750 # YAML
+authors:
+ - "@sruthipsuresh"
+maintainers:
+ - "@sruthipsuresh"
diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/tests/main.nf.test b/modules/nf-core/fgbio/callmolecularconsensusreads/tests/main.nf.test
new file mode 100644
index 00000000..8a906340
--- /dev/null
+++ b/modules/nf-core/fgbio/callmolecularconsensusreads/tests/main.nf.test
@@ -0,0 +1,72 @@
+nextflow_process {
+
+ name "Test Process FGBIO_CALLMOLECULARCONSENSUSREADS"
+ script "../main.nf"
+ process "FGBIO_CALLMOLECULARCONSENSUSREADS"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "fgbio"
+ tag "fgbio/callmolecularconsensusreads"
+ tag "fgbio/sortbam"
+ // Setup: FGBIO_SORTBAM (configured by ./sort.config) produces the BAM that both tests feed to the process under test.
+ setup {
+
+ run("FGBIO_SORTBAM") {
+ script "../../sortbam/main.nf"
+ config "./sort.config"
+ process {
+ """
+ input[0] = [[ id:'homo_sapiens_genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+ // NOTE(review): the test names and meta id say homo_sapiens, but the setup input is a sarscov2 BAM - confirm this is intended.
+ test("homo_sapiens - bam") {
+
+ when {
+ process {
+ """
+ input[0] = FGBIO_SORTBAM.out.bam
+ input[1] = 1
+ input[2] = 20
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+ // Same inputs executed with `-stub`, so only the stub section of the process is exercised.
+ test("homo_sapiens - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = FGBIO_SORTBAM.out.bam
+ input[1] = 1
+ input[2] = 20
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/tests/main.nf.test.snap b/modules/nf-core/fgbio/callmolecularconsensusreads/tests/main.nf.test.snap
new file mode 100644
index 00000000..f37f1bd7
--- /dev/null
+++ b/modules/nf-core/fgbio/callmolecularconsensusreads/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+ "homo_sapiens - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "homo_sapiens_genome"
+ },
+ "homo_sapiens_genome_consensus_unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,8dbdae0c815fd6be2c3090ca83f6bbc6"
+ ],
+ "bam": [
+ [
+ {
+ "id": "homo_sapiens_genome"
+ },
+ "homo_sapiens_genome_consensus_unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,8dbdae0c815fd6be2c3090ca83f6bbc6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-06T16:33:48.560245"
+ },
+ "homo_sapiens - bam": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "homo_sapiens_genome"
+ },
+ "homo_sapiens_genome_consensus_unmapped.bam:md5,f56c861f1f604ecc9894dc9182b170f8"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,8dbdae0c815fd6be2c3090ca83f6bbc6"
+ ],
+ "bam": [
+ [
+ {
+ "id": "homo_sapiens_genome"
+ },
+ "homo_sapiens_genome_consensus_unmapped.bam:md5,f56c861f1f604ecc9894dc9182b170f8"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,8dbdae0c815fd6be2c3090ca83f6bbc6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-08T13:05:45.874565"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/tests/sort.config b/modules/nf-core/fgbio/callmolecularconsensusreads/tests/sort.config
new file mode 100644
index 00000000..b205c8f2
--- /dev/null
+++ b/modules/nf-core/fgbio/callmolecularconsensusreads/tests/sort.config
@@ -0,0 +1,6 @@
+process { // test-only configuration for the FGBIO_SORTBAM setup step
+ withName: FGBIO_SORTBAM {
+ ext.args = '-s TemplateCoordinate' // extra SortBam arguments; presumably selects TemplateCoordinate sort order - confirm against fgbio SortBam docs
+ ext.prefix = { "${meta.id}_out" } // output prefix derived from the sample id with an "_out" suffix
+ }
+}
diff --git a/modules/nf-core/fgbio/collectduplexseqmetrics/environment.yml b/modules/nf-core/fgbio/collectduplexseqmetrics/environment.yml
new file mode 100644
index 00000000..f83d1262
--- /dev/null
+++ b/modules/nf-core/fgbio/collectduplexseqmetrics/environment.yml
@@ -0,0 +1,8 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::fgbio=2.5.21
+ - conda-forge::r-ggplot2=3.5.2
diff --git a/modules/nf-core/fgbio/collectduplexseqmetrics/main.nf b/modules/nf-core/fgbio/collectduplexseqmetrics/main.nf
new file mode 100644
index 00000000..9edf0ee8
--- /dev/null
+++ b/modules/nf-core/fgbio/collectduplexseqmetrics/main.nf
@@ -0,0 +1,80 @@
+process FGBIO_COLLECTDUPLEXSEQMETRICS {
+ tag "$meta.id"
+ label 'process_single'
+ // Runs `fgbio CollectDuplexSeqMetrics` on a grouped BAM and emits the metrics text files, the QC PDF and versions.yml.
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b4047e3e517b57fae311eab139a12f0887d898b7da5fceeb2a1029c73b9e3904/data' :
+ 'community.wave.seqera.io/library/fgbio:2.5.21--368dab1b4f308243' }"
+ // Inputs: [meta, grouped BAM] and an interval list; an empty interval_list means no --intervals option is passed.
+ input:
+ tuple val(meta), path(grouped_bam)
+ path interval_list
+
+ output:
+ tuple val(meta), path("**.family_sizes.txt") , emit: family_sizes
+ tuple val(meta), path("**.duplex_family_sizes.txt") , emit: duplex_family_sizes
+ tuple val(meta), path("**.duplex_yield_metrics.txt"), emit: duplex_yield_metrics
+ tuple val(meta), path("**.umi_counts.txt") , emit: umi_counts
+ tuple val(meta), path("**.duplex_qc.pdf") , emit: duplex_qc
+ tuple val(meta), path("**.duplex_umi_counts.txt") , emit: duplex_umi_counts, optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def intervals = interval_list ? "--intervals ${interval_list}" : ""
+ def mem_gb = 8
+ // JVM heap (-Xmx) in GB: stays 8 unless the task has less than 8 GB, then 1 or (task memory - 1).
+ if (!task.memory) {
+ log.info '[fgbio CollectDuplexSeqMetrics] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
+ } else if (mem_gb > task.memory.giga) {
+ if (task.memory.giga < 2) {
+ mem_gb = 1
+ } else {
+ mem_gb = task.memory.giga - 1
+ }
+ }
+ // ${prefix} is passed as --output; the output globs above expect files named <prefix>.<metric>.txt / .pdf.
+ """
+ fgbio \\
+ -Xmx${mem_gb}g \\
+ --tmp-dir=. \\
+ --async-io=true \\
+ --compression=1 \\
+ CollectDuplexSeqMetrics \\
+ --input $grouped_bam \\
+ --output ${prefix} \\
+ $intervals \\
+ $args
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+ ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))")
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def touch_duplex_umi = args.contains("--duplex-umi-counts") || args.contains("-u") ? "touch ${prefix}.duplex_umi_counts.txt" : ""
+ // The optional duplex_umi_counts file is only mocked when ext.args contains "--duplex-umi-counts" or "-u".
+ """
+ touch ${prefix}.family_sizes.txt
+ touch ${prefix}.duplex_family_sizes.txt
+ touch ${prefix}.duplex_yield_metrics.txt
+ touch ${prefix}.umi_counts.txt
+ touch ${prefix}.duplex_qc.pdf
+ $touch_duplex_umi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+ ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))")
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/fgbio/collectduplexseqmetrics/meta.yml b/modules/nf-core/fgbio/collectduplexseqmetrics/meta.yml
new file mode 100644
index 00000000..947540b3
--- /dev/null
+++ b/modules/nf-core/fgbio/collectduplexseqmetrics/meta.yml
@@ -0,0 +1,130 @@
+name: "fgbio_collectduplexseqmetrics"
+description: Collects a suite of metrics to QC duplex sequencing data.
+keywords:
+ - UMIs
+ - QC
+ - bam
+ - duplex
+tools:
+ - "fgbio":
+ description: "A set of tools for working with genomic and high throughput sequencing
+ data, including UMIs"
+ homepage: "http://fulcrumgenomics.github.io/fgbio/"
+ documentation: "http://fulcrumgenomics.github.io/fgbio/"
+ tool_dev_url: "https://github.com/fulcrumgenomics/fgbio"
+ licence: ["MIT"]
+ identifier: biotools:fgbio
+ - "r-ggplot2":
+ description: "ggplot2 is a system for declaratively creating graphics, based on
+ The Grammar of Graphics. "
+ homepage: "https://ggplot2.tidyverse.org/"
+ documentation: "https://ggplot2.tidyverse.org/"
+ tool_dev_url: "https://github.com/tidyverse/ggplot2"
+ licence: ["MIT"]
+ identifier: biotools:ggplot2
+
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - grouped_bam:
+ type: file
+ description: It has to be either 1) the exact BAM output by the GroupReadsByUmi
+ tool (in the sort-order it was produced in) or 2) a BAM file that has MI tags
+ present on all reads (usually set by GroupReadsByUmi) and has been sorted with
+ SortBam into TemplateCoordinate order.
+ pattern: "*.bam"
+ ontologies: []
+ - interval_list:
+ type: file
+ description: Calculation of metrics may be restricted to a set of regions using
+ the --intervals parameter. The file format is described here
+ https://samtools.github.io/htsjdk/javadoc/htsjdk/index.html?htsjdk/samtools/util/Interval.html
+ pattern: "*.{tsv,txt,interval_list}"
+ ontologies: []
+output:
+ family_sizes:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - "**.family_sizes.txt":
+ type: file
+ description: Metrics on the frequency of different types of families of different
+ sizes
+ pattern: "*.txt"
+ ontologies: []
+ duplex_family_sizes:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - "**.duplex_family_sizes.txt":
+ type: file
+ description: Metrics on the frequency of duplex tag families by the number
+ of observations from each strand
+ pattern: "*.txt"
+ ontologies: []
+ duplex_yield_metrics:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - "**.duplex_yield_metrics.txt":
+ type: file
+ description: Summary QC metrics produced using 5%, 10%, 15%...100% of the
+ data
+ pattern: "*.txt"
+ ontologies: []
+ umi_counts:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - "**.umi_counts.txt":
+ type: file
+ description: Metrics on the frequency of observations of UMIs within reads
+ and tag families
+ pattern: "*.txt"
+ ontologies: []
+ duplex_qc:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - "**.duplex_qc.pdf":
+ type: file
+ description: A series of plots generated from the preceding metrics files
+ for visualization
+ pattern: "*.pdf"
+ ontologies: []
+ duplex_umi_counts:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - "**.duplex_umi_counts.txt":
+ type: file
+ description: Metrics on the frequency of observations of duplex UMIs within
+ reads and tag families.
+ pattern: "*.txt"
+ ontologies: []
+ versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ ontologies:
+ - edam: http://edamontology.org/format_3750 # YAML
+authors:
+ - "@georgiakes"
+maintainers:
+ - "@georgiakes"
diff --git a/modules/nf-core/fgbio/collectduplexseqmetrics/tests/main.nf.test b/modules/nf-core/fgbio/collectduplexseqmetrics/tests/main.nf.test
new file mode 100644
index 00000000..0021229b
--- /dev/null
+++ b/modules/nf-core/fgbio/collectduplexseqmetrics/tests/main.nf.test
@@ -0,0 +1,79 @@
+nextflow_process {
+
+ name "Test Process FGBIO_COLLECTDUPLEXSEQMETRICS"
+ script "../main.nf"
+ process "FGBIO_COLLECTDUPLEXSEQMETRICS"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "fgbio"
+ tag "fgbio/collectduplexseqmetrics"
+ // Real run without an interval list; the text outputs are snapshotted, and for the duplex_qc PDF only the file name.
+
+ test("homo_sapiens - bam") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam', checkIfExists: true)
+ ]
+ input[1]=[]
+
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.family_sizes,
+ process.out.duplex_family_sizes,
+ process.out.duplex_yield_metrics,
+ process.out.umi_counts,
+ process.out.duplex_umi_counts,
+ process.out.versions,
+ file(process.out.duplex_qc[0][1]).name)
+ .match() }
+
+ )
+ }
+
+ }
+ // Same inputs executed with `-stub`, so only the stub section of the process is exercised.
+ test("homo_sapiens - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam', checkIfExists: true)
+ ]
+ input[1] = []
+
+
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.family_sizes,
+ process.out.duplex_family_sizes,
+ process.out.duplex_yield_metrics,
+ process.out.umi_counts,
+ process.out.duplex_umi_counts,
+ process.out.versions,
+ file(process.out.duplex_qc[0][1]).name)
+ .match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/collectduplexseqmetrics/tests/main.nf.test.snap b/modules/nf-core/fgbio/collectduplexseqmetrics/tests/main.nf.test.snap
new file mode 100644
index 00000000..f7b9547f
--- /dev/null
+++ b/modules/nf-core/fgbio/collectduplexseqmetrics/tests/main.nf.test.snap
@@ -0,0 +1,106 @@
+{
+ "homo_sapiens - stub": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.family_sizes.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.duplex_family_sizes.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.duplex_yield_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.umi_counts.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ [
+
+ ],
+ [
+ "versions.yml:md5,d8d6be2d6162514abe0b38fa29f963c4"
+ ],
+ "test.duplex_qc.pdf"
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-08T13:07:01.106818"
+ },
+ "homo_sapiens - bam": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.family_sizes.txt:md5,a49de49bd587440c316fec830f502620"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.duplex_family_sizes.txt:md5,129e41170b9f5f2f8edce62a686c8548"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.duplex_yield_metrics.txt:md5,237e4e4ee713fdf672b0ee796827fb9d"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.umi_counts.txt:md5,9fe38b2a49ca80492b3a1c6a55679155"
+ ]
+ ],
+ [
+
+ ],
+ [
+ "versions.yml:md5,d8d6be2d6162514abe0b38fa29f963c4"
+ ],
+ "test.duplex_qc.pdf"
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-08T13:06:43.025228"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/copyumifromreadname/environment.yml b/modules/nf-core/fgbio/copyumifromreadname/environment.yml
new file mode 100644
index 00000000..4ebc0924
--- /dev/null
+++ b/modules/nf-core/fgbio/copyumifromreadname/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::fgbio=2.4.0
diff --git a/modules/nf-core/fgbio/copyumifromreadname/main.nf b/modules/nf-core/fgbio/copyumifromreadname/main.nf
new file mode 100644
index 00000000..b15c970a
--- /dev/null
+++ b/modules/nf-core/fgbio/copyumifromreadname/main.nf
@@ -0,0 +1,64 @@
+process FGBIO_COPYUMIFROMREADNAME {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/87/87626ef674e2f19366ae6214575a114fe80ce598e796894820550731706a84be/data' :
+        'community.wave.seqera.io/library/fgbio:2.4.0--913bad9d47ff8ddc' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam
+    tuple val(meta), path("*.bai"), emit: bai
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    // Runs fgbio CopyUmiFromReadName on `bam`; `bai` is staged but not referenced on the command line.
+    script:
+    def args   = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}_umi_extracted"
+    // JVM heap: 8 GB by default, clamped to (task memory - 1) GB, never below 1 GB.
+    def mem_gb = 8
+    if (task.memory) {
+        def avail_gb = task.memory.giga
+        if (avail_gb < mem_gb) {
+            mem_gb = avail_gb < 2 ? 1 : avail_gb - 1
+        }
+    } else {
+        log.info '[fgbio CopyUmiFromReadName] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
+    }
+    """
+    fgbio \\
+        -Xmx${mem_gb}g \\
+        --tmp-dir=. \\
+        --async-io=true \\
+        CopyUmiFromReadName \\
+        ${args} \\
+        --input ${bam} \\
+        --output ${prefix}.bam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+
+    // NOTE(review): the stub's version command differs from the script's; the committed stub snapshot records `fgbio: null`.
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}_umi_extracted"
+    """
+
+    touch ${prefix}.bam
+    touch ${prefix}.bai
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$(fgbio --version)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/fgbio/copyumifromreadname/meta.yml b/modules/nf-core/fgbio/copyumifromreadname/meta.yml
new file mode 100644
index 00000000..7cf4c994
--- /dev/null
+++ b/modules/nf-core/fgbio/copyumifromreadname/meta.yml
@@ -0,0 +1,80 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "fgbio_copyumifromreadname"
+description: Copies the UMI at the end of each read name in a BAM file to the RX tag.
+keywords:
+ - fgbio
+ - copy
+ - umi
+ - readname
+tools:
+ - "fgbio":
+ description: "A set of tools for working with genomic and high throughput sequencing
+ data, including UMIs"
+ homepage: http://fulcrumgenomics.github.io/fgbio/
+      documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/CopyUmiFromReadName.html
+ tool_dev_url: https://github.com/fulcrumgenomics/fgbio
+ licence: ["MIT"]
+ identifier: biotools:fgbio
+
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+
+ - bam:
+ type: file
+ description: Sorted BAM/CRAM/SAM file
+ pattern: "*.{bam,cram,sam}"
+ ontologies:
+ - edam: "http://edamontology.org/format_2572" # BAM
+ - edam: "http://edamontology.org/format_2573" # CRAM
+ - edam: "http://edamontology.org/format_3462" # SAM
+
+ - bai:
+ type: file
+ description: Index for bam file
+ pattern: "*.{bai}"
+ ontologies:
+ - edam: "http://edamontology.org/format_2572" # BAM
+ - edam: "http://edamontology.org/format_2573" # CRAM
+ - edam: "http://edamontology.org/format_3462" # SAM
+
+output:
+ bam:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - "*.bam":
+ type: file
+ description: Sorted BAM file
+ pattern: "*.{bam}"
+ ontologies:
+ - edam: "http://edamontology.org/format_2572" # BAM
+ bai:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - "*.bai":
+ type: file
+ description: Index for bam file
+ pattern: "*.{bai}"
+ ontologies:
+ - edam: "http://edamontology.org/format_3327" # BAI
+ versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+
+ ontologies:
+ - edam: http://edamontology.org/format_3750 # YAML
+authors:
+ - "@sppearce"
+maintainers:
+ - "@sppearce"
diff --git a/modules/nf-core/fgbio/copyumifromreadname/tests/main.nf.test b/modules/nf-core/fgbio/copyumifromreadname/tests/main.nf.test
new file mode 100644
index 00000000..83d67a42
--- /dev/null
+++ b/modules/nf-core/fgbio/copyumifromreadname/tests/main.nf.test
@@ -0,0 +1,75 @@
+nextflow_process {
+
+    name "Test Process FGBIO_COPYUMIFROMREADNAME"
+    script "../main.nf"
+    process "FGBIO_COPYUMIFROMREADNAME"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "fgbio"
+    tag "fgbio/copyumifromreadname"
+    config "./nextflow.config"
+
+    test("sarscov2 - bam") {
+        // Real run; `module_args` reaches the process as ext.args through ./nextflow.config.
+        when {
+            params {
+                module_args = '--field-delimiter "_" '
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test'],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out,
+                    path(process.out.versions[0]).yaml
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("sarscov2 - bam - stub") {
+        // Stub run with no extra CLI arguments.
+        options "-stub"
+
+        when {
+            params {
+                module_args = ''
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test'],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out,
+                    path(process.out.versions[0]).yaml
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/fgbio/copyumifromreadname/tests/main.nf.test.snap b/modules/nf-core/fgbio/copyumifromreadname/tests/main.nf.test.snap
new file mode 100644
index 00000000..d65ff345
--- /dev/null
+++ b/modules/nf-core/fgbio/copyumifromreadname/tests/main.nf.test.snap
@@ -0,0 +1,110 @@
+{
+ "sarscov2 - bam - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_umi_extracted.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test_umi_extracted.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,1440a1d99b4c503c037f5325445eb7e6"
+ ],
+ "bai": [
+ [
+ {
+ "id": "test"
+ },
+ "test_umi_extracted.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "bam": [
+ [
+ {
+ "id": "test"
+ },
+ "test_umi_extracted.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,1440a1d99b4c503c037f5325445eb7e6"
+ ]
+ },
+ {
+ "FGBIO_COPYUMIFROMREADNAME": {
+ "fgbio": null
+ }
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "24.10.5"
+ },
+ "timestamp": "2025-04-21T10:27:36.454432228"
+ },
+ "sarscov2 - bam": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_umi_extracted.bam:md5,245b5f4c2002dc6560353c5183247df3"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test_umi_extracted.bai:md5,d99827a46b6de71e2338f59eb69b13fc"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,047dd6edb85ae3f51a255523a2bfcfc6"
+ ],
+ "bai": [
+ [
+ {
+ "id": "test"
+ },
+ "test_umi_extracted.bai:md5,d99827a46b6de71e2338f59eb69b13fc"
+ ]
+ ],
+ "bam": [
+ [
+ {
+ "id": "test"
+ },
+ "test_umi_extracted.bam:md5,245b5f4c2002dc6560353c5183247df3"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,047dd6edb85ae3f51a255523a2bfcfc6"
+ ]
+ },
+ {
+ "FGBIO_COPYUMIFROMREADNAME": {
+ "fgbio": "2.4.0"
+ }
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "24.10.5"
+ },
+ "timestamp": "2025-04-21T10:27:16.185419539"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/copyumifromreadname/tests/nextflow.config b/modules/nf-core/fgbio/copyumifromreadname/tests/nextflow.config
new file mode 100644
index 00000000..d6d31951
--- /dev/null
+++ b/modules/nf-core/fgbio/copyumifromreadname/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: "FGBIO_COPYUMIFROMREADNAME" {
+        ext.args = params.module_args // CLI arguments chosen per test via the nf-test `params` block
+    }
+}
diff --git a/modules/nf-core/fgbio/fastqtobam/environment.yml b/modules/nf-core/fgbio/fastqtobam/environment.yml
new file mode 100644
index 00000000..4dbb6856
--- /dev/null
+++ b/modules/nf-core/fgbio/fastqtobam/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::fgbio=2.5.21
diff --git a/modules/nf-core/fgbio/fastqtobam/main.nf b/modules/nf-core/fgbio/fastqtobam/main.nf
new file mode 100644
index 00000000..6ee64bb3
--- /dev/null
+++ b/modules/nf-core/fgbio/fastqtobam/main.nf
@@ -0,0 +1,70 @@
+process FGBIO_FASTQTOBAM {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b4047e3e517b57fae311eab139a12f0887d898b7da5fceeb2a1029c73b9e3904/data' :
+        'community.wave.seqera.io/library/fgbio:2.5.21--368dab1b4f308243' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.bam") , emit: bam , optional: true
+    tuple val(meta), path("*.cram"), emit: cram, optional: true
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def suffix = task.ext.suffix ?: "bam" // output extension; also decides whether `bam` or `cram` is emitted
+    def sample_name = args.contains("--sample") ? "" : "--sample ${prefix}" // default only if ext.args has no --sample
+    def library_name = args.contains("--library") ? "" : "--library ${prefix}" // default only if ext.args has no --library
+
+    // JVM heap: 8 GB by default, clamped to (task memory - 1) GB, never below 1 GB.
+    def mem_gb = 8
+    if (task.memory) {
+        def avail_gb = task.memory.giga
+        if (avail_gb < mem_gb) {
+            mem_gb = avail_gb < 2 ? 1 : avail_gb - 1
+        }
+    } else {
+        log.info '[fgbio FastqToBam] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
+    }
+
+    """
+    fgbio \\
+        -Xmx${mem_gb}g \\
+        --tmp-dir=. \\
+        --async-io=true \\
+        FastqToBam \\
+        ${args} \\
+        --input ${reads} \\
+        --output ${prefix}.${suffix} \\
+        ${sample_name} \\
+        ${library_name}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    // The stub only needs the output file name, so ext.args is not read here.
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def suffix = task.ext.suffix ?: "bam"
+
+    """
+    touch ${prefix}.${suffix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/fgbio/fastqtobam/meta.yml b/modules/nf-core/fgbio/fastqtobam/meta.yml
new file mode 100644
index 00000000..d92f0a60
--- /dev/null
+++ b/modules/nf-core/fgbio/fastqtobam/meta.yml
@@ -0,0 +1,65 @@
+name: fgbio_fastqtobam
+description: |
+  Using the fgbio tools, converts FASTQ files into unaligned BAM or CRAM files, optionally moving the UMI barcode into the RX tag of the reads
+keywords:
+ - unaligned
+ - bam
+ - cram
+tools:
+ - fgbio:
+ description: A set of tools for working with genomic and high throughput sequencing
+ data, including UMIs
+ homepage: http://fulcrumgenomics.github.io/fgbio/
+ documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/
+ tool_dev_url: https://github.com/fulcrumgenomics/fgbio
+ licence: ["MIT"]
+ identifier: biotools:fgbio
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+        description: One or more FASTQ files (e.g. a read pair) to be converted into a BAM or CRAM file
+ pattern: "*.{fastq.gz}"
+ ontologies: []
+output:
+ bam:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.bam":
+ type: file
+ description: Unaligned, unsorted BAM file
+ pattern: "*.{bam}"
+ ontologies: []
+ cram:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.cram":
+ type: file
+ description: Unaligned, unsorted CRAM file
+ pattern: "*.{cram}"
+ ontologies: []
+ versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ ontologies:
+ - edam: http://edamontology.org/format_3750 # YAML
+authors:
+ - "@lescai"
+ - "@matthdsm"
+ - "@nvnieuwk"
+maintainers:
+ - "@lescai"
+ - "@matthdsm"
+ - "@nvnieuwk"
diff --git a/modules/nf-core/fgbio/fastqtobam/tests/bam.config b/modules/nf-core/fgbio/fastqtobam/tests/bam.config
new file mode 100644
index 00000000..014ba920
--- /dev/null
+++ b/modules/nf-core/fgbio/fastqtobam/tests/bam.config
@@ -0,0 +1,3 @@
+process {
+    ext.suffix = "bam" // explicitly request BAM output (also the module's default when ext.suffix is unset)
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/fastqtobam/tests/cram.config b/modules/nf-core/fgbio/fastqtobam/tests/cram.config
new file mode 100644
index 00000000..2406cb99
--- /dev/null
+++ b/modules/nf-core/fgbio/fastqtobam/tests/cram.config
@@ -0,0 +1,3 @@
+process {
+    ext.suffix = "cram" // output is written as <prefix>.cram, so the `cram` channel is populated instead of `bam`
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/fastqtobam/tests/custom_sample.config b/modules/nf-core/fgbio/fastqtobam/tests/custom_sample.config
new file mode 100644
index 00000000..2ed567b4
--- /dev/null
+++ b/modules/nf-core/fgbio/fastqtobam/tests/custom_sample.config
@@ -0,0 +1,3 @@
+process {
+    ext.args = "--sample CustomSample --library CustomLibrary" // module skips its prefix-based --sample/--library defaults when these are present
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/fastqtobam/tests/main.nf.test b/modules/nf-core/fgbio/fastqtobam/tests/main.nf.test
new file mode 100644
index 00000000..d10a0052
--- /dev/null
+++ b/modules/nf-core/fgbio/fastqtobam/tests/main.nf.test
@@ -0,0 +1,218 @@
+nextflow_process {
+
+    name "Test Process FGBIO_FASTQTOBAM"
+    script "../main.nf"
+    process "FGBIO_FASTQTOBAM"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "fgbio"
+    tag "fgbio/fastqtobam"
+
+    test("homo_sapiens - [fastq1, fastq2] - default") {
+        // No ext overrides: the module's default suffix (bam) applies.
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.bam[0][1]).name,
+                    process.out.cram,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - [fastq1, fastq2] - cram") {
+        // cram.config sets ext.suffix = "cram", so the `cram` channel is checked by file name.
+        config "./cram.config"
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.bam,
+                    file(process.out.cram[0][1]).name,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - [fastq1, fastq2] - bam") {
+        // bam.config sets ext.suffix = "bam" explicitly.
+        config "./bam.config"
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.bam[0][1]).name,
+                    process.out.cram,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - fastq1") {
+        // Single FASTQ input; the meta map is marked single-end to match the data supplied.
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.bam[0][1]).name,
+                    process.out.cram,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - [fastq1, fastq2] - umi") {
+        // umi.config passes --read-structures through ext.args.
+        config "./umi.config"
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.bam[0][1]).name,
+                    process.out.cram,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - [fastq1, fastq2] - custom sample") {
+        // custom_sample.config supplies its own --sample/--library via ext.args.
+        config "./custom_sample.config"
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.bam[0][1]).name,
+                    process.out.cram,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - [fastq1, fastq2] - stub") {
+        // Stub run: the module only touches the output file and writes versions.yml.
+        options "-stub"
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.bam[0][1]).name,
+                    process.out.cram,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+}
diff --git a/modules/nf-core/fgbio/fastqtobam/tests/main.nf.test.snap b/modules/nf-core/fgbio/fastqtobam/tests/main.nf.test.snap
new file mode 100644
index 00000000..cc01344d
--- /dev/null
+++ b/modules/nf-core/fgbio/fastqtobam/tests/main.nf.test.snap
@@ -0,0 +1,114 @@
+{
+ "homo_sapiens - [fastq1, fastq2] - cram": {
+ "content": [
+ [
+
+ ],
+ "test.cram",
+ [
+ "versions.yml:md5,468bbf74a89c7db86a209ad9bbfa7736"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-05T16:33:29.120923"
+ },
+ "homo_sapiens - fastq1": {
+ "content": [
+ "test.bam",
+ [
+
+ ],
+ [
+ "versions.yml:md5,468bbf74a89c7db86a209ad9bbfa7736"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-05T16:34:06.610383"
+ },
+ "homo_sapiens - [fastq1, fastq2] - default": {
+ "content": [
+ "test.bam",
+ [
+
+ ],
+ [
+ "versions.yml:md5,468bbf74a89c7db86a209ad9bbfa7736"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-05T16:33:02.837327"
+ },
+ "homo_sapiens - [fastq1, fastq2] - umi": {
+ "content": [
+ "test.bam",
+ [
+
+ ],
+ [
+ "versions.yml:md5,468bbf74a89c7db86a209ad9bbfa7736"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-05T16:34:25.224411"
+ },
+ "homo_sapiens - [fastq1, fastq2] - bam": {
+ "content": [
+ "test.bam",
+ [
+
+ ],
+ [
+ "versions.yml:md5,468bbf74a89c7db86a209ad9bbfa7736"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-05T16:33:47.975145"
+ },
+ "homo_sapiens - [fastq1, fastq2] - custom sample": {
+ "content": [
+ "test.bam",
+ [
+
+ ],
+ [
+ "versions.yml:md5,468bbf74a89c7db86a209ad9bbfa7736"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-05T16:34:43.606837"
+ },
+ "homo_sapiens - [fastq1, fastq2] - stub": {
+ "content": [
+ "test.bam",
+ [
+
+ ],
+ [
+ "versions.yml:md5,468bbf74a89c7db86a209ad9bbfa7736"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-05T16:34:57.596241"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/fastqtobam/tests/umi.config b/modules/nf-core/fgbio/fastqtobam/tests/umi.config
new file mode 100644
index 00000000..7b668aa9
--- /dev/null
+++ b/modules/nf-core/fgbio/fastqtobam/tests/umi.config
@@ -0,0 +1,3 @@
+process {
+    ext.args = "--read-structures +T 12M11S+T" // fgbio read structures, one per FASTQ: R1 all template; R2 = 12 UMI bases, 11 skipped, rest template
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/filterconsensusreads/environment.yml b/modules/nf-core/fgbio/filterconsensusreads/environment.yml
new file mode 100644
index 00000000..4dbb6856
--- /dev/null
+++ b/modules/nf-core/fgbio/filterconsensusreads/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::fgbio=2.5.21
diff --git a/modules/nf-core/fgbio/filterconsensusreads/fgbio-filterconsensusreads.diff b/modules/nf-core/fgbio/filterconsensusreads/fgbio-filterconsensusreads.diff
new file mode 100644
index 00000000..9d55d91e
--- /dev/null
+++ b/modules/nf-core/fgbio/filterconsensusreads/fgbio-filterconsensusreads.diff
@@ -0,0 +1,20 @@
+Changes in component 'nf-core/fgbio/filterconsensusreads'
+'modules/nf-core/fgbio/filterconsensusreads/environment.yml' is unchanged
+Changes in 'fgbio/filterconsensusreads/main.nf':
+--- modules/nf-core/fgbio/filterconsensusreads/main.nf
++++ modules/nf-core/fgbio/filterconsensusreads/main.nf
+@@ -8,8 +8,7 @@
+ 'community.wave.seqera.io/library/fgbio:2.5.21--368dab1b4f308243' }"
+
+ input:
+- tuple val(meta), path(bam)
+- tuple val(meta2), path(fasta)
++ tuple val(meta), path(bam), path(fasta)
+ val(min_reads)
+ val(min_baseq)
+ val(max_base_error_rate)
+
+'modules/nf-core/fgbio/filterconsensusreads/meta.yml' is unchanged
+'modules/nf-core/fgbio/filterconsensusreads/tests/main.nf.test' is unchanged
+'modules/nf-core/fgbio/filterconsensusreads/tests/main.nf.test.snap' is unchanged
+************************************************************
diff --git a/modules/nf-core/fgbio/filterconsensusreads/main.nf b/modules/nf-core/fgbio/filterconsensusreads/main.nf
new file mode 100644
index 00000000..007887e2
--- /dev/null
+++ b/modules/nf-core/fgbio/filterconsensusreads/main.nf
@@ -0,0 +1,70 @@
+process FGBIO_FILTERCONSENSUSREADS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b4047e3e517b57fae311eab139a12f0887d898b7da5fceeb2a1029c73b9e3904/data' :
+        'community.wave.seqera.io/library/fgbio:2.5.21--368dab1b4f308243' }"
+
+    input:
+    tuple val(meta), path(bam), path(fasta)
+    val(min_reads)
+    val(min_baseq)
+    val(max_base_error_rate)
+
+    output:
+    tuple val(meta), path("${prefix}.bam"), emit: bam
+    path "versions.yml"                   , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}_consensus_filtered"
+    // `prefix` is assigned without `def` so the `output:` path "${prefix}.bam" can resolve it.
+    // JVM heap: 8 GB by default, clamped to (task memory - 1) GB, never below 1 GB.
+    def mem_gb = 8
+    if (task.memory) {
+        def avail_gb = task.memory.giga
+        if (avail_gb < mem_gb) {
+            mem_gb = avail_gb < 2 ? 1 : avail_gb - 1
+        }
+    } else {
+        log.info '[fgbio FilterConsensusReads] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
+    }
+    if ("${bam}" == "${prefix}.bam") { error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" }
+
+    """
+    fgbio \\
+        -Xmx${mem_gb}g \\
+        --tmp-dir=. \\
+        --compression=0 \\
+        FilterConsensusReads \\
+        --input $bam \\
+        --output ${prefix}.bam \\
+        --ref ${fasta} \\
+        --min-reads ${min_reads} \\
+        --min-base-quality ${min_baseq} \\
+        --max-base-error-rate ${max_base_error_rate} \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}_consensus_filtered"
+    if ("${bam}" == "${prefix}.bam") { error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" }
+    """
+    touch ${prefix}.bam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/fgbio/filterconsensusreads/meta.yml b/modules/nf-core/fgbio/filterconsensusreads/meta.yml
new file mode 100644
index 00000000..17c4f8a1
--- /dev/null
+++ b/modules/nf-core/fgbio/filterconsensusreads/meta.yml
@@ -0,0 +1,74 @@
+name: "fgbio_filterconsensusreads"
+description: Uses FGBIO FilterConsensusReads to filter consensus reads generated by
+ CallMolecularConsensusReads or CallDuplexConsensusReads.
+keywords:
+ - fgbio
+ - filter
+ - consensus
+ - umi
+ - duplexumi
+tools:
+ - "fgbio":
+ description: "A set of tools for working with genomic and high throughput sequencing
+ data, including UMIs"
+ homepage: http://fulcrumgenomics.github.io/fgbio/
+ documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/FilterConsensusReads.html
+ tool_dev_url: https://github.com/fulcrumgenomics/fgbio
+ licence: ["MIT"]
+ identifier: biotools:fgbio
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - bam:
+ type: file
+ description: BAM file
+ pattern: "*.bam"
+ ontologies: []
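+  - - meta2: # NOTE(review): fgbio-filterconsensusreads.diff merges `fasta` into the first tuple; main.nf no longer takes a meta2 map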
+ type: map
+ description: |
+ Groovy Map containing genome information
+ e.g. [ id:'test', single_end:false ]
+ - fasta:
+ type: file
+ description: Fasta file containing genomic sequence information
+        pattern: "*.{fa,fasta}"
+ ontologies: []
+ - min_reads:
+ type: integer
+ description: Minimum number of reads required to keep a consensus read
+ - min_baseq:
+      type: integer
+ description: Minimum base quality to consider
+ ontologies: []
+ - max_base_error_rate:
+      type: float
+ description: Maximum base error rate for a position before it is replaced with
+ an N.
+ ontologies: []
+output:
+ bam:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}.bam:
+ type: file
+ description: Filtered consensus BAM file
+ pattern: "*.bam"
+ ontologies: []
+ versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ ontologies:
+ - edam: http://edamontology.org/format_3750 # YAML
+authors:
+ - "@lescai"
+maintainers:
+ - "@lescai"
diff --git a/modules/nf-core/fgbio/filterconsensusreads/tests/main.nf.test b/modules/nf-core/fgbio/filterconsensusreads/tests/main.nf.test
new file mode 100644
index 00000000..e4f3511f
--- /dev/null
+++ b/modules/nf-core/fgbio/filterconsensusreads/tests/main.nf.test
@@ -0,0 +1,69 @@
+nextflow_process {
+
+    name "Test Process FGBIO_FILTERCONSENSUSREADS"
+    script "../main.nf"
+
+    process "FGBIO_FILTERCONSENSUSREADS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "fgbio"
+    tag "fgbio/filterconsensusreads"
+
+    test("sarscov2 - bam") {
+
+        when {
+            process {
+                """
+                // BAM and reference FASTA share one tuple, matching the locally patched process inputs.
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_duplex_consensus.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[1] = 3   // min_reads
+                input[2] = 45  // min_baseq
+                input[3] = 0.2 // max_base_error_rate
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - bam - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                // BAM and reference FASTA share one tuple, matching the locally patched process inputs.
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_duplex_consensus.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[1] = 3   // min_reads
+                input[2] = 45  // min_baseq
+                input[3] = 0.2 // max_base_error_rate
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/fgbio/filterconsensusreads/tests/main.nf.test.snap b/modules/nf-core/fgbio/filterconsensusreads/tests/main.nf.test.snap
new file mode 100644
index 00000000..4dff39ad
--- /dev/null
+++ b/modules/nf-core/fgbio/filterconsensusreads/tests/main.nf.test.snap
@@ -0,0 +1,72 @@
+{
+ "sarscov2 - bam - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_consensus_filtered.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,be19391d55fe52c0fd32a844b1aceeb1"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_consensus_filtered.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,be19391d55fe52c0fd32a844b1aceeb1"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-06T16:37:18.521589"
+ },
+ "sarscov2 - bam": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_consensus_filtered.bam:md5,3d3c128a00a1e3c466275516f345daac"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,be19391d55fe52c0fd32a844b1aceeb1"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_consensus_filtered.bam:md5,3d3c128a00a1e3c466275516f345daac"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,be19391d55fe52c0fd32a844b1aceeb1"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-06T16:37:04.297362"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/groupreadsbyumi/environment.yml b/modules/nf-core/fgbio/groupreadsbyumi/environment.yml
new file mode 100644
index 00000000..4dbb6856
--- /dev/null
+++ b/modules/nf-core/fgbio/groupreadsbyumi/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::fgbio=2.5.21
diff --git a/modules/nf-core/fgbio/groupreadsbyumi/main.nf b/modules/nf-core/fgbio/groupreadsbyumi/main.nf
new file mode 100644
index 00000000..f7725219
--- /dev/null
+++ b/modules/nf-core/fgbio/groupreadsbyumi/main.nf
@@ -0,0 +1,70 @@
+process FGBIO_GROUPREADSBYUMI { // Runs `fgbio GroupReadsByUmi`; emits the grouped BAM, a family-size histogram and grouping metrics
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b4047e3e517b57fae311eab139a12f0887d898b7da5fceeb2a1029c73b9e3904/data' :
+        'community.wave.seqera.io/library/fgbio:2.5.21--368dab1b4f308243' }"
+
+    input:
+    tuple val(meta), path(bam) // sample meta map and the BAM to group
+    val(strategy)              // UMI assignment strategy, forwarded verbatim to `-s`
+
+    output:
+    tuple val(meta), path("*.bam")             , emit: bam
+    tuple val(meta), path("*histogram.txt")    , emit: histogram
+    tuple val(meta), path("*read-metrics.txt"), emit: read_metrics
+    path "versions.yml"                        , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''                           // extra GroupReadsByUmi options from ext.args
+    def prefix = task.ext.prefix ?: "${meta.id}_umi-grouped" // basename shared by all three outputs
+    def mem_gb = 8                                           // JVM heap (-Xmx) in GB; kept when memory is unknown or >= 8 GB
+    if (!task.memory) {
+        log.info '[fgbio GroupReadsByUmi] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
+    } else if (mem_gb > task.memory.giga) { // fewer than 8 GB allocated: shrink the heap to fit
+        if (task.memory.giga < 2) {
+            mem_gb = 1 // never request less than 1 GB of heap
+        } else {
+            mem_gb = task.memory.giga - 1 // leave 1 GB of headroom below the task allocation
+        }
+    }
+
+    if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+
+    """
+    fgbio \\
+        -Xmx${mem_gb}g \\
+        --tmp-dir=. \\
+        GroupReadsByUmi \\
+        -s $strategy \\
+        $args \\
+        -i $bam \\
+        -o ${prefix}.bam \\
+        -f ${prefix}_histogram.txt \\
+        --grouping-metrics ${prefix}_read-metrics.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+
+    stub: // creates empty placeholder outputs; versions.yml is still produced by calling fgbio
+    def prefix = task.ext.prefix ?: "${meta.id}_umi-grouped"
+    if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    """
+    touch ${prefix}.bam
+    touch ${prefix}_histogram.txt
+    touch ${prefix}_read-metrics.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/fgbio/groupreadsbyumi/meta.yml b/modules/nf-core/fgbio/groupreadsbyumi/meta.yml
new file mode 100644
index 00000000..eb22ec10
--- /dev/null
+++ b/modules/nf-core/fgbio/groupreadsbyumi/meta.yml
@@ -0,0 +1,84 @@
+name: fgbio_groupreadsbyumi
+description: |
+ Groups reads together that appear to have come from the same original molecule.
+ Reads are grouped by template, and then templates are sorted by the 5’ mapping positions
+ of the reads from the template, used from earliest mapping position to latest.
+ Reads that have the same end positions are then sub-grouped by UMI sequence.
+ (!) Note: the MQ tag is required on reads with mapped mates (!)
+ This can be added using samblaster with the optional argument --addMateTags.
+keywords:
+ - UMI
+ - groupreads
+ - fgbio
+tools:
+ - fgbio:
+ description: A set of tools for working with genomic and high throughput sequencing
+ data, including UMIs
+ homepage: http://fulcrumgenomics.github.io/fgbio/
+ documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/
+ tool_dev_url: https://github.com/fulcrumgenomics/fgbio
+ licence: ["MIT"]
+ identifier: biotools:fgbio
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - bam:
+ type: file
+ description: |
+ BAM file. Note: the MQ tag is required on reads with mapped mates (!)
+ pattern: "*.bam"
+ ontologies: []
+ - strategy:
+ type: string
+ enum: ["Identity", "Edit", "Adjacency", "Paired"]
+ description: |
+ Required argument: defines the UMI assignment strategy.
+ Must be chosen among: Identity, Edit, Adjacency, Paired.
+output:
+ bam:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.bam":
+ type: file
+ description: UMI-grouped BAM
+ pattern: "*.bam"
+ ontologies: []
+ histogram:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*histogram.txt":
+ type: file
+ description: A text file containing the tag family size counts
+ pattern: "*.txt"
+ ontologies: []
+ read_metrics:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*read-metrics.txt":
+ type: file
+ description: A text file containing the read count metrics from grouping
+ pattern: "*.txt"
+ ontologies: []
+ versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ ontologies:
+ - edam: http://edamontology.org/format_3750 # YAML
+authors:
+ - "@lescai"
+maintainers:
+ - "@lescai"
diff --git a/modules/nf-core/fgbio/groupreadsbyumi/tests/main.nf.test b/modules/nf-core/fgbio/groupreadsbyumi/tests/main.nf.test
new file mode 100644
index 00000000..a9e8bd25
--- /dev/null
+++ b/modules/nf-core/fgbio/groupreadsbyumi/tests/main.nf.test
@@ -0,0 +1,60 @@
+nextflow_process { // nf-test suite for the FGBIO_GROUPREADSBYUMI process defined in ../main.nf
+
+    name "Test Process FGBIO_GROUPREADSBYUMI"
+    script "../main.nf"
+    process "FGBIO_GROUPREADSBYUMI"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "fgbio"
+    tag "fgbio/groupreadsbyumi"
+
+    test("sarscov2 - bam") { // NOTE: the name is the key used in main.nf.test.snap; the input is homo_sapiens test data
+
+        when {
+            process { // input[0] = [meta, bam]; input[1] = grouping strategy string
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam', checkIfExists: true)
+                ]
+                input[1] = "Adjacency"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success }, // the task must exit cleanly
+                { assert snapshot(process.out).match() } // bam, histogram, read_metrics and versions must match the snapshot
+            )
+        }
+
+    }
+
+    test("sarscov2 - bam - stub") { // same inputs, exercised through the process stub
+
+        options "-stub" // run the stub block instead of the real command
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam', checkIfExists: true)
+                ]
+                input[1] = "Adjacency"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // stub outputs are empty files, hence the d41d8cd9… md5 in the snapshot
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/fgbio/groupreadsbyumi/tests/main.nf.test.snap b/modules/nf-core/fgbio/groupreadsbyumi/tests/main.nf.test.snap
new file mode 100644
index 00000000..00de4ac0
--- /dev/null
+++ b/modules/nf-core/fgbio/groupreadsbyumi/tests/main.nf.test.snap
@@ -0,0 +1,144 @@
+{
+ "sarscov2 - bam - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_umi-grouped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_umi-grouped_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_umi-grouped_read-metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,3e8002a4c4eef8dc0a715dd9585eeb5b"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_umi-grouped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "histogram": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_umi-grouped_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "read_metrics": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_umi-grouped_read-metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,3e8002a4c4eef8dc0a715dd9585eeb5b"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-06T16:37:53.48947"
+ },
+ "sarscov2 - bam": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_umi-grouped.bam:md5,35bfc992c30d8e3e50816159fa58cb11"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_umi-grouped_histogram.txt:md5,9a0c622b65209afbce0840e2affff983"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_umi-grouped_read-metrics.txt:md5,a5f75e3e390e30791a636fed355e0afd"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,3e8002a4c4eef8dc0a715dd9585eeb5b"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_umi-grouped.bam:md5,35bfc992c30d8e3e50816159fa58cb11"
+ ]
+ ],
+ "histogram": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_umi-grouped_histogram.txt:md5,9a0c622b65209afbce0840e2affff983"
+ ]
+ ],
+ "read_metrics": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_umi-grouped_read-metrics.txt:md5,a5f75e3e390e30791a636fed355e0afd"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,3e8002a4c4eef8dc0a715dd9585eeb5b"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-10T08:59:32.932448"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/sortbam/environment.yml b/modules/nf-core/fgbio/sortbam/environment.yml
new file mode 100644
index 00000000..9645b667
--- /dev/null
+++ b/modules/nf-core/fgbio/sortbam/environment.yml
@@ -0,0 +1,8 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ # renovate: datasource=conda depName=bioconda/fgbio
+ - bioconda::fgbio=2.5.21
diff --git a/modules/nf-core/fgbio/sortbam/main.nf b/modules/nf-core/fgbio/sortbam/main.nf
new file mode 100644
index 00000000..3b3e6521
--- /dev/null
+++ b/modules/nf-core/fgbio/sortbam/main.nf
@@ -0,0 +1,61 @@
+process FGBIO_SORTBAM { // Sorts a BAM with `fgbio SortBam`; the sort order is whatever ext.args selects
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b4047e3e517b57fae311eab139a12f0887d898b7da5fceeb2a1029c73b9e3904/data' :
+        'community.wave.seqera.io/library/fgbio:2.5.21--368dab1b4f308243' }"
+
+    input:
+    tuple val(meta), path(bam) // sample meta map and the BAM to sort
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''                      // extra SortBam options from ext.args
+    def prefix = task.ext.prefix ?: "${meta.id}_sorted" // output basename; must differ from the input name
+    def mem_gb = 8                                      // JVM heap (-Xmx) in GB; kept when memory is unknown or >= 8 GB
+    if (!task.memory) {
+        log.info '[fgbio SortBam] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
+    } else if (mem_gb > task.memory.giga) { // fewer than 8 GB allocated: shrink the heap to fit
+        if (task.memory.giga < 2) {
+            mem_gb = 1 // never request less than 1 GB of heap
+        } else {
+            mem_gb = task.memory.giga - 1 // leave 1 GB of headroom below the task allocation
+        }
+    }
+
+    if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+
+    """
+    fgbio -Xmx${mem_gb}g \\
+        --async-io=true \\
+        --tmp-dir=. \\
+        SortBam \\
+        -i $bam \\
+        $args \\
+        -o ${prefix}.bam
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+
+    stub: // creates an empty placeholder BAM; versions.yml is still produced by calling fgbio
+    def prefix = task.ext.prefix ?: "${meta.id}_sorted" // declared with `def`, matching the script block; no output references it
+    if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    """
+    touch ${prefix}.bam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/fgbio/sortbam/meta.yml b/modules/nf-core/fgbio/sortbam/meta.yml
new file mode 100644
index 00000000..81c295d6
--- /dev/null
+++ b/modules/nf-core/fgbio/sortbam/meta.yml
@@ -0,0 +1,50 @@
+name: fgbio_sortbam
+description: Sorts a SAM or BAM file. Several sort orders are available, including
+ coordinate, queryname, random, and randomquery.
+keywords:
+ - sort
+ - bam
+ - sam
+tools:
+ - fgbio:
+ description: Tools for working with genomic and high throughput sequencing data.
+ homepage: https://github.com/fulcrumgenomics/fgbio
+ documentation: http://fulcrumgenomics.github.io/fgbio/
+ licence: ["MIT"]
+ identifier: biotools:fgbio
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false, collapse:false ]
+ - bam:
+ type: file
+ description: |
+ The input SAM or BAM file to be sorted.
+ pattern: "*.{bam,sam}"
+ ontologies: []
+output:
+ bam:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.bam":
+ type: file
+ description: |
+            Sorted output BAM file (the process always writes `${prefix}.bam`).
+          pattern: "*.bam"
+ ontologies: []
+ versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ ontologies:
+ - edam: http://edamontology.org/format_3750 # YAML
+authors:
+ - "@sruthipsuresh"
+maintainers:
+ - "@sruthipsuresh"
diff --git a/modules/nf-core/fgbio/sortbam/tests/main.nf.test b/modules/nf-core/fgbio/sortbam/tests/main.nf.test
new file mode 100644
index 00000000..2e9b2459
--- /dev/null
+++ b/modules/nf-core/fgbio/sortbam/tests/main.nf.test
@@ -0,0 +1,56 @@
+nextflow_process { // nf-test suite for the FGBIO_SORTBAM process defined in ../main.nf
+
+    name "Test Process FGBIO_SORTBAM"
+    script "../main.nf"
+    process "FGBIO_SORTBAM"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "fgbio"
+    tag "fgbio/sortbam"
+
+    test("sarscov2 - bam") { // NOTE: the name is the key used in main.nf.test.snap; the input is homo_sapiens test data
+
+        when {
+            process { // single input: [meta, bam]
+                """
+                input[0] = [ [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success }, // the task must exit cleanly
+                { assert snapshot(process.out).match() } // bam and versions must match the stored snapshot
+            )
+        }
+
+    }
+
+    test("sarscov2 - bam - stub") { // same input, exercised through the process stub
+
+        options "-stub" // run the stub block instead of the real command
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // stub output is an empty file, hence the d41d8cd9… md5 in the snapshot
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/fgbio/sortbam/tests/main.nf.test.snap b/modules/nf-core/fgbio/sortbam/tests/main.nf.test.snap
new file mode 100644
index 00000000..cb8d6768
--- /dev/null
+++ b/modules/nf-core/fgbio/sortbam/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+ "sarscov2 - bam - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,c6608b61c38dcf9142a28a0d665eb96d"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test"
+ },
+ "test_sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,c6608b61c38dcf9142a28a0d665eb96d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-06T16:38:27.474292"
+ },
+ "sarscov2 - bam": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_sorted.bam:md5,1d7a558a72b7aecc80946cb9cadf8f60"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,c6608b61c38dcf9142a28a0d665eb96d"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test"
+ },
+ "test_sorted.bam:md5,1d7a558a72b7aecc80946cb9cadf8f60"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,c6608b61c38dcf9142a28a0d665eb96d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-06T16:38:12.994113"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/zipperbams/environment.yml b/modules/nf-core/fgbio/zipperbams/environment.yml
new file mode 100644
index 00000000..4dbb6856
--- /dev/null
+++ b/modules/nf-core/fgbio/zipperbams/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::fgbio=2.5.21
diff --git a/modules/nf-core/fgbio/zipperbams/fgbio-zipperbams.diff b/modules/nf-core/fgbio/zipperbams/fgbio-zipperbams.diff
new file mode 100644
index 00000000..48614230
--- /dev/null
+++ b/modules/nf-core/fgbio/zipperbams/fgbio-zipperbams.diff
@@ -0,0 +1,40 @@
+Changes in component 'nf-core/fgbio/zipperbams'
+'modules/nf-core/fgbio/zipperbams/environment.yml' is unchanged
+Changes in 'fgbio/zipperbams/main.nf':
+--- modules/nf-core/fgbio/zipperbams/main.nf
++++ modules/nf-core/fgbio/zipperbams/main.nf
+@@ -8,10 +8,8 @@
+ 'community.wave.seqera.io/library/fgbio:2.5.21--368dab1b4f308243' }"
+
+ input:
+- tuple val(meta), path(unmapped_bam)
+- tuple val(meta2), path(mapped_bam)
+- tuple val(meta3), path(fasta)
+- tuple val(meta4), path(dict)
++
++ tuple val(meta), path(unmapped_bam), path(mapped_bam), path(fasta), path(dict)
+
+ output:
+ tuple val(meta), path("${prefix}.bam"), emit: bam
+@@ -22,7 +20,6 @@
+
+ script:
+ def args = task.ext.args ?: ''
+- def args2 = task.ext.args2 ?: ''
+ def compression = task.ext.compression ?: '0'
+ prefix = task.ext.prefix ?: "${meta.id}_zipped"
+ def mem_gb = 8
+@@ -50,7 +47,6 @@
+ ${args} \\
+ --output ${prefix}.bam
+
+-
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+
+'modules/nf-core/fgbio/zipperbams/meta.yml' is unchanged
+'modules/nf-core/fgbio/zipperbams/tests/main.nf.test' is unchanged
+'modules/nf-core/fgbio/zipperbams/tests/main.nf.test.snap' is unchanged
+'modules/nf-core/fgbio/zipperbams/tests/nextflow.config' is unchanged
+************************************************************
diff --git a/modules/nf-core/fgbio/zipperbams/main.nf b/modules/nf-core/fgbio/zipperbams/main.nf
new file mode 100644
index 00000000..cb723439
--- /dev/null
+++ b/modules/nf-core/fgbio/zipperbams/main.nf
@@ -0,0 +1,69 @@
+process FGBIO_ZIPPERBAMS { // Runs `fgbio ZipperBams` on an unmapped/mapped BAM pair and emits `${prefix}.bam`
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b4047e3e517b57fae311eab139a12f0887d898b7da5fceeb2a1029c73b9e3904/data' :
+        'community.wave.seqera.io/library/fgbio:2.5.21--368dab1b4f308243' }"
+
+    input:
+
+    tuple val(meta), path(unmapped_bam), path(mapped_bam), path(fasta), path(dict) // one tuple per sample; dict is staged but never named on the command line — presumably fgbio finds it next to the FASTA, confirm
+
+    output:
+    tuple val(meta), path("${prefix}.bam"), emit: bam // relies on `prefix` being set (without `def`) in script/stub
+    path "versions.yml"                   , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''                   // extra ZipperBams options from ext.args
+    def compression = task.ext.compression ?: '0'    // value for fgbio --compression; '0' unless ext.compression is set
+    prefix = task.ext.prefix ?: "${meta.id}_zipped" // intentionally not `def`: the output block interpolates ${prefix}
+    def mem_gb = 8                                   // JVM heap (-Xmx) in GB; kept when memory is unknown or >= 8 GB
+    if (!task.memory) {
+        log.info '[fgbio ZipperBams] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
+    } else if (mem_gb > task.memory.giga) { // fewer than 8 GB allocated: shrink the heap to fit
+        if (task.memory.giga < 2) {
+            mem_gb = 1 // never request less than 1 GB of heap
+        } else {
+            mem_gb = task.memory.giga - 1 // leave 1 GB of headroom below the task allocation
+        }
+    }
+
+    if ("${unmapped_bam}" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    if ("${mapped_bam}" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+
+    """
+    fgbio -Xmx${mem_gb}g \\
+        --compression ${compression} \\
+        --async-io=true \\
+        ZipperBams \\
+        --unmapped ${unmapped_bam} \\
+        --input ${mapped_bam} \\
+        --ref ${fasta} \\
+        ${args} \\
+        --output ${prefix}.bam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+
+    stub: // creates an empty placeholder BAM; versions.yml is still produced by calling fgbio
+    prefix = task.ext.prefix ?: "${meta.id}_zipped" // intentionally not `def`: the output block interpolates ${prefix}
+    if ("${unmapped_bam}" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    if ("${mapped_bam}" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+
+    """
+    touch ${prefix}.bam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/fgbio/zipperbams/meta.yml b/modules/nf-core/fgbio/zipperbams/meta.yml
new file mode 100644
index 00000000..052b50e1
--- /dev/null
+++ b/modules/nf-core/fgbio/zipperbams/meta.yml
@@ -0,0 +1,82 @@
+name: "fgbio_zipperbams"
+description: FGBIO tool to zip together an unmapped and mapped BAM to transfer metadata
+ into the output BAM
+keywords:
+ - fgbio
+ - umi
+ - unmapped
+ - ubam
+ - zipperbams
+tools:
+ - fgbio:
+ description: A set of tools for working with genomic and high throughput sequencing
+ data, including UMIs
+ homepage: http://fulcrumgenomics.github.io/fgbio/
+ documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/
+ tool_dev_url: https://github.com/fulcrumgenomics/fgbio
+ licence: ["MIT"]
+ identifier: biotools:fgbio
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - unmapped_bam:
+ type: file
+ description: unmapped BAM file
+ pattern: "*.bam"
+ ontologies: []
+ - - meta2:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - mapped_bam:
+ type: file
+ description: mapped BAM/SAM file
+ pattern: "*.{bam,sam}"
+ ontologies: []
+ - - meta3:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'GRCh38' ]
+ - fasta:
+ type: file
+ description: fasta file containing genomic sequence information
+ pattern: "*.{fasta,fa}"
+ ontologies: []
+ - - meta4:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'GRCh38' ]
+ - dict:
+ type: file
+ description: dict file containing a sequence dictionary for the fasta file
+ pattern: "*.{dict}"
+ ontologies: []
+output:
+ bam:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ${prefix}.bam:
+ type: file
+ description: Zipped BAM file
+ pattern: "*.bam"
+ ontologies: []
+ versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ ontologies:
+ - edam: http://edamontology.org/format_3750 # YAML
+authors:
+ - "@lescai"
+maintainers:
+ - "@lescai"
diff --git a/modules/nf-core/fgbio/zipperbams/tests/main.nf.test b/modules/nf-core/fgbio/zipperbams/tests/main.nf.test
new file mode 100644
index 00000000..89f7ce5c
--- /dev/null
+++ b/modules/nf-core/fgbio/zipperbams/tests/main.nf.test
@@ -0,0 +1,65 @@
+nextflow_process { // nf-test suite for the locally patched FGBIO_ZIPPERBAMS process in ../main.nf
+
+    name "Test Process FGBIO_ZIPPERBAMS"
+    script "../main.nf"
+    process "FGBIO_ZIPPERBAMS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "fgbio"
+    tag "fgbio/zipperbams"
+
+    test("sarscov2 - bam") { // NOTE: the name is the key used in main.nf.test.snap; the inputs are homo_sapiens test data
+        config "./nextflow.config" // supplies ext.args (--tags-to-reverse / --tags-to-revcomp) for the real run
+
+        when {
+            process { // the process declares ONE input tuple: [meta, unmapped_bam, mapped_bam, fasta, dict]
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_unmapped.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_mapped.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success }, // the task must exit cleanly
+                { assert snapshot(process.out).match() } // bam and versions must match the stored snapshot
+            )
+        }
+
+    }
+
+    test("sarscov2 - bam - stub") { // same inputs, exercised through the process stub
+
+        options "-stub" // run the stub block instead of the real command
+
+        when {
+            process { // single 5-element tuple, as in the test above
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_unmapped.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_mapped.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // stub output is an empty file, hence the d41d8cd9… md5 in the snapshot
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/fgbio/zipperbams/tests/main.nf.test.snap b/modules/nf-core/fgbio/zipperbams/tests/main.nf.test.snap
new file mode 100644
index 00000000..9ceb5b24
--- /dev/null
+++ b/modules/nf-core/fgbio/zipperbams/tests/main.nf.test.snap
@@ -0,0 +1,72 @@
+{
+ "sarscov2 - bam - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_zipped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,786ad0edcd8c1ead6fd6d8f8a751f971"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_zipped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,786ad0edcd8c1ead6fd6d8f8a751f971"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-06T16:39:17.538398"
+ },
+ "sarscov2 - bam": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_zipped.bam:md5,1980b44177f4720f1005c9be62b09f79"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,786ad0edcd8c1ead6fd6d8f8a751f971"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_zipped.bam:md5,1980b44177f4720f1005c9be62b09f79"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,786ad0edcd8c1ead6fd6d8f8a751f971"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.2"
+ },
+ "timestamp": "2025-06-06T16:38:57.558961"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fgbio/zipperbams/tests/nextflow.config b/modules/nf-core/fgbio/zipperbams/tests/nextflow.config
new file mode 100644
index 00000000..4c4c4ddc
--- /dev/null
+++ b/modules/nf-core/fgbio/zipperbams/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: "FGBIO_ZIPPERBAMS" {
+ ext.args = "--tags-to-reverse Consensus --tags-to-revcomp Consensus"
+ }
+}
diff --git a/modules/nf-core/samtools/fastq/environment.yml b/modules/nf-core/samtools/fastq/environment.yml
new file mode 100644
index 00000000..62054fc9
--- /dev/null
+++ b/modules/nf-core/samtools/fastq/environment.yml
@@ -0,0 +1,8 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::htslib=1.21
+ - bioconda::samtools=1.21
diff --git a/modules/nf-core/samtools/fastq/main.nf b/modules/nf-core/samtools/fastq/main.nf
new file mode 100644
index 00000000..bcc5d604
--- /dev/null
+++ b/modules/nf-core/samtools/fastq/main.nf
@@ -0,0 +1,60 @@
+process SAMTOOLS_FASTQ { // Converts an alignment file to FASTQ with `samtools fastq`; output layout depends on interleave / meta.single_end
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' :
+        'biocontainers/samtools:1.21--h50ea8bc_0' }"
+
+    input:
+    tuple val(meta), path(input) // sample meta map (meta.single_end is read below) and the alignment file
+    val(interleave)              // truthy => write one interleaved FASTQ, unless meta.single_end is set
+
+    output:
+    tuple val(meta), path("*_{1,2}.fastq.gz")      , optional:true, emit: fastq
+    tuple val(meta), path("*_interleaved.fastq")   , optional:true, emit: interleaved
+    tuple val(meta), path("*_singleton.fastq.gz")  , optional:true, emit: singleton
+    tuple val(meta), path("*_other.fastq.gz")      , optional:true, emit: other
+    path "versions.yml"                            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''           // extra `samtools fastq` options from ext.args
+    def prefix = task.ext.prefix ?: "${meta.id}" // basename for every output file
+    def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq" : // interleaved: redirect stdout to a plain .fastq
+        meta.single_end ? "-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" : // single-end: _1 plus singleton file
+            "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz" // paired-end: _1, _2 plus singleton file
+    """
+    # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads).
+    samtools \\
+        fastq \\
+        $args \\
+        --threads ${task.cpus-1} \\
+        -0 ${prefix}_other.fastq.gz \\
+        $input \\
+        $output
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub: // mirrors the three output layouts above with placeholder files
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def output = ( interleave && ! meta.single_end ) ? "touch ${prefix}_interleaved.fastq" : // interleaved: empty plain-text file
+        meta.single_end ? "echo | gzip > ${prefix}_1.fastq.gz && echo | gzip > ${prefix}_singleton.fastq.gz" : // single-end placeholders
+            "echo | gzip > ${prefix}_1.fastq.gz && echo | gzip > ${prefix}_2.fastq.gz && echo | gzip > ${prefix}_singleton.fastq.gz" // paired-end placeholders
+    """
+    ${output}
+    echo | gzip > ${prefix}_other.fastq.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/fastq/meta.yml b/modules/nf-core/samtools/fastq/meta.yml
new file mode 100644
index 00000000..9a5bd42f
--- /dev/null
+++ b/modules/nf-core/samtools/fastq/meta.yml
@@ -0,0 +1,96 @@
+name: samtools_fastq
+description: Converts a SAM/BAM/CRAM file to FASTQ
+keywords:
+ - bam
+ - sam
+ - cram
+ - fastq
+tools:
+ - samtools:
+ description: |
+ SAMtools is a set of utilities for interacting with and post-processing
+ short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+ These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence: ["MIT"]
+ identifier: biotools:samtools
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - input:
+ type: file
+ description: BAM/CRAM/SAM file
+ pattern: "*.{bam,cram,sam}"
+ ontologies: []
+ - interleave:
+ type: boolean
+ description: Set true for interleaved fastq file
+output:
+ fastq:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*_{1,2}.fastq.gz":
+ type: file
+ description: Compressed FASTQ file(s) with reads with either the READ1 or
+ READ2 flag set in separate files.
+ pattern: "*_{1,2}.fastq.gz"
+ ontologies: []
+ interleaved:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*_interleaved.fastq":
+ type: file
+          description: Uncompressed FASTQ file with reads with either the READ1 or READ2
+            flag set in a combined file. Needs collated input file.
+          pattern: "*_interleaved.fastq"
+          ontologies:
+            - edam: http://edamontology.org/format_1930 # FASTQ
+ singleton:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*_singleton.fastq.gz":
+ type: file
+ description: Compressed FASTQ file with singleton reads
+ pattern: "*_singleton.fastq.gz"
+ ontologies:
+ - edam: http://edamontology.org/format_3989 # GZIP format
+ other:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*_other.fastq.gz":
+ type: file
+ description: Compressed FASTQ file with reads with either both READ1 and READ2
+ flags set or unset
+ pattern: "*_other.fastq.gz"
+ ontologies:
+ - edam: http://edamontology.org/format_3989 # GZIP format
+ versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ ontologies:
+ - edam: http://edamontology.org/format_3750 # YAML
+authors:
+ - "@priyanka-surana"
+ - "@suzannejin"
+maintainers:
+ - "@priyanka-surana"
+ - "@suzannejin"
diff --git a/modules/nf-core/samtools/fastq/tests/main.nf.test b/modules/nf-core/samtools/fastq/tests/main.nf.test
new file mode 100644
index 00000000..971ea1d4
--- /dev/null
+++ b/modules/nf-core/samtools/fastq/tests/main.nf.test
@@ -0,0 +1,119 @@
+nextflow_process {
+
+ name "Test Process SAMTOOLS_FASTQ"
+ script "../main.nf"
+ process "SAMTOOLS_FASTQ"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "samtools"
+ tag "samtools/fastq"
+
+ test("bam") {
+
+ when {
+ process {
+ """
+ interleave = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ])
+ input[1] = interleave
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.fastq[0][1].collect { path(it).linesGzip[0..6] }).match("bam_fastq") },
+ { assert snapshot(process.out.interleaved).match("bam_interleaved") },
+ { assert snapshot(file(process.out.singleton[0][1]).name).match("bam_singleton") },
+ { assert snapshot(file(process.out.other[0][1]).name).match("bam_other") },
+ { assert snapshot(process.out.versions).match("bam_versions") }
+ )
+ }
+ }
+
+ test("bam_interleave") {
+
+ when {
+ process {
+ """
+ interleave = true
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ])
+ input[1] = interleave
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.fastq).match("bam_interleave_fastq") },
+ { assert snapshot(path(process.out.interleaved[0][1]).readLines()[0..6]).match("bam_interlinterleave_eaved") },
+ { assert snapshot(process.out.singleton).match("bam_singinterleave_leton") },
+ { assert snapshot(file(process.out.other[0][1]).name).match("bam_interleave_other") },
+ { assert snapshot(process.out.versions).match("bam_verinterleave_sions") }
+ )
+ }
+ }
+
+ test("bam - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ interleave = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ])
+ input[1] = interleave
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("bam_interleave - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ interleave = true
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ])
+ input[1] = interleave
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/samtools/fastq/tests/main.nf.test.snap b/modules/nf-core/samtools/fastq/tests/main.nf.test.snap
new file mode 100644
index 00000000..ff63f9ae
--- /dev/null
+++ b/modules/nf-core/samtools/fastq/tests/main.nf.test.snap
@@ -0,0 +1,287 @@
+{
+ "bam_interlinterleave_eaved": {
+ "content": [
+ [
+ "@ERR5069949.2151832/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "+",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE versions.yml
+ "${task.process}":
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def extension = file(input).getExtension() == 'cram' ?
+ "crai" : args.contains("-c") ? "csi" : "bai"
+ """
+ touch ${input}.${extension}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml
new file mode 100644
index 00000000..1bed6bca
--- /dev/null
+++ b/modules/nf-core/samtools/index/meta.yml
@@ -0,0 +1,77 @@
+name: samtools_index
+description: Index SAM/BAM/CRAM file
+keywords:
+ - index
+ - bam
+ - sam
+ - cram
+tools:
+ - samtools:
+ description: |
+ SAMtools is a set of utilities for interacting with and post-processing
+ short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+ These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence: ["MIT"]
+ identifier: biotools:samtools
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - input:
+ type: file
+ description: input file
+ ontologies: []
+output:
+ bai:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.bai":
+ type: file
+ description: BAM/CRAM/SAM index file
+ pattern: "*.{bai,crai,sai}"
+ ontologies: []
+ csi:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.csi":
+ type: file
+ description: CSI index file
+ pattern: "*.{csi}"
+ ontologies: []
+ crai:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.crai":
+ type: file
+ description: BAM/CRAM/SAM index file
+ pattern: "*.{bai,crai,sai}"
+ ontologies: []
+ versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ ontologies:
+ - edam: http://edamontology.org/format_3750 # YAML
+authors:
+ - "@drpatelh"
+ - "@ewels"
+ - "@maxulysse"
+maintainers:
+ - "@drpatelh"
+ - "@ewels"
+ - "@maxulysse"
diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config
new file mode 100644
index 00000000..0ed260ef
--- /dev/null
+++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config
@@ -0,0 +1,7 @@
+process {
+
+ withName: SAMTOOLS_INDEX {
+ ext.args = '-c'
+ }
+
+}
diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test
new file mode 100644
index 00000000..ca34fb5c
--- /dev/null
+++ b/modules/nf-core/samtools/index/tests/main.nf.test
@@ -0,0 +1,140 @@
+nextflow_process {
+
+ name "Test Process SAMTOOLS_INDEX"
+ script "../main.nf"
+ process "SAMTOOLS_INDEX"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "samtools"
+ tag "samtools/index"
+
+ test("bai") {
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("crai") {
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("csi") {
+ config "./csi.nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.csi[0][1]).name,
+ process.out.versions
+ ).match() }
+ )
+ }
+ }
+
+ test("bai - stub") {
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("crai - stub") {
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("csi - stub") {
+ options "-stub"
+ config "./csi.nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap
new file mode 100644
index 00000000..72d65e81
--- /dev/null
+++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap
@@ -0,0 +1,250 @@
+{
+ "csi - stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,5e09a6fdf76de396728f877193d72315"
+ ],
+ "bai": [
+
+ ],
+ "crai": [
+
+ ],
+ "csi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,5e09a6fdf76de396728f877193d72315"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-16T08:21:25.261127166"
+ },
+ "crai - stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,5e09a6fdf76de396728f877193d72315"
+ ],
+ "bai": [
+
+ ],
+ "crai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "csi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,5e09a6fdf76de396728f877193d72315"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-16T08:21:12.653194876"
+ },
+ "bai - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,5e09a6fdf76de396728f877193d72315"
+ ],
+ "bai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "crai": [
+
+ ],
+ "csi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,5e09a6fdf76de396728f877193d72315"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-16T08:21:01.854932651"
+ },
+ "csi": {
+ "content": [
+ "test.paired_end.sorted.bam.csi",
+ [
+ "versions.yml:md5,5e09a6fdf76de396728f877193d72315"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-16T08:20:51.485364222"
+ },
+ "crai": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,5e09a6fdf76de396728f877193d72315"
+ ],
+ "bai": [
+
+ ],
+ "crai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029"
+ ]
+ ],
+ "csi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,5e09a6fdf76de396728f877193d72315"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-16T08:20:40.518873972"
+ },
+ "bai": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,5e09a6fdf76de396728f877193d72315"
+ ],
+ "bai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4"
+ ]
+ ],
+ "crai": [
+
+ ],
+ "csi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,5e09a6fdf76de396728f877193d72315"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-16T08:20:21.184050361"
+ }
+}
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index cb90bae7..2108dad4 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -26,6 +26,19 @@ params {
roi = null
genelists = null
+ // UMI options
+ umi_group_strategy = 'adjacency'
+ callmolecularconsensusreads_min_reads = 3
+ callmolecularconsensusreads_min_baseq = 20
+ filterconsensusreads_min_reads = 3
+ filterconsensusreads_min_baseq = 45
+ filterconsensusreads_min_base_error_rate = 0.2
+
+ // UMI options according to KAPA HyperPlex UMI kit (potentially used in the future)
+ // callmolecularconsensusreads_max_reads = 50
+ // callmolecularconsensusreads_output_per_base_tags = false
+ // callmolecularconsensusreads_read_name_prefix = 'consensus'
+
// References
genomes = [:]
@@ -284,3 +297,15 @@ validation {
// Load modules.config for DSL2 module specific options
includeConfig 'conf/modules.config'
+
+
+// CONSENSUS SUBWORKFLOW - MODULE EXT SETTINGS
+
+process {
+ withName: 'FGBIO_FASTQTOBAM' {
+ ext.prefix = { "${meta.id}_ubam" }
+ }
+ withName: 'FGBIO_COPYUMIFROMREADNAME' {
+ ext.prefix = { "${meta.id}_copyumifromreadname" }
+ }
+}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 6a2a3685..45322b3a 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -47,6 +47,43 @@
"description": "",
"default": "",
"properties": {
+ "umi_group_strategy": {
+ "type": "string",
+ "description": "Strategy for Mapped Bam => Grouped BAM ('identity', 'edit', 'adjacency', 'paired')",
+ "default": "adjacency",
+ "hidden": true,
+ "enum": ["identity", "edit", "adjacency", "paired"]
+ },
+ "callmolecularconsensusreads_min_reads": {
+ "type": "integer",
+ "default": 3,
+ "description": "Minimum reads for callmolecularconsensusreads",
+ "hidden": true
+ },
+ "callmolecularconsensusreads_min_baseq": {
+ "type": "integer",
+ "default": 20,
+ "description": "Minimum base quality for callmolecularconsensusreads",
+ "hidden": true
+ },
+ "filterconsensusreads_min_reads": {
+ "type": "integer",
+ "default": 3,
+ "hidden": true,
+ "description": "Minimum reads for filterconsensusreads"
+ },
+ "filterconsensusreads_min_baseq": {
+ "type": "integer",
+ "default": 45,
+ "hidden": true,
+ "description": "Minimum base quality for filterconsensusreads"
+                },
+                "filterconsensusreads_min_base_error_rate": {
+                    "type": "number",
+                    "default": 0.2,
+                    "hidden": true,
+                    "description": "Maximum base error rate allowed by filterconsensusreads (the value is an upper bound despite the parameter name)"
+ },
"aligner": {
"type": "string",
"default": "bowtie2",
@@ -61,7 +98,6 @@
},
"umi_aware": {
"type": "boolean",
- "default": "false",
"description": "Run markdup in UMI-aware mode. This applies to Samtools only and requires the UMI to be in the read name."
},
"run_coverage": {
@@ -71,8 +107,7 @@
},
"skip_trimming": {
"type": "boolean",
- "description": "Skip adapter trimming",
- "default": false
+ "description": "Skip adapter trimming"
},
"trim_front": {
"type": "integer",
@@ -86,27 +121,22 @@
},
"adapter_R1": {
"type": "string",
- "default": null,
"description": "Adapter sequence to be trimmed"
},
"adapter_R2": {
"type": "string",
- "default": null,
"description": "Adapter sequence to be trimmed"
},
"disable_picard_metrics": {
"type": "boolean",
- "default": false,
"description": "Disable the calculation of (slow) Picard metrics"
},
"roi": {
"type": "string",
- "default": null,
"description": "Region of interest for coverage analysis to be applied to all samples"
},
"genelists": {
"type": "string",
- "default": null,
"exists": true,
"format": "directory-path",
"description": "Directory containing gene list bed files for granular coverage analysis"
diff --git a/nf-test.config b/nf-test.config
index 0688f302..8829a169 100644
--- a/nf-test.config
+++ b/nf-test.config
@@ -1,9 +1,11 @@
config {
+ plugins {
+ load "nft-bam@0.6.0"
+ }
testsDir "tests"
workDir ".nf-test"
configFile "tests/config/nf-test.config"
profile "docker"
options "-dump-channels"
-
}
diff --git a/subworkflows/local/consensus/main.nf b/subworkflows/local/consensus/main.nf
new file mode 100644
index 00000000..5a3e6189
--- /dev/null
+++ b/subworkflows/local/consensus/main.nf
@@ -0,0 +1,214 @@
+#!/usr/bin/env nextflow
+
+include { FGBIO_COPYUMIFROMREADNAME } from '../../../modules/nf-core/fgbio/copyumifromreadname/main'
+include { FGBIO_CALLMOLECULARCONSENSUSREADS } from '../../../modules/nf-core/fgbio/callmolecularconsensusreads/main'
+include { FGBIO_FASTQTOBAM as FASTQTOBAM_READNAME } from '../../../modules/nf-core/fgbio/fastqtobam/main'
+include { FGBIO_FASTQTOBAM as FASTQTOBAM_SEQ } from '../../../modules/nf-core/fgbio/fastqtobam/main'
+include { FGBIO_FILTERCONSENSUSREADS } from '../../../modules/nf-core/fgbio/filterconsensusreads/main'
+include { FGBIO_GROUPREADSBYUMI } from '../../../modules/nf-core/fgbio/groupreadsbyumi/main'
+include { FGBIO_SORTBAM } from '../../../modules/nf-core/fgbio/sortbam/main'
+include { FGBIO_ZIPPERBAMS as FGBIO_ZIPPERBAMS_RAW } from '../../../modules/nf-core/fgbio/zipperbams/main'
+include { FGBIO_ZIPPERBAMS as FGBIO_ZIPPERBAMS_CONS } from '../../../modules/nf-core/fgbio/zipperbams/main'
+include { SAMTOOLS_FASTQ as SAMTOOLS_FASTQ_RAW } from '../../../modules/nf-core/samtools/fastq/main'
+include { SAMTOOLS_FASTQ as SAMTOOLS_FASTQ_CONS } from '../../../modules/nf-core/samtools/fastq/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_UMI } from '../../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_CONSENSUS } from '../../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_CONVERT as SAMTOOLS_CONVERT } from '../../../modules/nf-core/samtools/convert/main'
+include { FASTQ_ALIGN_DNA as FASTQ_ALIGN_DNA_RAW } from '../../../subworkflows/nf-core/fastq_align_dna/main'
+include { FASTQ_ALIGN_DNA as FASTQ_ALIGN_DNA_CONS } from '../../../subworkflows/nf-core/fastq_align_dna/main'
+
+workflow CONSENSUS { // UMI consensus: FASTQ -> uBAM -> UMI-grouped BAM -> filtered consensus BAM -> CRAM
+    take:
+    ch_umi_fastq // channel: [meta, fastq1, fastq2]
+
+    main:
+    def ch_versions = Channel.empty()
+    def ch_ubam = Channel.empty()
+
+    // 1.1: FASTQ => uBAM
+    // Split samples on meta.umi_type; the branch has no fallback, so only 'readname' and 'seq' items continue.
+    def ch_fastq = ch_umi_fastq
+        .map { meta, r1, r2 -> tuple(meta, [r1, r2]) }
+        .branch { meta, _fastq ->
+            readname: meta['umi_type'] == 'readname'
+            seq: meta['umi_type'] == 'seq'
+        }
+
+    // Case 1: UMI_in_readname
+    // Called unconditionally: a channel object is always truthy, so an `if` guard here could never skip these steps.
+    FASTQTOBAM_READNAME(ch_fastq.readname)
+    ch_versions = ch_versions.mix(FASTQTOBAM_READNAME.out.versions)
+
+    SAMTOOLS_INDEX_UMI(FASTQTOBAM_READNAME.out.bam)
+    ch_versions = ch_versions.mix(SAMTOOLS_INDEX_UMI.out.versions)
+
+    FASTQTOBAM_READNAME.out.bam
+        .join(SAMTOOLS_INDEX_UMI.out.bai, by: 0)
+        .map { meta, bam, bai -> tuple(meta, bam, bai) }
+        .set { ch_ubam_with_bai }
+
+    FGBIO_COPYUMIFROMREADNAME(ch_ubam_with_bai)
+    ch_versions = ch_versions.mix(FGBIO_COPYUMIFROMREADNAME.out.versions)
+
+    ch_ubam = ch_ubam.mix(FGBIO_COPYUMIFROMREADNAME.out.bam)
+
+
+    // Case 2: UMI_in_sequence
+
+    // Also unconditional: FASTQTOBAM_SEQ only receives the items routed to the `seq` branch.
+    FASTQTOBAM_SEQ(ch_fastq.seq)
+    ch_versions = ch_versions.mix(FASTQTOBAM_SEQ.out.versions)
+
+    ch_ubam = ch_ubam.mix(FASTQTOBAM_SEQ.out.bam)
+
+
+    // 1.2: uBAM => Mapped BAM
+    // The uBAM is dumped as interleaved FASTQ (second argument `true`); meta.aligner falls back to 'bwamem'.
+    SAMTOOLS_FASTQ_RAW(ch_ubam, true)
+
+    ch_versions = ch_versions.mix(SAMTOOLS_FASTQ_RAW.out.versions)
+
+    def ch_reads_aligner_index_fasta = SAMTOOLS_FASTQ_RAW.out.interleaved.map { meta, reads ->
+        def gd = (meta.genome_data instanceof Map) ? meta.genome_data : [:]
+        def alg = (meta.aligner ?: 'bwamem')
+        def fasta = file(gd.fasta, checkIfExists: true)
+        def index = file(gd[alg], checkIfExists: true)
+        tuple(meta, reads, alg, index, fasta)
+    }
+
+    FASTQ_ALIGN_DNA_RAW(ch_reads_aligner_index_fasta, false)
+    ch_versions = ch_versions.mix(FASTQ_ALIGN_DNA_RAW.out.versions)
+
+    def ch_mapped_bam = FASTQ_ALIGN_DNA_RAW.out.bam
+
+    def ch_fasta_by_meta = ch_reads_aligner_index_fasta.map { meta, _r, _a, _i, fasta -> tuple(meta, fasta) }
+
+    def ch_dict_by_meta = ch_reads_aligner_index_fasta.map { meta, _r, _a, _i, _fasta ->
+        def dict = file(meta.genome_data.dict, checkIfExists: true)
+        tuple(meta, dict)
+    }
+    ch_ubam
+        .join(ch_mapped_bam, by:0)
+        .join(ch_fasta_by_meta, by:0)
+        .join(ch_dict_by_meta, by:0)
+        .map { meta, ubam, mapped_bam, fasta, dict -> tuple(meta, ubam, mapped_bam, fasta, dict) }
+        .set { ch_zipperbam }
+
+    FGBIO_ZIPPERBAMS_RAW(ch_zipperbam)
+
+    ch_versions = ch_versions.mix(FGBIO_ZIPPERBAMS_RAW.out.versions)
+
+    // 1.3: Mapped BAM => Grouped BAM
+    // The grouping strategy is taken from params.umi_group_strategy.
+
+    def ch_strategy = Channel.value(params.umi_group_strategy)
+
+    FGBIO_GROUPREADSBYUMI(
+        FGBIO_ZIPPERBAMS_RAW.out.bam,
+        ch_strategy
+    )
+
+    ch_versions = ch_versions.mix(FGBIO_GROUPREADSBYUMI.out.versions)
+    def ch_grouped_bam = FGBIO_GROUPREADSBYUMI.out.bam
+
+    // 2(b).1: GroupedBam -> Filtered Consensus uBam
+    def call_min_reads = Channel.value(params.callmolecularconsensusreads_min_reads)
+    def call_min_baseq = Channel.value(params.callmolecularconsensusreads_min_baseq)
+
+    FGBIO_CALLMOLECULARCONSENSUSREADS(
+        ch_grouped_bam,
+        call_min_reads,
+        call_min_baseq
+    )
+
+    ch_versions = ch_versions.mix(FGBIO_CALLMOLECULARCONSENSUSREADS.out.versions)
+
+    def ch_input_filterconsensusreads = FGBIO_CALLMOLECULARCONSENSUSREADS.out.bam.map {meta, bam ->
+        def fasta = file(meta.genome_data.fasta, checkIfExists: true)
+        tuple(meta, bam, fasta)
+    }
+
+    def filter_min_reads = Channel.value(params.filterconsensusreads_min_reads)
+    def filter_min_baseq = Channel.value(params.filterconsensusreads_min_baseq)
+    def filter_min_base_error_rate = Channel.value(params.filterconsensusreads_min_base_error_rate)
+
+    FGBIO_FILTERCONSENSUSREADS(
+        ch_input_filterconsensusreads,
+        filter_min_reads,
+        filter_min_baseq,
+        filter_min_base_error_rate
+    )
+
+    ch_versions = ch_versions.mix(FGBIO_FILTERCONSENSUSREADS.out.versions)
+
+    def ch_filtered_uBam = FGBIO_FILTERCONSENSUSREADS.out.bam
+
+    // 2(b).2: Consensus Filtered uBam -> Consensus Mapped & Filtered BAM
+    // Same FASTQ-dump / align / zipper sequence as step 1.2, applied to the filtered consensus uBAM.
+    SAMTOOLS_FASTQ_CONS(ch_filtered_uBam, true)
+    ch_versions = ch_versions.mix(SAMTOOLS_FASTQ_CONS.out.versions)
+
+    def ch_cons_reads_aligner_index_fasta = SAMTOOLS_FASTQ_CONS.out.interleaved.map { meta, reads ->
+        def gd = (meta.genome_data instanceof Map) ? meta.genome_data : [:]
+        def alg = (meta.aligner ?: 'bwamem')
+        def fasta = file(gd.fasta, checkIfExists: true)
+        def index = file(gd[alg], checkIfExists: true)
+        tuple(meta, reads, alg, index, fasta)
+    }
+
+    FASTQ_ALIGN_DNA_CONS(ch_cons_reads_aligner_index_fasta, false)
+    ch_versions = ch_versions.mix(FASTQ_ALIGN_DNA_CONS.out.versions)
+
+    def ch_cons_mapped_bam = FASTQ_ALIGN_DNA_CONS.out.bam
+    def ch_cons_fasta_by_meta = ch_cons_reads_aligner_index_fasta.map { meta, _r, _a, _i, fasta -> tuple(meta, fasta) }
+    def ch_cons_dict_by_meta = ch_cons_reads_aligner_index_fasta.map { meta, _r, _a, _i, _fasta ->
+        def dict = file(meta.genome_data.dict, checkIfExists: true)
+        tuple(meta, dict)
+    }
+
+    ch_filtered_uBam
+        .join(ch_cons_mapped_bam, by: 0)
+        .join(ch_cons_fasta_by_meta, by: 0)
+        .join(ch_cons_dict_by_meta, by: 0)
+        .map { meta, ubam, mapped_bam, fasta, dict -> tuple(meta, ubam, mapped_bam, fasta, dict) }
+        .set { ch_cons_zipperbam }
+
+    FGBIO_ZIPPERBAMS_CONS(ch_cons_zipperbam)
+    ch_versions = ch_versions.mix(FGBIO_ZIPPERBAMS_CONS.out.versions)
+
+    FGBIO_SORTBAM(FGBIO_ZIPPERBAMS_CONS.out.bam)
+    ch_versions = ch_versions.mix(FGBIO_SORTBAM.out.versions)
+
+    def ch_consensus_filtered_bam = FGBIO_SORTBAM.out.bam
+
+    // Consensus_filtered_bam into CRAM (integration into pipeline)
+    // Index the sorted BAM, attach fasta/fai from meta.genome_data, then hand everything to SAMTOOLS_CONVERT.
+    SAMTOOLS_INDEX_CONSENSUS(ch_consensus_filtered_bam)
+    ch_versions = ch_versions.mix(SAMTOOLS_INDEX_CONSENSUS.out.versions)
+
+    def ch_sam_convert_bai_fasta_fai = SAMTOOLS_INDEX_CONSENSUS.out.bai.map {meta, bai ->
+        def gd = (meta.genome_data instanceof Map) ? meta.genome_data : [:]
+        def fasta = file(gd.fasta, checkIfExists: true)
+        def fai = file(gd.fai, checkIfExists: true)
+        tuple(meta, bai, fasta, fai)
+    }
+
+    def ch_consensus_bam_convert = ch_consensus_filtered_bam
+        .join(ch_sam_convert_bai_fasta_fai, by: 0)
+
+    SAMTOOLS_CONVERT(ch_consensus_bam_convert)
+    ch_versions = ch_versions.mix(SAMTOOLS_CONVERT.out.versions)
+
+    SAMTOOLS_CONVERT.out.cram
+        .join(SAMTOOLS_CONVERT.out.crai, by: 0)
+        .map { meta, cram, crai -> tuple(meta, cram, crai) }
+        .set { ch_consensus_cram_crai }
+
+    emit:
+    ubam = ch_ubam
+    consensus_bam = ch_consensus_filtered_bam
+    grouped_bam = ch_grouped_bam
+    filtered_ubam = ch_filtered_uBam
+    consensus_cram_crai = ch_consensus_cram_crai
+    versions = ch_versions
+}
diff --git a/tests/config/nf-test.config b/tests/config/nf-test.config
index 68e597f9..c429ebe1 100644
--- a/tests/config/nf-test.config
+++ b/tests/config/nf-test.config
@@ -14,3 +14,16 @@ aws {
connectionTimeout = 60000
}
}
+
+params {
+ genomes {
+ GRCh38 {
+ bwamem = "s3://test-data/genomics/homo_sapiens/genome/bwa/"
+ dict = "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict"
+ fai = "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai"
+ fasta = "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna"
+ star = "s3://test-data/genomics/homo_sapiens/genome/star/"
+ gtf = "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.gtf"
+ }
+ }
+}
diff --git a/tests/subworkflows/local/consensus/main.nf.test b/tests/subworkflows/local/consensus/main.nf.test
new file mode 100644
index 00000000..67be60b7
--- /dev/null
+++ b/tests/subworkflows/local/consensus/main.nf.test
@@ -0,0 +1,54 @@
+nextflow_workflow {
+    name "Test Subworkflow CONSENSUS"
+    script "subworkflows/local/consensus/main.nf"
+    workflow "CONSENSUS"
+
+    tag "subworkflows"
+    tag "consensus"
+    tag "fgbio"
+    tag "umi"
+    tag "bwamem"
+
+    test("test_consensus_reads_md5") {
+        // End-to-end run on one `seq`-type UMI sample; snapshots the read-level MD5 of every emitted BAM/CRAM.
+        when {
+            workflow {
+                """
+                input[0] = Channel.of([
+                    [
+                        id        : 'umi_sample1',
+                        samplename: 'umi_test1',
+                        library   : 'test',
+                        organism  : 'Homo sapiens',
+                        umi_type  : 'seq',
+                        tag       : 'WES',
+                        aligner   : 'bwamem',
+                        genome_data: [
+                            fasta : params.genomes.GRCh38.fasta,
+                            dict  : params.genomes.GRCh38.dict,
+                            bwamem: params.genomes.GRCh38.bwamem,
+                            fai   : params.genomes.GRCh38.fai
+                        ]
+                    ],
+                    file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz'),
+                    file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz')
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.ubam.collect { bam(it[1]).getReadsMD5() },
+                    workflow.out.grouped_bam.collect { bam(it[1]).getReadsMD5() },
+                    workflow.out.filtered_ubam.collect { bam(it[1]).getReadsMD5() },
+                    workflow.out.consensus_bam.collect { bam(it[1]).getReadsMD5() },
+                    workflow.out.consensus_cram_crai.collect { bam(it[1]).getReadsMD5() },
+                    workflow.out.versions
+                ).match() }
+            )
+        }
+    }
+}
diff --git a/tests/subworkflows/local/consensus/main.nf.test.snap b/tests/subworkflows/local/consensus/main.nf.test.snap
new file mode 100644
index 00000000..166b52a1
--- /dev/null
+++ b/tests/subworkflows/local/consensus/main.nf.test.snap
@@ -0,0 +1,41 @@
+{
+ "test_consensus_reads_md5": {
+ "content": [
+ [
+ "c21b5ba7ecc759f9d729a98420921e1e"
+ ],
+ [
+ "d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ "d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ "d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ "d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ "versions.yml:md5,37ec8cc6d3cdb55f06e84f325f511538",
+ "versions.yml:md5,383381d42173a31f86cce3ab6cf7299e",
+ "versions.yml:md5,4af2c1a5032fc1ce5ec09368f956480c",
+ "versions.yml:md5,5d6ccca9089e3268bb387820e8ba8f57",
+ "versions.yml:md5,5fa2a734f2d547c5de3b14c3a2dc5c02",
+ "versions.yml:md5,7e52768cb8257260977e38d745e51237",
+ "versions.yml:md5,92ec3e7cd231a6503321464eb83e6955",
+ "versions.yml:md5,a21193341a7ae0f02ad5ce8af54c1d4a",
+ "versions.yml:md5,ae3cd5c66636afc64cedd4e633531f2a",
+ "versions.yml:md5,b081ec66fb0e82580f58170743e4b910",
+ "versions.yml:md5,d4e922f9ba8e8b393a2994eed8fd89ae",
+ "versions.yml:md5,faa4838294afcfe9d81dc85e268903e5",
+ "versions.yml:md5,faf05dada3834ff602901efc1a334351"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.6"
+ },
+ "timestamp": "2025-08-26T11:16:54.790224281"
+ }
+}
\ No newline at end of file
diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf
index d23a17b1..bb054bb9 100644
--- a/workflows/preprocessing.nf
+++ b/workflows/preprocessing.nf
@@ -20,6 +20,8 @@ include { COVERAGE } from '../subworkflows/local/coverage/main'
include { FASTQ_TO_UCRAM } from '../subworkflows/local/fastq_to_unaligned_cram/main'
include { FASTQ_TO_CRAM } from '../subworkflows/local/fastq_to_aligned_cram/main'
+include { CONSENSUS } from '../subworkflows/local/consensus/main'
+
// Functions
include { paramsSummaryMap } from 'plugin/nf-schema'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
@@ -126,13 +128,14 @@ workflow PREPROCESSING {
rg = rg + [ 'SM': samplename,
'LB': meta.library ?: "",
'PL': meta.platform ?: rg.PL,
- 'ID': meta.readgroup ?: rg.ID
+ 'ID': rg.ID
]
def meta_with_readgroup = meta + ['single_end': single_end, 'readgroup': rg]
return [meta_with_readgroup, fastq]
}
.set {ch_input_fastq}
+
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// ASSOCIATE CORRECT GENOME AND COUNT SAMPLE REPLICATES
@@ -192,6 +195,26 @@ workflow PREPROCESSING {
ch_fastq_per_sample.dump(tag:"FASTQ per sample", pretty: true)
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+// STEP: UMI CONSENSUS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+ // Convert to [meta, r1, r2] for the UMI subworkflow
+ def ch_umi_fastq = ch_fastq_per_sample.map { meta, reads ->
+ def r1 = (reads instanceof List) ? reads[0] : reads
+ def r2 = (reads instanceof List && reads.size() > 1) ? reads[1] : []
+ return [meta, r1, r2]
+ }
+ .filter {meta, _r1, _r2 -> meta.umi_type != "none"}
+
+ CONSENSUS(ch_umi_fastq)
+ ch_versions = ch_versions.mix(CONSENSUS.out.versions)
+
+ def ch_consensus_cram_crai = CONSENSUS.out.consensus_cram_crai
+
+
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// FASTQ TRIMMING AND QC
@@ -244,7 +267,6 @@ workflow PREPROCESSING {
FASTQ_TO_UCRAM(ch_trimmed_reads.other)
ch_versions = ch_versions.mix(FASTQ_TO_UCRAM.out.versions)
-/*
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -281,6 +303,7 @@ workflow PREPROCESSING {
*/
FASTQ_TO_CRAM.out.cram_crai
+ .mix(ch_consensus_cram_crai)
.filter{ meta, cram, crai ->
meta.tag != "SNP"
}
@@ -438,7 +461,12 @@ def readgroup_from_fastq(path) {
}
assert line.startsWith('@')
line = line.substring(1)
- def fields = line.split(':')
+
+ def parts = line.split(' ')
+ def left = parts[0]
+ def right = parts.size() > 1 ? parts[1] : ""
+
+ def fields = left.split(':')
def rg = [:]
rg.CN = "CMGG"
@@ -449,7 +477,11 @@ def readgroup_from_fastq(path) {
def run_nubmer = fields[1]
def fcid = fields[2]
def lane = fields[3]
- def index = fields[-1] =~ /[GATC+-]/ ? fields[-1] : ""
+ def index = ""
+ if (right) {
+ def r = right.split(':')
+ if (r && (r[-1] ==~ /[ACGTN+\-]+/)) index = r[-1]
+ }
rg.ID = [fcid,lane].join(".")
rg.PU = [fcid, lane, index].findAll().join(".")
@@ -457,6 +489,15 @@ def readgroup_from_fastq(path) {
} else if (fields.size() == 5) {
def fcid = fields[0]
rg.ID = fcid
+ rg.PU = fcid
+ rg.PL = "ILLUMINA"
+ }
+ else {
+        // Fallback for non-CASAVA headers: use the first non-empty field
+ def fallback = (fields && fields[0]) ? fields[0] : "unknown"
+ rg.ID = fallback
+ rg.PU = fallback
+ rg.PL = rg.PL ?: "ILLUMINA"
}
return rg
}