Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -318,11 +318,18 @@ This can be useful if you have assigned cell type annotations to the integrated

If you want to run tasks after the integration step without performing integration, you can provide a previous result of the pipeline as [`base_adata`](https://nf-co.re/scdownstream/parameters#base_adata).
You do not need to provide a samplesheet via the [`input`](https://nf-co.re/scdownstream/parameters#input) parameter in this case.
You also need [`base_embeddings`](https://nf-co.re/scdownstream/parameters#base_embeddings), and optionally [`base_label_col`](https://nf-co.re/scdownstream/parameters#base_label_col) and [`base_condition_col`](https://nf-co.re/scdownstream/parameters#base_condition_col) if your label or condition columns are not named `label` and `condition`.
You also need either [`base_embeddings`](https://nf-co.re/scdownstream/parameters#base_embeddings) to reuse existing embeddings, or [`integrate_per_label`](https://nf-co.re/scdownstream/parameters#integrate_per_label) to compute new integrations independently for each group in [`base_label_col`](https://nf-co.re/scdownstream/parameters#base_label_col).
Set [`base_condition_col`](https://nf-co.re/scdownstream/parameters#base_condition_col) if your condition column is not named `condition`.

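The pipeline will then re-execute the tasks after the integration step. Integration itself is not performed again unless [`integrate_per_label`](https://nf-co.re/scdownstream/parameters#integrate_per_label) is enabled, in which case the selected integration methods are run independently for each group.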
Most interestingly, the pipeline will generate cell-type-specific UMAPs, clusterings, and PAGA graphs if [`cluster_per_label`](https://nf-co.re/scdownstream/parameters#cluster_per_label) is set to `true`.

If [`integrate_per_label`](https://nf-co.re/scdownstream/parameters#integrate_per_label) is enabled, [`base_label_col`](https://nf-co.re/scdownstream/parameters#base_label_col) is the split/grouping column, not necessarily the supervised cell-type label used by integration methods.
Use [`base_batch_col`](https://nf-co.re/scdownstream/parameters#base_batch_col) to select the batch column for batch-aware integration methods.
When using scANVI, use [`base_scanvi_label_col`](https://nf-co.re/scdownstream/parameters#base_scanvi_label_col) to select the column that contains the supervised cell labels, and [`base_scanvi_unknown_label`](https://nf-co.re/scdownstream/parameters#base_scanvi_unknown_label) to name the category in that column that should be treated as unlabeled.
Subset names in `analysis_plan` match the filesystem-safe keys produced by splitting the AnnData object; spaces in group values are replaced with underscores.
Per-label integrations are treated as already split for clustering, so the pipeline creates subset-specific embedding keys such as `X_pca-SRR28679756_pca` and UMAP keys such as `X_pca-SRR28679756_umap` in the finalized base AnnData.

### GPU acceleration

:::warning{title="Experimental feature"}
Expand Down
12 changes: 12 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ workflow NFCORE_SCDOWNSTREAM {
base_embeddings // value: string
base_label_col // value: string
base_condition_col // value: string
base_batch_col // value: string
base_scanvi_label_col // value: string
base_scanvi_unknown_label // value: string
integrate_per_label // value: boolean
cluster_per_label // value: boolean
cluster_global // value: boolean
clustering_resolutions // value: string
Expand Down Expand Up @@ -128,6 +132,10 @@ workflow NFCORE_SCDOWNSTREAM {
base_embeddings,
base_label_col,
base_condition_col,
base_batch_col,
base_scanvi_label_col,
base_scanvi_unknown_label,
integrate_per_label,
cluster_per_label,
cluster_global,
clustering_resolutions,
Expand Down Expand Up @@ -228,6 +236,10 @@ workflow {
params.base_embeddings,
params.base_label_col,
params.base_condition_col,
params.base_batch_col,
params.base_scanvi_label_col,
params.base_scanvi_unknown_label,
params.integrate_per_label,
params.cluster_per_label,
params.cluster_global,
params.clustering_resolutions,
Expand Down
2 changes: 1 addition & 1 deletion modules/local/scvitools/scanvi/templates/scanvi.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

adata = ad.read_h5ad("${h5ad}")
reference_model_path = "reference_model"
reference_model_type = "${meta2.id ?: ''}"
reference_model_type = "${meta2.integration ?: meta2.id ?: ''}"

plan_kwargs = {}

Expand Down
4 changes: 4 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ params {
base_embeddings = null
base_label_col = 'label'
base_condition_col = 'condition'
base_batch_col = 'batch'
base_scanvi_label_col = 'label'
base_scanvi_unknown_label = 'Unknown'
integrate_per_label = false

// Clustering options
clustering_resolutions = '0.5,1.0'
Expand Down
26 changes: 23 additions & 3 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -225,17 +225,37 @@
"base_label_col": {
"type": "string",
"default": "label",
"description": "The column in the base AnnData object that contains the label (e.g. cell type) information."
"description": "The column in the base AnnData object used to group cells for downstream per-label analysis and, when integrate_per_label is true, to split the object before integration."
},
"base_condition_col": {
"type": "string",
"default": "condition",
"description": "The column in the base AnnData object that contains the condition (e.g. disease state, treatment) information."
},
"base_batch_col": {
"type": "string",
"default": "batch",
"description": "The column in the base AnnData object that contains batch information for integration methods used with integrate_per_label."
},
"base_scanvi_label_col": {
"type": "string",
"default": "label",
"description": "The column in the base AnnData object that contains cell labels for scANVI when integrate_per_label is true and scanvi is selected."
},
"base_scanvi_unknown_label": {
"type": "string",
"default": "Unknown",
"description": "The category in base_scanvi_label_col that should be treated as unlabeled by scANVI when integrate_per_label is true."
},
"integrate_per_label": {
"type": "boolean",
"default": false,
"description": "In base_adata-only runs, split the base AnnData by base_label_col and run the selected integration_methods independently for each group before clustering."
},
"base_embeddings": {
"type": "string",
"description": "The keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`). Required if `input` is not provided - otherwise it is ignored.",
"help_text": "If the `input` parameter is not provided (no new data to add), integration will not be performed. In order to be able to utilize existing integration results, you need to provide the keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`).",
"description": "The keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`). Required if input is not provided and integrate_per_label is false; otherwise it is ignored.",
"help_text": "If the `input` parameter is not provided (no new data to add), integration is skipped unless `integrate_per_label` is true. To reuse existing integration results, provide the keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`).",
"pattern": "^((scvi|scanvi|symphony|bbknn|combat|seurat)(,(scvi|scanvi|symphony|bbknn|combat|seurat))*)?$"
}
}
Expand Down
35 changes: 30 additions & 5 deletions subworkflows/local/cluster/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,25 @@ workflow CLUSTER {
ch_multiqc_files = channel.empty()
ch_h5ad = channel.empty()

ch_input_by_subset = ch_input.branch { meta, _h5ad ->
already_split: meta.subset != null
needs_split: true
}

ch_h5ad = ch_h5ad.mix(
ch_input_by_subset.already_split
.map { meta, h5ad -> [meta + [already_split: true], h5ad] }
)

if (global) {
ch_h5ad = ch_h5ad
.mix(ch_input
.mix(ch_input_by_subset.needs_split
.map { meta, h5ad -> [meta + [subset: "global"], h5ad] })
}

if (per_label) {
SPLITCOL (
ch_input,
ch_input_by_subset.needs_split,
split_col
)

Expand All @@ -42,7 +52,10 @@ workflow CLUSTER {
ch_h5ad = ch_h5ad
.map {
meta, h5ad ->
[meta + [id: meta.integration + "-" + meta.subset], h5ad]
def cluster_id = meta.subset != null
? meta.integration + "-" + meta.subset
: meta.integration
[meta + [id: cluster_id], h5ad]
}

ch_h5ad = ch_h5ad.branch { meta, _h5ad ->
Expand All @@ -57,14 +70,26 @@ workflow CLUSTER {
ch_h5ad = NEIGHBORS.out.h5ad.mix(ch_h5ad.has_neighbors)
ch_h5ad_neighbours = NEIGHBORS.out.h5ad

ch_h5ad_for_umap = ch_h5ad
.map { meta, h5ad ->
meta.already_split
? [meta + [id: meta.id + "-umap", cluster_id: meta.id], h5ad]
: [meta, h5ad]
}

UMAP (
ch_h5ad
ch_h5ad_for_umap
)
ch_obsm = ch_obsm.mix(UMAP.out.obsm)

ch_resolutions = channel.fromList(default_resolutions)

ch_h5ad_for_leiden = UMAP.out.h5ad
.map { meta, h5ad ->
meta.cluster_id
? [meta.findAll { key, _value -> key != 'cluster_id' } + [id: meta.cluster_id], h5ad]
: [meta, h5ad]
}
.combine(ch_resolutions)
.filter { meta, _h5ad, resolution ->
analysis_plan_rows.any { row ->
Expand All @@ -77,7 +102,7 @@ workflow CLUSTER {
[
meta + [
resolution: resolution,
id: meta.integration + "-" + meta.subset + "-" + resolution,
id: meta.id + "-" + resolution,
],
h5ad,
]
Expand Down
8 changes: 7 additions & 1 deletion subworkflows/local/combine/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ workflow COMBINE {
symphony_reference // value: string
expimap_gmt // value: string
condition_col // value: string
batch_col // value: string
scanvi_label_col // value: string
scanvi_unlabeled_category // value: string
scib // value: boolean

main:
Expand Down Expand Up @@ -55,7 +58,10 @@ workflow COMBINE {
scimilarity_model,
symphony_reference,
expimap_gmt,
condition_col
condition_col,
batch_col,
scanvi_label_col,
scanvi_unlabeled_category
)
ch_var = ch_var.mix(INTEGRATE.out.var)

Expand Down
10 changes: 8 additions & 2 deletions subworkflows/local/combine/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ nextflow_workflow {
input[11] = null
input[12] = null
input[13] = 'condition'
input[14] = false
input[14] = 'batch'
input[15] = 'label'
input[16] = 'Unknown'
input[17] = false
"""
}
}
Expand Down Expand Up @@ -71,7 +74,10 @@ nextflow_workflow {
input[11] = null
input[12] = null
input[13] = 'condition'
input[14] = false
input[14] = 'batch'
input[15] = 'label'
input[16] = 'Unknown'
input[17] = false
"""
}
}
Expand Down
Loading
Loading