Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
917c7aa
feat: add amas and macros
jchchiu Nov 6, 2025
bb629f6
test: add simple working test case
jchchiu Nov 6, 2025
be4db1f
fix: change category to Multiple Alignments
jchchiu Nov 6, 2025
932783d
fix: change category to Sequence Analysis
jchchiu Nov 6, 2025
793a6b1
update from george
jchchiu Nov 7, 2025
af75ec9
update from george; add tests
jchchiu Nov 7, 2025
a45c8b5
update from george; add info.xml
jchchiu Nov 7, 2025
e816d9c
fix lint
jchchiu Nov 7, 2025
9967a62
add split test; update .shed; add comment to xml command
jchchiu Nov 7, 2025
8e937d7
update .shed owners
jchchiu Nov 7, 2025
a6ff62e
remove translate
jchchiu Nov 7, 2025
c354605
docs: update .shed
jchchiu Nov 11, 2025
a4fc62f
refactor: split concat into separate tool
jchchiu Nov 11, 2025
6a56045
refactor: add input and output format as shared macro
jchchiu Nov 11, 2025
426a577
refactor: add macro for changing output format
jchchiu Nov 11, 2025
c757008
refactor: move info to macros
jchchiu Nov 11, 2025
1509d85
refactor: change tool id/name; remove info macro
jchchiu Nov 11, 2025
6872743
docs: update categories; reduce actions
jchchiu Nov 11, 2025
c77e246
refactor: rename output format
jchchiu Nov 11, 2025
582d254
refactor: move 'split' subcommand into separate tool
jchchiu Nov 11, 2025
bc9bebd
refactor: change output pattern
jchchiu Nov 11, 2025
dc15ac1
refactor: move 'replicate' subcommand into separate tool
jchchiu Nov 11, 2025
a279552
docs: add more help to explain what partitions are
jchchiu Nov 11, 2025
1d901f5
refactor: move 'summary' subcommand into separate tool
jchchiu Nov 12, 2025
77241c3
temp: move 'remove' subcommand into separate tool
jchchiu Nov 12, 2025
17c02f2
fix: change version to correct token
jchchiu Nov 12, 2025
91c5fe3
refactor: remove redundant xmls
jchchiu Nov 12, 2025
62a9bce
refactor: remove/add reused/redundant macros
jchchiu Nov 12, 2025
a12ab96
docs: update help/documentation
jchchiu Nov 12, 2025
f071907
docs: update help
jchchiu Nov 12, 2025
0bb5c40
test: remove tests no longer needed
jchchiu Nov 12, 2025
653c992
refactor: change 'remove' repeat to text + regex validator
jchchiu Nov 12, 2025
178d5cc
fix: fix misplaced end param tag
jchchiu Nov 12, 2025
81cbb66
docs: updated help for 'remove'
jchchiu Nov 12, 2025
8f32f1d
docs: update help info
jchchiu Nov 12, 2025
46502d3
refactor: add profile token to macro; replace in subcommands
jchchiu Nov 12, 2025
587bb5d
refactor: change param 'name' to 'argument' for 'boolean'
jchchiu Nov 12, 2025
6fc566f
docs: rename output label so that it is more user friendly
jchchiu Nov 12, 2025
5809b0a
Revert "docs: rename output label so that it is more user friendly"
jchchiu Nov 12, 2025
e2de21b
docs: rename output label so that it is more user friendly
jchchiu Nov 12, 2025
b3c6135
docs: add auto_tool_repositories and suite to shed.yml
jchchiu Nov 17, 2025
4b43895
refactor: run everything in ./; added ftype to tests
jchchiu Nov 17, 2025
f6a85d5
refactor: changed check_align and data_type to macro
jchchiu Nov 17, 2025
08bd74d
refactor: moved shared commands to macro tokens
jchchiu Nov 17, 2025
b19d9b7
refactor/docs: moved shared help to macro token
jchchiu Nov 17, 2025
b61f0ff
refactor: added ${tool.name} on ${on_string} to output labels
jchchiu Nov 17, 2025
846b254
docs: updated file format formatting to be more consistent
jchchiu Nov 17, 2025
eec0620
style: removed single quotes
jchchiu Nov 17, 2025
8364cfe
docs: updated docs to include info on sequential vs interleaved; fixe…
jchchiu Nov 17, 2025
834f114
docs: moved partitions help to macro token
jchchiu Nov 17, 2025
2d2349b
refactor: set format depending on part_format
jchchiu Nov 19, 2025
0e62561
style: changed formatting of output files
jchchiu Nov 19, 2025
4af9562
fix: updated version command
jchchiu Nov 19, 2025
cfcfca9
tests: changed concat test from sim size to exact
jchchiu Nov 19, 2025
d4b84ac
refactor: simplified change_format
jchchiu Nov 19, 2025
51bb36e
fix: updated/fixed concat test
jchchiu Nov 19, 2025
ff762fb
fix: added nex format to allowed inputs for partitions
jchchiu Nov 19, 2025
3d9424b
docs: updated help
jchchiu Nov 19, 2025
18a8396
style: fix lint
jchchiu Nov 19, 2025
bd9a818
fix: split subcommand does not work with RAxML or NEXUS formatted par…
jchchiu Nov 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions tools/amas/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Tool Shed metadata for the AMAS wrapper.
categories:
  - Sequence Analysis
description: AMAS high-throughput alignment manipulation and summaries for phylogenomics
homepage_url: https://github.com/marekborowiec/AMAS
# Folded scalar: the lines below are joined with single spaces into one string.
long_description: >-
  Handle expansive phylogenomic data sets by concatenating, converting,
  trimming, translating, replicating, splitting, and summarising large
  nucleotide or amino acid alignments.
name: amas
owner: biohackathon25aus-george-joy-julian-patra
remote_repository_url: https://github.com/jchchiu/tools-iuc.git
212 changes: 212 additions & 0 deletions tools/amas/amas.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
<?xml version="1.0" encoding="UTF-8"?>
<tool id="amas" name="AMAS" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" profile="25.0">
<description>High-throughput alignment manipulation and statistics for large phylogenomics</description>
<macros>
<import>info.xml</import>
<import>macros.xml</import>
<import>tests.xml</import>
</macros>

<xrefs>
<xref type="bio.tools">amas</xref>
</xrefs>

<expand macro="requirements" />
<expand macro="version_command" />
<expand macro="command" />

<inputs>
<!-- Alignment files passed to AMAS; several datasets may be selected at once. -->
<param name="input_files"
       type="data"
       format="fasta,phylip,nex"
       multiple="true"
       label="Sequences to analyse"
       help="Provide pre-aligned FASTA/PHYLIP/NEXUS files (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." />
<conditional name="action">
<!-- Chooses the AMAS sub-command; each option value has its own when-branch in this conditional. -->
<param name="action_selector"
       type="select"
       label="Action to perform">
    <option value="concat" selected="true">Concatenate input alignments</option>
    <option value="summary">Write alignment summary</option>
    <option value="convert">Convert to other file format</option>
    <option value="replicate">Create replicate data sets for phylogenetic jackknife</option>
    <option value="split">Split alignment according to a partitions file</option>
    <option value="remove">Remove taxa from alignment</option>
    <option value="translate">Translate DNA alignment into protein alignment</option>
    <option value="trim">Trim alignment by occupancy</option>
</param>
<!-- concat: options for the concatenated alignment and its partitions file. -->
<when value="concat">
    <!-- Container format of the concatenated alignment. -->
    <param name="concat_out_format"
           type="select"
           label="Format of the concatenated alignment">
        <option value="fasta">fasta</option>
        <option value="phylip">phylip</option>
        <option value="phylip-int">phylip-int</option>
        <option value="nexus">nexus(sequential)</option>
        <option value="nexus-int">nexus(interleaved)</option>
    </param>
    <!-- Layout of the partitions file; "unspecified" is the preselected choice. -->
    <param name="part_format"
           type="select"
           label="Format of the partitions file">
        <option value="nexus">nexus</option>
        <option value="raxml">raxml</option>
        <option value="unspecified" selected="true">unspecified</option>
    </param>
</when>
<!-- summary: the only extra option toggles the per-taxon summary files. -->
<when value="summary">
    <param name="by_taxon"
           type="boolean"
           checked="false"
           truevalue="true"
           falsevalue="false"
           label="Also emit per-taxon summaries" />
</when>
<!-- convert: target format for the re-encoded alignments. -->
<when value="convert">
    <param name="convert_out_format"
           type="select"
           label="Format of the converted alignments">
        <option value="fasta">fasta</option>
        <option value="phylip">phylip</option>
        <option value="phylip-int">phylip-int</option>
        <option value="nexus">nexus(sequential)</option>
        <option value="nexus-int">nexus(interleaved)</option>
    </param>
</when>
<!-- replicate: how many replicate data sets to build, loci per replicate, and output format. -->
<when value="replicate">
    <param name="replicate_replicates"
           type="integer"
           value="10"
           min="1"
           label="Number of replicate datasets to build" />
    <param name="replicate_loci"
           type="integer"
           value="2"
           min="1"
           label="Number of loci per replicate" />
    <param name="replicate_out_format"
           type="select"
           label="Format of the replicate alignments">
        <option value="fasta">fasta</option>
        <option value="phylip">phylip</option>
        <option value="phylip-int">phylip-int</option>
        <option value="nexus">nexus(sequential)</option>
        <option value="nexus-int">nexus(interleaved)</option>
    </param>
</when>
<!-- split: partitions file to split by, optional removal of empty taxa, and output format. -->
<when value="split">
    <param name="split_by"
           type="data"
           format="txt,nex"
           label="Partitions file for splitting"
           help="Provide the partitions file written by AMAS or another tool." />
    <!-- When checked, the boolean itself expands to the AMAS flag; otherwise to nothing. -->
    <param name="remove_empty"
           type="boolean"
           checked="false"
           truevalue="--remove-empty"
           falsevalue=""
           label="Remove taxa that are entirely missing within a partition" />
    <param name="split_out_format"
           type="select"
           label="Format of the split alignments">
        <option value="fasta">fasta</option>
        <option value="phylip">phylip</option>
        <option value="phylip-int">phylip-int</option>
        <option value="nexus">nexus(sequential)</option>
        <option value="nexus-int">nexus(interleaved)</option>
    </param>
</when>
<when value="remove">
<repeat name="taxa_to_remove" title="Taxa to remove" min="1">
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One thing to consider for repeats is that they limit applicability in workflows, i.e. AFAIK the number of repeat elements cannot be changed in a workflow. Maybe just make it a simple text parameter + regex validator ensuring that we have a space-separated list of strings (btw, can taxa names not contain spaces)?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(btw can taxa names not contain spaces)?

Not an expert in this area but I think it depends on whatever file format you're using. Also depends on whether a tool will handle it (e.g. amas will convert spaces in the taxon/species names inside the input files into underscores.)

<param name="taxon" type="text" label="Taxon name" />
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A validator (regex) would be great here.

</repeat>
<!-- File-name prefix for the reduced alignments; defaults to "reduced_". -->
<param name="out_prefix"
       type="text"
       value="reduced_"
       label="Prefix for reduced alignments" />
<!-- Output format of the reduced alignments. -->
<param name="remove_out_format"
       type="select"
       label="Format of the reduced alignments">
    <option value="fasta">fasta</option>
    <option value="phylip">phylip</option>
    <option value="phylip-int">phylip-int</option>
    <option value="nexus">nexus(sequential)</option>
    <option value="nexus-int">nexus(interleaved)</option>
</param>
</when>
<!-- translate: NCBI genetic code, reading frame and output format for DNA-to-protein translation. -->
<when value="translate">
    <!-- Option values are the NCBI translation-table numbers. -->
    <param name="genetic_code"
           type="select"
           label="NCBI genetic code">
        <option value="1" selected="true">1: Standard</option>
        <option value="2">2: Vertebrate mitochondrial</option>
        <option value="3">3: Yeast mitochondrial</option>
        <option value="4">4: Mold/Protozoan/Coelenterate mito + Mycoplasma</option>
        <option value="5">5: Invertebrate mitochondrial</option>
        <option value="6">6: Ciliate/Hexamita nuclear</option>
        <option value="9">9: Echinoderm/Flatworm mitochondrial</option>
        <option value="10">10: Euplotid nuclear</option>
        <option value="11">11: Bacterial/Archaeal/Plastid</option>
        <option value="12">12: Alternative yeast nuclear</option>
        <option value="13">13: Ascidian mitochondrial</option>
        <option value="14">14: Alternative flatworm mitochondrial</option>
        <option value="16">16: Chlorophycean mitochondrial</option>
        <option value="21">21: Trematode mitochondrial</option>
        <option value="22">22: Scenedesmus obliquus mitochondrial</option>
        <option value="23">23: Thraustochytrium mitochondrial</option>
        <option value="24">24: Pterobranchia mitochondrial</option>
        <option value="25">25: Candidate Division SR1 and Gracilibacteria</option>
        <option value="26">26: Pachysolen tannophilus nuclear</option>
    </param>
    <!-- Frame 1 is preselected. -->
    <param name="reading_frame"
           type="select"
           label="Reading frame">
        <option value="1" selected="true">Frame 1 (start at first base)</option>
        <option value="2">Frame 2 (start at second base)</option>
        <option value="3">Frame 3 (start at third base)</option>
    </param>
    <param name="translate_out_format"
           type="select"
           label="Format of the translated alignments">
        <option value="fasta">fasta</option>
        <option value="phylip">phylip</option>
        <option value="phylip-int">phylip-int</option>
        <option value="nexus">nexus(sequential)</option>
        <option value="nexus-int">nexus(interleaved)</option>
    </param>
</when>
<!-- trim: occupancy threshold, optional parsimony-informative filter, and output format. -->
<when value="trim">
    <!-- Fraction between 0.0 and 1.0; the default is 0.6. -->
    <param name="trim_fraction"
           type="float"
           value="0.6"
           min="0.0"
           max="1.0"
           label="Minimum column occupancy to keep"
           help="Columns with occupancy lower than this value are removed." />
    <param name="retain_parsimony"
           type="boolean"
           checked="false"
           truevalue="true"
           falsevalue="false"
           label="Keep only parsimony-informative columns" />
    <param name="trim_out_format"
           type="select"
           label="Format of the trimmed alignments">
        <option value="fasta">fasta</option>
        <option value="phylip">phylip</option>
        <option value="phylip-int">phylip-int</option>
        <option value="nexus">nexus(sequential)</option>
        <option value="nexus-int">nexus(interleaved)</option>
    </param>
</when>
</conditional>
<!-- Format of the input alignments, including the sequential/interleaved distinction. -->
<param name="in_format"
       type="select"
       label="Format of the input file">
    <option value="fasta">fasta</option>
    <option value="phylip">phylip</option>
    <option value="phylip-int">phylip-int</option>
    <option value="nexus">nexus(sequential)</option>
    <option value="nexus-int">nexus(interleaved)</option>
</param>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FASTA, PHYLIP and NEXUS can be distinguished automatically, e.g. $input_file.ext gives the Galaxy datatype. Is the interleaved/sequential information needed? Can it be determined automatically?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/marekborowiec/AMAS/blob/2e93d31638625135aa48a68251c363ac23a47c4a/amas/AMAS.py#L692

It doesn't seem like they have a function that detects the format for interleaved automatically; instead it depends on the input you give it. Can galaxy automatically distinguish interleaved?

<!-- Alphabet of the input alignments; the first option (protein) is the default. -->
<param name="data_type"
       type="select"
       label="Data type">
    <option value="aa">Protein alignments</option>
    <option value="dna">Nucleotide alignments</option>
</param>
<!-- When checked, the boolean itself expands to the AMAS alignment-check flag. -->
<param name="check_align"
       type="boolean"
       checked="false"
       truevalue="--check-align"
       falsevalue=""
       label="Check if input sequences are aligned" />
<param name="cores"
       type="integer"
       value="1"
       min="1"
       label="Number of cores to use when summarising alignments" />
</inputs>

<outputs>
<!--
    Concatenated alignment, produced only by the concat action.
    The datatype starts as txt and is switched to match the chosen output format.
    concat_out_format lives inside the "action" conditional, so change_format must
    address it by its full path; the bare name never matches and the output stays txt.
-->
<data name="concat_output" from_work_dir="run_dir/concat/concatenated.out" format="txt" label="Concatenated alignment">
    <filter>action["action_selector"] == "concat"</filter>
    <change_format>
        <when input="action.concat_out_format" value="fasta" format="fasta" />
        <when input="action.concat_out_format" value="phylip" format="phylip" />
        <when input="action.concat_out_format" value="phylip-int" format="phylip" />
        <when input="action.concat_out_format" value="nexus" format="nex" />
        <when input="action.concat_out_format" value="nexus-int" format="nex" />
    </change_format>
</data>
<!-- Partitions file written next to the concatenated alignment (concat action only). -->
<data name="partitions_out"
      format="txt"
      from_work_dir="run_dir/concat/partitions.txt"
      label="Partition file">
    <filter>action["action_selector"] == "concat"</filter>
</data>

<!-- Summary table (summary action only). -->
<data name="summary_out"
      format="txt"
      from_work_dir="run_dir/summary/summary.txt"
      label="Alignment summary">
    <filter>action["action_selector"] == "summary"</filter>
</data>
<!-- Per-taxon summaries: collected only for the summary action with by_taxon enabled. -->
<collection name="taxon_summaries"
            type="list"
            label="Per-taxon summaries">
    <discover_datasets directory="run_dir/summary" pattern="(?P&lt;name&gt;.+-seq-summary)\.txt" format="txt" />
    <filter>action["action_selector"] == "summary" and action.get("by_taxon", False)</filter>
</collection>

<collection name="converted_alignments" type="list" label="Converted alignments">
<discover_datasets directory="run_dir/convert" pattern="(?P&lt;name&gt;.+)-out\..+" format="data" />
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should set the format instead of format="data"

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you have a look at the amas_split.xml at L55; is this what you were thinking?

<filter>action["action_selector"] == "convert"</filter>
</collection>

<!-- replicate action: files whose names start with "replicate" and contain "-out.". -->
<collection name="replicate_alignments"
            type="list"
            label="Replicate alignments">
    <discover_datasets directory="run_dir/replicate" pattern="(?P&lt;name&gt;replicate.+-out\..+)" format="data" />
    <filter>action["action_selector"] == "replicate"</filter>
</collection>

<!-- split action: the element name is the part of the file name before "-out.". -->
<collection name="split_alignments"
            type="list"
            label="Split alignment files">
    <discover_datasets directory="run_dir/split" pattern="(?P&lt;name&gt;.+)-out\..+" format="data" />
    <filter>action["action_selector"] == "split"</filter>
</collection>

<!-- remove action: the element name is the part of the file name before "-out.". -->
<collection name="reduced_alignments"
            type="list"
            label="Reduced alignments">
    <discover_datasets directory="run_dir/remove" pattern="(?P&lt;name&gt;.+)-out\..+" format="data" />
    <filter>action["action_selector"] == "remove"</filter>
</collection>

<!-- translate action: files whose names start with "translated_". -->
<collection name="translated_alignments"
            type="list"
            label="Translated alignments">
    <discover_datasets directory="run_dir/translate" pattern="(?P&lt;name&gt;translated_.+-out\..+)" format="data" />
    <filter>action["action_selector"] == "translate"</filter>
</collection>

<!-- trim action: files whose names start with "trimmed_". -->
<collection name="trimmed_alignments"
            type="list"
            label="Trimmed alignments">
    <discover_datasets directory="run_dir/trim" pattern="(?P&lt;name&gt;trimmed_.+-out\..+)" format="data" />
    <filter>action["action_selector"] == "trim"</filter>
</collection>
</outputs>

<expand macro="tests" />

<expand macro="help" />
<expand macro="citations" />
</tool>
38 changes: 38 additions & 0 deletions tools/amas/info.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
<?xml version="1.0" encoding="UTF-8"?>
<macros>
<!-- Version of the wrapped AMAS package. -->
<token name="@TOOL_VERSION@">1.0</token>
<!-- Revision of this Galaxy wrapper for the given tool version. -->
<token name="@WRAPPER_VERSION@">1</token>

<!--
    Help text shown below the tool form, written in reStructuredText.
    File names, flags and format names use double backticks so they render as
    inline literals; single backticks would render as a title reference instead.
    The CDATA body must stay unindented: leading whitespace is significant in reStructuredText.
-->
<xml name="help">
    <help><![CDATA[
AMAS (Alignment Manipulation And Summary) was written for modern phylogenomics workflows, where hundreds of taxa and thousands of loci are routinely analysed and summarised. It runs as a lean Python 3 program that depends only on the standard library yet still utilises multiprocessing to chew through very large amino-acid or nucleotide alignments. Drawing on Borowiec 2016 (PeerJ 4:e1660), AMAS combines format conversions, sequence manipulation, partition-aware slicing, replicate generation, and rich summary statistics (taxon counts, alignment length, matrix cells, missing data, AT/GC content, variable and parsimony-informative sites, plus alphabet frequencies). The wrapper surfaces the following sub-commands:

* **concat** — concatenate multiple alignments, emitting ``concatenated.out`` plus ``partitions.txt``. Choose *Format of the concatenated alignment* to control the container (AMAS 1.0 does not expose codon partition flags at the CLI level, so they are not shown here).
* **summary** — write ``summary.txt`` reporting the key counts listed above; optionally enable *per-taxon summaries* for ``<input>-seq-summary.txt`` files.
* **convert** — re-encode each alignment as fasta/phylip/nexus (sequential or interleaved) using AMAS’ native ``<input>-out.<ext>`` naming.
* **replicate** — generate phylogenetic jack-knife replicates by sampling loci; users set the number of replicates and loci per replicate.
* **split** — divide a concatenated alignment using a partitions file (format ``unspecified``, ``nexus``, or ``raxml``) and optionally drop taxa that are entirely missing within a partition.
* **remove** — drop one or more taxa by name, prefixing resulting files with ``reduced_`` unless a custom prefix is supplied.
* **translate** — translate DNA alignments to amino acids with the chosen NCBI genetic code (1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 16, 21, 22, 23, 24, 25, 26) and reading frame.
* **trim** — prune columns with occupancy below the nominated threshold and optionally retain only parsimony-informative sites (``AMAS trim`` behaviour). Output files are named ``trimmed_<input>-out.<ext>``.

Common arguments:

1. ``--in-files`` (multi-select parameter in Galaxy).
2. ``--in-format`` — ``fasta``, ``phylip``, ``nexus``, ``phylip-int``, or ``nexus-int``.
3. ``--data-type`` — ``dna`` or ``aa``.
4. ``--check-align`` verifies that sequences are aligned (disabled by default for performance).
5. ``--cores`` controls the multiprocessing pool used when summarising alignments (set to 1 for deterministic Galaxy tests).

Regression expectations from the upstream ``amas/tests/tests.py`` suite (e.g., ``summary.txt``, ``fasta1.fas-seq-summary.txt``) are bundled under ``test-data/outputs/`` so this wrapper stays in lock-step with the reference implementation.

AMAS source code and manual: https://github.com/marekborowiec/AMAS
    ]]></help>
</xml>

<!-- Citation macro: a single DOI reference. -->
<xml name="citations">
    <citations>
        <citation type="doi">10.7717/peerj.1660</citation>
    </citations>
</xml>
</macros>
84 changes: 84 additions & 0 deletions tools/amas/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
<?xml version="1.0" encoding="UTF-8"?>
<macros>
<!-- Dependency macro: the amas package, pinned to the @TOOL_VERSION@ token. -->
<xml name="requirements">
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">amas</requirement>
    </requirements>
</xml>

<!--
    Version macro: runs the AMAS module with its -h option.
    NOTE(review): -h looks like the help switch rather than a version switch;
    confirm whether AMAS offers a dedicated way to print its version.
-->
<xml name="version_command">
    <version_command>python -m amas.AMAS -h</version_command>
</xml>

<!--
    Command macro shared by every AMAS action.

    Steps: recreate run_dir/<action>, symlink the inputs into it under their
    element identifiers, change into that directory, then call AMAS with the
    action-specific options followed by the options common to all actions.

    Parameters declared inside the "action" conditional are addressed through
    the conditional (for example $action.part_format); the bare names are not
    part of the template namespace. Element identifiers are user-controlled,
    so they are reduced to a safe character set before they reach the shell.
-->
<xml name="command">
    <command detect_errors="exit_code"><![CDATA[
#import re
set -eu;

rm -rf "run_dir/${action.action_selector}";
mkdir -p "run_dir/${action.action_selector}";

## Create symlinks with original filename for consistent tests because
## input filenames are used as str vars in the partitions.txt output.
## Any character outside [A-Za-z0-9_.-] is replaced with "_" so that an
## identifier cannot break quoting or inject shell syntax.
#for $f in $input_files
    #set $link_name = re.sub('[^A-Za-z0-9_.-]', '_', str($f.element_identifier))
    ln -s '${f}' "run_dir/${action.action_selector}/${link_name}";
#end for

cd "run_dir/${action.action_selector}";

python -m amas.AMAS
#if $action.action_selector == "concat":
    concat
    --concat-part partitions.txt
    --concat-out concatenated.out
    --part-format $action.part_format
    --out-format $action.concat_out_format
#elif $action.action_selector == "convert":
    convert
    --out-format $action.convert_out_format
#elif $action.action_selector == "replicate":
    replicate
    --rep-aln $action.replicate_replicates $action.replicate_loci
    --out-format $action.replicate_out_format
#elif $action.action_selector == "split":
    split
    --split-by '$action.split_by'
    $action.remove_empty
    --out-format $action.split_out_format
#elif $action.action_selector == "summary":
    summary
    --summary-out summary.txt
    #if str($action.by_taxon) == "true":
        --by-taxon
    #end if
#elif $action.action_selector == "remove":
    remove
    --out-prefix '$action.out_prefix'
    --out-format $action.remove_out_format
    --taxa-to-remove
    #for $taxon in $action.taxa_to_remove
        '$taxon.taxon'
    #end for
#elif $action.action_selector == "translate":
    translate
    --code $action.genetic_code
    --reading-frame $action.reading_frame
    --out-format $action.translate_out_format
#elif $action.action_selector == "trim":
    trim
    --trim-fraction $action.trim_fraction
    #if str($action.retain_parsimony) == "true":
        --retain-only-parsimony-sites
    #end if
    --out-format $action.trim_out_format
#end if
--in-files
## Same sanitised names as the symlinks created above.
#for $f in $input_files
    #set $link_name = re.sub('[^A-Za-z0-9_.-]', '_', str($f.element_identifier))
    '${link_name}'
#end for
--in-format $in_format
--data-type $data_type
--cores $cores
$check_align
    ]]></command>
</xml>
</macros>
6 changes: 6 additions & 0 deletions tools/amas/test-data/inputs/aa_alignment1.faa
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>Spec1
ACDEFGHIKLMN
>Spec2
ACDEYGHIKLMN
>Spec3
ACDEFGHIKLMQ
6 changes: 6 additions & 0 deletions tools/amas/test-data/inputs/aa_alignment1.int-phy
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
3 12

Spec1 ACDEFGHIKLMN
Spec2 ACDEYGHIKLMN
Spec3 ACDEFGHIKLMQ

6 changes: 6 additions & 0 deletions tools/amas/test-data/inputs/aa_alignment2.faa
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>SpecA
MSTNPKPQRKST
>SpecB
MSTNPKPQRKSA
>SpecC
MSTDPKPQRKST
6 changes: 6 additions & 0 deletions tools/amas/test-data/inputs/aa_alignment2.int-phy
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
3 12

SpecA MSTNPKPQRKST
SpecB MSTNPKPQRKSA
SpecC MSTDPKPQRKST

Loading