galaxyproject · jchchiu · Nov 6, 2025 · Nov 6, 2025 · Nov 6, 2025 · Nov 6, 2025
diff --git a/tools/amas/.shed.yml b/tools/amas/.shed.yml
@@ -0,0 +1,9 @@
+# Tool Shed metadata for the AMAS wrapper.
+name: amas
+# NOTE(review): tools merged into galaxyproject/tools-iuc are normally owned by
+# "iuc" and point remote_repository_url at the upstream tool directory - confirm
+# before merging. Values are left exactly as submitted.
+owner: biohackathon25aus-george-joy-julian-patra
+description: AMAS high-throughput alignment manipulation and summaries for phylogenomics
+long_description: >-
+  Handle expansive phylogenomic data sets by concatenating, converting,
+  trimming, translating, replicating, splitting, and summarising large
+  nucleotide or amino acid alignments.
+homepage_url: https://github.com/marekborowiec/AMAS
+remote_repository_url: https://github.com/jchchiu/tools-iuc.git
+categories:
+  - Sequence Analysis
diff --git a/tools/amas/amas.xml b/tools/amas/amas.xml
@@ -0,0 +1,212 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<tool id="amas" name="AMAS" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" profile="25.0">
+    <description>High-throughput alignment manipulation and statistics for large phylogenomics</description>
+    <macros>
+        <import>info.xml</import>
+        <import>macros.xml</import>
+        <import>tests.xml</import>
+    </macros>
+
+    <xrefs>
+        <xref type="bio.tools">amas</xref>
+    </xrefs>
+
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <expand macro="command" />
+
+    <inputs>
+        <!-- Alignments handed to AMAS; every action operates on this list. -->
+        <param name="input_files" type="data" format="fasta,phylip,nex" label="Sequences to analyse" multiple="true" help="Provide pre-aligned FASTA/PHYLIP/NEXUS files (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." />
+        <!-- The selected action picks the AMAS sub-command; each <when> holds the options specific to it. -->
+        <conditional name="action">
+            <param name="action_selector" type="select" label="Action to perform">
+                <option value="concat" selected="true">Concatenate input alignments</option>
+                <option value="summary">Write alignment summary</option>
+                <option value="convert">Convert to other file format</option>
+                <option value="replicate">Create replicate data sets for phylogenetic jackknife</option>
+                <option value="split">Split alignment according to a partitions file</option>
+                <option value="remove">Remove taxa from alignment</option>
+                <option value="translate">Translate DNA alignment into protein alignment</option>
+                <option value="trim">Trim alignment by occupancy</option>
+            </param>
+            <when value="concat">
+                <param name="concat_out_format" type="select" label="Format of the concatenated alignment">
+                    <option value="fasta">fasta</option>
+                    <option value="phylip">phylip</option>
+                    <option value="phylip-int">phylip-int</option>
+                    <option value="nexus">nexus(sequential)</option>
+                    <option value="nexus-int">nexus(interleaved)</option>
+                </param>
+                <param name="part_format" type="select" label="Format of the partitions file">
+                    <option value="nexus">nexus</option>
+                    <option value="raxml">raxml</option>
+                    <option value="unspecified" selected="true">unspecified</option>
+                </param>
+            </when>
+            <when value="summary">
+                <!-- The command template compares this value against the string "true". -->
+                <param name="by_taxon" type="boolean" label="Also emit per-taxon summaries" checked="false" truevalue="true" falsevalue="false" />
+            </when>
+            <when value="convert">
+                <param name="convert_out_format" type="select" label="Format of the converted alignments">
+                    <option value="fasta">fasta</option>
+                    <option value="phylip">phylip</option>
+                    <option value="phylip-int">phylip-int</option>
+                    <option value="nexus">nexus(sequential)</option>
+                    <option value="nexus-int">nexus(interleaved)</option>
+                </param>
+            </when>
+            <when value="replicate">
+                <param name="replicate_replicates" type="integer" value="10" min="1" label="Number of replicate datasets to build" />
+                <param name="replicate_loci" type="integer" value="2" min="1" label="Number of loci per replicate" />
+                <param name="replicate_out_format" type="select" label="Format of the replicate alignments">
+                    <option value="fasta">fasta</option>
+                    <option value="phylip">phylip</option>
+                    <option value="phylip-int">phylip-int</option>
+                    <option value="nexus">nexus(sequential)</option>
+                    <option value="nexus-int">nexus(interleaved)</option>
+                </param>
+            </when>
+            <when value="split">
+                <param name="split_by" type="data" format="txt,nex" label="Partitions file for splitting" help="Provide the partitions file written by AMAS or another tool." />
+                <!-- The true value is the literal flag, so the command template can emit the parameter as-is. -->
+                <param name="remove_empty" type="boolean" label="Remove taxa that are entirely missing within a partition" checked="false" truevalue="--remove-empty" falsevalue="" />
+                <param name="split_out_format" type="select" label="Format of the split alignments">
+                    <option value="fasta">fasta</option>
+                    <option value="phylip">phylip</option>
+                    <option value="phylip-int">phylip-int</option>
+                    <option value="nexus">nexus(sequential)</option>
+                    <option value="nexus-int">nexus(interleaved)</option>
+                </param>
+            </when>
+            <when value="remove">
+                <repeat name="taxa_to_remove" title="Taxa to remove" min="1">
+                    <!-- Reject blank names: the command would otherwise pass an empty '' argument to the taxa-removal option. -->
+                    <param name="taxon" type="text" label="Taxon name">
+                        <validator type="empty_field" message="Enter the name of a taxon to remove." />
+                    </param>
+                </repeat>
+                <param name="out_prefix" type="text" value="reduced_" label="Prefix for reduced alignments" />
+                <param name="remove_out_format" type="select" label="Format of the reduced alignments">
+                    <option value="fasta">fasta</option>
+                    <option value="phylip">phylip</option>
+                    <option value="phylip-int">phylip-int</option>
+                    <option value="nexus">nexus(sequential)</option>
+                    <option value="nexus-int">nexus(interleaved)</option>
+                </param>
+            </when>
+            <when value="translate">
+                <param name="genetic_code" type="select" label="NCBI genetic code">
+                    <option value="1" selected="true">1: Standard</option>
+                    <option value="2">2: Vertebrate mitochondrial</option>
+                    <option value="3">3: Yeast mitochondrial</option>
+                    <option value="4">4: Mold/Protozoan/Coelenterate mito + Mycoplasma</option>
+                    <option value="5">5: Invertebrate mitochondrial</option>
+                    <option value="6">6: Ciliate/Hexamita nuclear</option>
+                    <option value="9">9: Echinoderm/Flatworm mitochondrial</option>
+                    <option value="10">10: Euplotid nuclear</option>
+                    <option value="11">11: Bacterial/Archaeal/Plastid</option>
+                    <option value="12">12: Alternative yeast nuclear</option>
+                    <option value="13">13: Ascidian mitochondrial</option>
+                    <option value="14">14: Alternative flatworm mitochondrial</option>
+                    <option value="16">16: Chlorophycean mitochondrial</option>
+                    <option value="21">21: Trematode mitochondrial</option>
+                    <option value="22">22: Scenedesmus obliquus mitochondrial</option>
+                    <option value="23">23: Thraustochytrium mitochondrial</option>
+                    <option value="24">24: Pterobranchia mitochondrial</option>
+                    <option value="25">25: Candidate Division SR1 and Gracilibacteria</option>
+                    <option value="26">26: Pachysolen tannophilus nuclear</option>
+                </param>
+                <param name="reading_frame" type="select" label="Reading frame">
+                    <option value="1" selected="true">Frame 1 (start at first base)</option>
+                    <option value="2">Frame 2 (start at second base)</option>
+                    <option value="3">Frame 3 (start at third base)</option>
+                </param>
+                <param name="translate_out_format" type="select" label="Format of the translated alignments">
+                    <option value="fasta">fasta</option>
+                    <option value="phylip">phylip</option>
+                    <option value="phylip-int">phylip-int</option>
+                    <option value="nexus">nexus(sequential)</option>
+                    <option value="nexus-int">nexus(interleaved)</option>
+                </param>
+            </when>
+            <when value="trim">
+                <param name="trim_fraction" type="float" min="0.0" max="1.0" value="0.6" label="Minimum column occupancy to keep" help="Columns with occupancy lower than this value are removed." />
+                <!-- The command template compares this value against the string "true". -->
+                <param name="retain_parsimony" type="boolean" label="Keep only parsimony-informative columns" checked="false" truevalue="true" falsevalue="false" />
+                <param name="trim_out_format" type="select" label="Format of the trimmed alignments">
+                    <option value="fasta">fasta</option>
+                    <option value="phylip">phylip</option>
+                    <option value="phylip-int">phylip-int</option>
+                    <option value="nexus">nexus(sequential)</option>
+                    <option value="nexus-int">nexus(interleaved)</option>
+                </param>
+            </when>
+        </conditional>
+        <!-- Options below sit outside the conditional and are appended to the command line for every action. -->
+        <param name="in_format" type="select" label="Format of the input file">
+            <option value="fasta">fasta</option>
+            <option value="phylip">phylip</option>
+            <option value="phylip-int">phylip-int</option>
+            <option value="nexus">nexus(sequential)</option>
+            <option value="nexus-int">nexus(interleaved)</option>
+        </param>
+        <param name="data_type" type="select" label="Data type">
+            <option value="aa">Protein alignments</option>
+            <option value="dna">Nucleotide alignments</option>
+        </param>
+        <param name="check_align" type="boolean" label="Check if input sequences are aligned" checked="false" truevalue="--check-align" falsevalue="" />
+        <param name="cores" type="integer" value="1" min="1" label="Number of cores to use when summarising alignments" />
+    </inputs>
+
+    <outputs>
+        <!-- Every output carries a filter on the selected action, so only the outputs of the chosen sub-command are created. -->
+        <data name="concat_output" from_work_dir="run_dir/concat/concatenated.out" format="txt" label="Concatenated alignment">
+            <filter>action["action_selector"] == "concat"</filter>
+            <!-- concat_out_format is nested inside the "action" conditional, so it has to be addressed by its full path;
+                 a bare name does not resolve and would leave the dataset typed as txt. -->
+            <change_format>
+                <when input="action.concat_out_format" value="fasta" format="fasta" />
+                <when input="action.concat_out_format" value="phylip" format="phylip" />
+                <when input="action.concat_out_format" value="phylip-int" format="phylip" />
+                <when input="action.concat_out_format" value="nexus" format="nex" />
+                <when input="action.concat_out_format" value="nexus-int" format="nex" />
+            </change_format>
+        </data>
+        <data name="partitions_out" from_work_dir="run_dir/concat/partitions.txt" format="txt" label="Partition file">
+            <filter>action["action_selector"] == "concat"</filter>
+        </data>
+
+        <data name="summary_out" from_work_dir="run_dir/summary/summary.txt" format="txt" label="Alignment summary">
+            <filter>action["action_selector"] == "summary"</filter>
+        </data>
+        <!-- Per-taxon files are collected only when the by_taxon option of the summary action is enabled. -->
+        <collection name="taxon_summaries" type="list" label="Per-taxon summaries">
+            <discover_datasets directory="run_dir/summary" pattern="(?P&lt;name&gt;.+-seq-summary)\.txt" format="txt" />
+            <filter>action["action_selector"] == "summary" and action.get("by_taxon", False)</filter>
+        </collection>
+
+        <!-- The collections below are discovered from the action's working directory by file-name pattern. -->
+        <collection name="converted_alignments" type="list" label="Converted alignments">
+            <discover_datasets directory="run_dir/convert" pattern="(?P&lt;name&gt;.+)-out\..+" format="data" />
+            <filter>action["action_selector"] == "convert"</filter>
+        </collection>
+
+        <collection name="replicate_alignments" type="list" label="Replicate alignments">
+            <discover_datasets directory="run_dir/replicate" pattern="(?P&lt;name&gt;replicate.+-out\..+)" format="data" />
+            <filter>action["action_selector"] == "replicate"</filter>
+        </collection>
+
+        <collection name="split_alignments" type="list" label="Split alignment files">
+            <discover_datasets directory="run_dir/split" pattern="(?P&lt;name&gt;.+)-out\..+" format="data" />
+            <filter>action["action_selector"] == "split"</filter>
+        </collection>
+
+        <collection name="reduced_alignments" type="list" label="Reduced alignments">
+            <discover_datasets directory="run_dir/remove" pattern="(?P&lt;name&gt;.+)-out\..+" format="data" />
+            <filter>action["action_selector"] == "remove"</filter>
+        </collection>
+
+        <collection name="translated_alignments" type="list" label="Translated alignments">
+            <discover_datasets directory="run_dir/translate" pattern="(?P&lt;name&gt;translated_.+-out\..+)" format="data" />
+            <filter>action["action_selector"] == "translate"</filter>
+        </collection>
+
+        <collection name="trimmed_alignments" type="list" label="Trimmed alignments">
+            <discover_datasets directory="run_dir/trim" pattern="(?P&lt;name&gt;trimmed_.+-out\..+)" format="data" />
+            <filter>action["action_selector"] == "trim"</filter>
+        </collection>
+    </outputs>
+
+    <expand macro="tests" />
+
+    <expand macro="help" />
+    <expand macro="citations" />
+</tool>
diff --git a/tools/amas/info.xml b/tools/amas/info.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<macros>
+    <!-- @TOOL_VERSION@ pins the amas package requirement and, combined with @WRAPPER_VERSION@, forms the tool's version attribute. -->
+    <token name="@TOOL_VERSION@">1.0</token>
+    <token name="@WRAPPER_VERSION@">1</token>
+
+    <xml name="help">
+        <!-- Help text is reStructuredText: file names and command-line flags use double-backtick inline literals. -->
+        <help><![CDATA[
+AMAS (Alignment Manipulation And Summary) was written for modern phylogenomics workflows, where hundreds of taxa and thousands of loci are routinely analysed and summarised. It runs as a lean Python 3 program that depends only on the standard library yet still utilises multiprocessing to chew through very large amino-acid or nucleotide alignments. Drawing on Borowiec 2016 (PeerJ 4:e1660), AMAS combines format conversions, sequence manipulation, partition-aware slicing, replicate generation, and rich summary statistics (taxon counts, alignment length, matrix cells, missing data, AT/GC content, variable and parsimony-informative sites, plus alphabet frequencies). The wrapper surfaces the following sub-commands:
+
+* **concat** — concatenate multiple alignments, emitting ``concatenated.out`` plus ``partitions.txt``. Choose *Format of the concatenated alignment* to control the container (AMAS 1.0 does not expose codon partition flags at the CLI level, so they are not shown here).
+* **summary** — write ``summary.txt`` reporting the key counts listed above; optionally enable *per-taxon summaries* for ``<input>-seq-summary.txt`` files.
+* **convert** — re-encode each alignment as fasta/phylip/nexus (sequential or interleaved) using AMAS’ native ``<input>-out.<ext>`` naming.
+* **replicate** — generate phylogenetic jack-knife replicates by sampling loci; users set the number of replicates and loci per replicate.
+* **split** — divide a concatenated alignment using a partitions file (format ``unspecified``, ``nexus``, or ``raxml``) and optionally drop taxa that are entirely missing within a partition.
+* **remove** — drop one or more taxa by name, prefixing resulting files with ``reduced_`` unless a custom prefix is supplied.
+* **translate** — translate DNA alignments to amino acids with the chosen NCBI genetic code (1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 16, 21, 22, 23, 24, 25, 26) and reading frame.
+* **trim** — prune columns with occupancy below the nominated threshold and optionally retain only parsimony-informative sites (``AMAS trim`` behaviour). Output files are named ``trimmed_<input>-out.<ext>``.
+
+Common arguments:
+
+1. ``--in-files`` (multi-select parameter in Galaxy).
+2. ``--in-format`` — ``fasta``, ``phylip``, ``nexus``, ``phylip-int``, or ``nexus-int``.
+3. ``--data-type`` — ``dna`` or ``aa``.
+4. ``--check-align`` verifies that sequences are aligned (disabled by default for performance).
+5. ``--cores`` controls the multiprocessing pool used when summarising alignments (set to 1 for deterministic Galaxy tests).
+
+Regression expectations from the upstream ``amas/tests/tests.py`` suite (e.g., ``summary.txt``, ``fasta1.fas-seq-summary.txt``) are bundled under ``test-data/outputs/`` so this wrapper stays in lock-step with the reference implementation.
+
+AMAS source code and manual: https://github.com/marekborowiec/AMAS
+        ]]></help>
+    </xml>
+
+    <!-- Citation macro: a single DOI entry, expanded at the end of the tool definition. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.7717/peerj.1660</citation>
+        </citations>
+    </xml>
+</macros>
diff --git a/tools/amas/macros.xml b/tools/amas/macros.xml
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<macros>
+    <!-- Dependency macro: requires the amas package at the version given by the @TOOL_VERSION@ token. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">amas</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="version_command">
+        <!-- Print only the installed package version; the previous "-h" call emitted the complete usage text.
+             NOTE(review): assumes the installed distribution is registered as "amas" - confirm in the tool environment. -->
+        <version_command>python -c "from importlib.metadata import version; print(version('amas'))"</version_command>
+    </xml>
+
+    <xml name="command">
+        <command detect_errors="exit_code"><![CDATA[
+## Builds one AMAS invocation: the sub-command and its options are chosen by
+## the "action" conditional, followed by the options shared by every action.
+## Parameters declared inside the conditional are addressed as $action.<name>.
+#import re
+set -eu;
+
+## Work in a fresh, action-specific directory; the outputs are collected from it.
+rm -rf "run_dir/${action.action_selector}";
+mkdir -p "run_dir/${action.action_selector}";
+
+## Create symlinks with original filename for consistent tests because
+##  input filenames are used as str vars in the partitions.txt output.
+## Element identifiers are user-controlled, so anything outside a conservative
+##  character set is replaced before the name reaches the shell.
+#for $f in $input_files
+    #set $safe_name = re.sub('[^A-Za-z0-9_.-]', '_', str($f.element_identifier))
+    ln -s '${f}' 'run_dir/${action.action_selector}/${safe_name}';
+#end for
+
+cd "run_dir/${action.action_selector}";
+
+python -m amas.AMAS
+#if $action.action_selector == "concat":
+    concat
+    --concat-part partitions.txt
+    --concat-out concatenated.out
+    --part-format $action.part_format
+    --out-format $action.concat_out_format
+#elif $action.action_selector == "convert":
+    convert
+    --out-format $action.convert_out_format
+#elif $action.action_selector == "replicate":
+    replicate
+    --rep-aln $action.replicate_replicates $action.replicate_loci
+    --out-format $action.replicate_out_format
+#elif $action.action_selector == "split":
+    split
+    --split-by '$action.split_by'
+    $action.remove_empty
+    --out-format $action.split_out_format
+#elif $action.action_selector == "summary":
+    summary
+    --summary-out summary.txt
+    ## by_taxon is declared with truevalue="true", hence the string comparison
+    #if str($action.by_taxon) == "true":
+        --by-taxon
+    #end if
+#elif $action.action_selector == "remove":
+    remove
+    --out-prefix '$action.out_prefix'
+    --out-format $action.remove_out_format
+    --taxa-to-remove
+    #for $taxon in $action.taxa_to_remove
+        '$taxon.taxon'
+    #end for
+#elif $action.action_selector == "translate":
+    translate
+    --code $action.genetic_code
+    --reading-frame $action.reading_frame
+    --out-format $action.translate_out_format
+#elif $action.action_selector == "trim":
+    trim
+    --trim-fraction $action.trim_fraction
+    ## retain_parsimony is declared with truevalue="true", hence the string comparison
+    #if str($action.retain_parsimony) == "true":
+        --retain-only-parsimony-sites
+    #end if
+    --out-format $action.trim_out_format
+#end if
+    --in-files
+## Pass the same sanitised names that were used for the symlinks above
+#for $f in $input_files
+    #set $safe_name = re.sub('[^A-Za-z0-9_.-]', '_', str($f.element_identifier))
+        '${safe_name}'
+#end for
+    --in-format $in_format
+    --data-type $data_type
+    --cores $cores
+    $check_align
+        ]]></command>
+    </xml>
+</macros>
diff --git a/tools/amas/test-data/inputs/aa_alignment1.faa b/tools/amas/test-data/inputs/aa_alignment1.faa
@@ -0,0 +1,6 @@
+>Spec1
+ACDEFGHIKLMN
+>Spec2
+ACDEYGHIKLMN
+>Spec3
+ACDEFGHIKLMQ
diff --git a/tools/amas/test-data/inputs/aa_alignment1.int-phy b/tools/amas/test-data/inputs/aa_alignment1.int-phy
@@ -0,0 +1,6 @@
+3 12
+
+Spec1   ACDEFGHIKLMN
+Spec2   ACDEYGHIKLMN
+Spec3   ACDEFGHIKLMQ
+
diff --git a/tools/amas/test-data/inputs/aa_alignment2.faa b/tools/amas/test-data/inputs/aa_alignment2.faa
@@ -0,0 +1,6 @@
+>SpecA
+MSTNPKPQRKST
+>SpecB
+MSTNPKPQRKSA
+>SpecC
+MSTDPKPQRKST
diff --git a/tools/amas/test-data/inputs/aa_alignment2.int-phy b/tools/amas/test-data/inputs/aa_alignment2.int-phy
@@ -0,0 +1,6 @@
+3 12
+
+SpecA   MSTNPKPQRKST
+SpecB   MSTNPKPQRKSA
+SpecC   MSTDPKPQRKST
+