-
Notifications
You must be signed in to change notification settings - Fork 487
New tool addition: amas tool #7443
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 10 commits
917c7aa
bb629f6
be4db1f
932783d
793a6b1
af75ec9
a45c8b5
e816d9c
9967a62
8e937d7
a6ff62e
c354605
a4fc62f
6a56045
426a577
c757008
1509d85
6872743
c77e246
582d254
bc9bebd
dc15ac1
a279552
1d901f5
77241c3
17c02f2
91c5fe3
62a9bce
a12ab96
f071907
0bb5c40
653c992
178d5cc
81cbb66
8f32f1d
46502d3
587bb5d
6fc566f
5809b0a
e2de21b
b3c6135
4b43895
f6a85d5
08bd74d
b19d9b7
b61f0ff
846b254
eec0620
8364cfe
834f114
2d2349b
0e62561
4af9562
cfcfca9
d4b84ac
51bb36e
ff762fb
3d9424b
18a8396
bd9a818
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| categories: | ||
| - Sequence Analysis | ||
| description: AMAS high-throughput alignment manipulation and summaries for phylogenomics | ||
| homepage_url: https://github.com/marekborowiec/AMAS | ||
| long_description: Handle expansive phylogenomic data sets by concatenating, converting, | ||
| trimming, translating, replicating, splitting, and summarising large nucleotide or amino acid alignments. | ||
| name: amas | ||
| owner: biohackathon25aus-george-joy-julian-patra | ||
| remote_repository_url: https://github.com/jchchiu/tools-iuc.git |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,212 @@ | ||
| <?xml version="1.0" encoding="UTF-8"?> | ||
jchchiu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| <tool id="amas" name="AMAS" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" profile="25.0"> | ||
jchchiu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| <description>High-throughput alignment manipulation and statistics for large phylogenomics</description> | ||
| <macros> | ||
| <import>info.xml</import> | ||
| <import>macros.xml</import> | ||
| <import>tests.xml</import> | ||
| </macros> | ||
|
|
||
| <xrefs> | ||
| <xref type="bio.tools">amas</xref> | ||
| </xrefs> | ||
|
|
||
| <expand macro="requirements" /> | ||
| <expand macro="version_command" /> | ||
| <expand macro="command" /> | ||
|
|
||
| <inputs> | ||
| <param name="input_files" type="data" format="fasta,phylip,nex" label="Sequences to analyse" multiple="true" help="Provide pre-aligned FASTA/PHYLIP/NEXUS files (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." /> | ||
| <conditional name="action"> | ||
jchchiu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| <param name="action_selector" type="select" label="Action to perform"> | ||
| <option value="concat" selected="true">Concatenate input alignments</option> | ||
| <option value="summary">Write alignment summary</option> | ||
| <option value="convert">Convert to other file format</option> | ||
| <option value="replicate">Create replicate data sets for phylogenetic jackknife</option> | ||
| <option value="split">Split alignment according to a partitions file</option> | ||
| <option value="remove">Remove taxa from alignment</option> | ||
| <option value="translate">Translate DNA alignment into protein alignment</option> | ||
| <option value="trim">Trim alignment by occupancy</option> | ||
| </param> | ||
| <when value="concat"> | ||
| <param name="concat_out_format" type="select" label="Format of the concatenated alignment"> | ||
| <option value="fasta">fasta</option> | ||
| <option value="phylip">phylip</option> | ||
| <option value="phylip-int">phylip-int</option> | ||
| <option value="nexus">nexus(sequential)</option> | ||
| <option value="nexus-int">nexus(interleaved)</option> | ||
| </param> | ||
| <param name="part_format" type="select" label="Format of the partitions file"> | ||
jchchiu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| <option value="nexus">nexus</option> | ||
| <option value="raxml">raxml</option> | ||
| <option value="unspecified" selected="true">unspecified</option> | ||
| </param> | ||
| </when> | ||
| <when value="summary"> | ||
| <param name="by_taxon" type="boolean" label="Also emit per-taxon summaries" checked="false" truevalue="true" falsevalue="false" /> | ||
jchchiu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| </when> | ||
| <when value="convert"> | ||
| <param name="convert_out_format" type="select" label="Format of the converted alignments"> | ||
| <option value="fasta">fasta</option> | ||
| <option value="phylip">phylip</option> | ||
| <option value="phylip-int">phylip-int</option> | ||
| <option value="nexus">nexus(sequential)</option> | ||
| <option value="nexus-int">nexus(interleaved)</option> | ||
| </param> | ||
| </when> | ||
| <when value="replicate"> | ||
| <param name="replicate_replicates" type="integer" value="10" min="1" label="Number of replicate datasets to build" /> | ||
| <param name="replicate_loci" type="integer" value="2" min="1" label="Number of loci per replicate" /> | ||
| <param name="replicate_out_format" type="select" label="Format of the replicate alignments"> | ||
| <option value="fasta">fasta</option> | ||
| <option value="phylip">phylip</option> | ||
| <option value="phylip-int">phylip-int</option> | ||
| <option value="nexus">nexus(sequential)</option> | ||
| <option value="nexus-int">nexus(interleaved)</option> | ||
| </param> | ||
| </when> | ||
| <when value="split"> | ||
| <param name="split_by" type="data" format="txt,nex" label="Partitions file for splitting" help="Provide the partitions file written by AMAS or another tool." /> | ||
| <param name="remove_empty" type="boolean" label="Remove taxa that are entirely missing within a partition" checked="false" truevalue="--remove-empty" falsevalue="" /> | ||
| <param name="split_out_format" type="select" label="Format of the split alignments"> | ||
| <option value="fasta">fasta</option> | ||
| <option value="phylip">phylip</option> | ||
| <option value="phylip-int">phylip-int</option> | ||
| <option value="nexus">nexus(sequential)</option> | ||
| <option value="nexus-int">nexus(interleaved)</option> | ||
| </param> | ||
jchchiu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| </when> | ||
| <when value="remove"> | ||
| <repeat name="taxa_to_remove" title="Taxa to remove" min="1"> | ||
|
||
| <param name="taxon" type="text" label="Taxon name" /> | ||
|
||
| </repeat> | ||
| <param name="out_prefix" type="text" value="reduced_" label="Prefix for reduced alignments" /> | ||
| <param name="remove_out_format" type="select" label="Format of the reduced alignments"> | ||
| <option value="fasta">fasta</option> | ||
| <option value="phylip">phylip</option> | ||
| <option value="phylip-int">phylip-int</option> | ||
| <option value="nexus">nexus(sequential)</option> | ||
| <option value="nexus-int">nexus(interleaved)</option> | ||
| </param> | ||
| </when> | ||
| <when value="translate"> | ||
| <param name="genetic_code" type="select" label="NCBI genetic code"> | ||
jchchiu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| <option value="1" selected="true">1: Standard</option> | ||
| <option value="2">2: Vertebrate mitochondrial</option> | ||
| <option value="3">3: Yeast mitochondrial</option> | ||
| <option value="4">4: Mold/Protozoan/Coelenterate mito + Mycoplasma</option> | ||
| <option value="5">5: Invertebrate mitochondrial</option> | ||
| <option value="6">6: Ciliate/Hexamita nuclear</option> | ||
| <option value="9">9: Echinoderm/Flatworm mitochondrial</option> | ||
| <option value="10">10: Euplotid nuclear</option> | ||
| <option value="11">11: Bacterial/Archaeal/Plastid</option> | ||
| <option value="12">12: Alternative yeast nuclear</option> | ||
| <option value="13">13: Ascidian mitochondrial</option> | ||
| <option value="14">14: Alternative flatworm mitochondrial</option> | ||
| <option value="16">16: Chlorophycean mitochondrial</option> | ||
| <option value="21">21: Trematode mitochondrial</option> | ||
| <option value="22">22: Scenedesmus obliquus mitochondrial</option> | ||
| <option value="23">23: Thraustochytrium mitochondrial</option> | ||
| <option value="24">24: Pterobranchia mitochondrial</option> | ||
| <option value="25">25: Candidate Division SR1 and Gracilibacteria</option> | ||
| <option value="26">26: Pachysolen tannophilus nuclear</option> | ||
| </param> | ||
| <param name="reading_frame" type="select" label="Reading frame"> | ||
jchchiu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| <option value="1" selected="true">Frame 1 (start at first base)</option> | ||
| <option value="2">Frame 2 (start at second base)</option> | ||
| <option value="3">Frame 3 (start at third base)</option> | ||
| </param> | ||
| <param name="translate_out_format" type="select" label="Format of the translated alignments"> | ||
| <option value="fasta">fasta</option> | ||
| <option value="phylip">phylip</option> | ||
| <option value="phylip-int">phylip-int</option> | ||
| <option value="nexus">nexus(sequential)</option> | ||
| <option value="nexus-int">nexus(interleaved)</option> | ||
| </param> | ||
| </when> | ||
| <when value="trim"> | ||
| <param name="trim_fraction" type="float" min="0.0" max="1.0" value="0.6" label="Minimum column occupancy to keep" help="Columns with occupancy lower than this value are removed." /> | ||
| <param name="retain_parsimony" type="boolean" label="Keep only parsimony-informative columns" checked="false" truevalue="true" falsevalue="false" /> | ||
| <param name="trim_out_format" type="select" label="Format of the trimmed alignments"> | ||
| <option value="fasta">fasta</option> | ||
| <option value="phylip">phylip</option> | ||
| <option value="phylip-int">phylip-int</option> | ||
| <option value="nexus">nexus(sequential)</option> | ||
| <option value="nexus-int">nexus(interleaved)</option> | ||
| </param> | ||
| </when> | ||
| </conditional> | ||
| <param name="in_format" type="select" label="Format of the input file"> | ||
| <option value="fasta">fasta</option> | ||
| <option value="phylip">phylip</option> | ||
| <option value="phylip-int">phylip-int</option> | ||
| <option value="nexus">nexus(sequential)</option> | ||
| <option value="nexus-int">nexus(interleaved)</option> | ||
| </param> | ||
|
||
| <param name="data_type" type="select" label="Data type"> | ||
| <option value="aa">Protein alignments</option> | ||
| <option value="dna">Nucleotide alignments</option> | ||
| </param> | ||
| <param name="check_align" type="boolean" label="Check if input sequences are aligned" checked="false" truevalue="--check-align" falsevalue="" /> | ||
| <param name="cores" type="integer" value="1" min="1" label="Number of cores to use when summarising alignments" /> | ||
jchchiu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| </inputs> | ||
|
|
||
| <outputs> | ||
| <!-- Concatenated alignment written by "AMAS concat"; only produced for the concat action. | ||
| The output datatype follows the chosen output format. The selector lives inside the | ||
| "action" conditional, so change_format must reference it as action.concat_out_format; | ||
| a bare name does not resolve and the dataset would silently stay "txt". --> | ||
| <data name="concat_output" from_work_dir="run_dir/concat/concatenated.out" format="txt" label="Concatenated alignment"> | ||
| <filter>action["action_selector"] == "concat"</filter> | ||
| <change_format> | ||
| <when input="action.concat_out_format" value="fasta" format="fasta" /> | ||
| <when input="action.concat_out_format" value="phylip" format="phylip" /> | ||
| <when input="action.concat_out_format" value="phylip-int" format="phylip" /> | ||
| <when input="action.concat_out_format" value="nexus" format="nex" /> | ||
| <when input="action.concat_out_format" value="nexus-int" format="nex" /> | ||
| </change_format> | ||
| </data> | ||
| <data name="partitions_out" from_work_dir="run_dir/concat/partitions.txt" format="txt" label="Partition file"> | ||
| <filter>action["action_selector"] == "concat"</filter> | ||
| </data> | ||
|
|
||
| <data name="summary_out" from_work_dir="run_dir/summary/summary.txt" format="txt" label="Alignment summary"> | ||
| <filter>action["action_selector"] == "summary"</filter> | ||
| </data> | ||
| <collection name="taxon_summaries" type="list" label="Per-taxon summaries"> | ||
| <discover_datasets directory="run_dir/summary" pattern="(?P<name>.+-seq-summary)\.txt" format="txt" /> | ||
| <filter>action["action_selector"] == "summary" and action.get("by_taxon", False)</filter> | ||
| </collection> | ||
|
|
||
| <collection name="converted_alignments" type="list" label="Converted alignments"> | ||
| <discover_datasets directory="run_dir/convert" pattern="(?P<name>.+)-out\..+" format="data" /> | ||
|
||
| <filter>action["action_selector"] == "convert"</filter> | ||
| </collection> | ||
|
|
||
| <collection name="replicate_alignments" type="list" label="Replicate alignments"> | ||
| <discover_datasets directory="run_dir/replicate" pattern="(?P<name>replicate.+-out\..+)" format="data" /> | ||
| <filter>action["action_selector"] == "replicate"</filter> | ||
| </collection> | ||
|
|
||
| <collection name="split_alignments" type="list" label="Split alignment files"> | ||
| <discover_datasets directory="run_dir/split" pattern="(?P<name>.+)-out\..+" format="data" /> | ||
| <filter>action["action_selector"] == "split"</filter> | ||
| </collection> | ||
|
|
||
| <collection name="reduced_alignments" type="list" label="Reduced alignments"> | ||
| <discover_datasets directory="run_dir/remove" pattern="(?P<name>.+)-out\..+" format="data" /> | ||
| <filter>action["action_selector"] == "remove"</filter> | ||
| </collection> | ||
|
|
||
| <collection name="translated_alignments" type="list" label="Translated alignments"> | ||
| <discover_datasets directory="run_dir/translate" pattern="(?P<name>translated_.+-out\..+)" format="data" /> | ||
| <filter>action["action_selector"] == "translate"</filter> | ||
| </collection> | ||
|
|
||
| <collection name="trimmed_alignments" type="list" label="Trimmed alignments"> | ||
| <discover_datasets directory="run_dir/trim" pattern="(?P<name>trimmed_.+-out\..+)" format="data" /> | ||
| <filter>action["action_selector"] == "trim"</filter> | ||
| </collection> | ||
| </outputs> | ||
|
|
||
| <expand macro="tests" /> | ||
|
|
||
| <expand macro="help" /> | ||
| <expand macro="citations" /> | ||
| </tool> | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| <?xml version="1.0" encoding="UTF-8"?> | ||
| <macros> | ||
| <token name="@TOOL_VERSION@">1.0</token> | ||
| <token name="@WRAPPER_VERSION@">1</token> | ||
|
|
||
| <xml name="help"> | ||
| <help><![CDATA[ | ||
| AMAS (Alignment Manipulation And Summary) was written for modern phylogenomics workflows, where hundreds of taxa and thousands of loci are routinely analysed and summarised. It runs as a lean Python 3 program that depends only on the standard library yet still utilises multiprocessing to chew through very large amino-acid or nucleotide alignments. Drawing on Borowiec 2016 (PeerJ 4:e1660), AMAS combines format conversions, sequence manipulation, partition-aware slicing, replicate generation, and rich summary statistics (taxon counts, alignment length, matrix cells, missing data, AT/GC content, variable and parsimony-informative sites, plus alphabet frequencies). The wrapper surfaces the following sub-commands: | ||
|
|
||
| * **concat** — concatenate multiple alignments, emitting `concatenated.out` plus `partitions.txt`. Choose *Format of the concatenated alignment* to control the container (AMAS 1.0 does not expose codon partition flags at the CLI level, so they are not shown here). | ||
| * **summary** — write `summary.txt` reporting the key counts listed above; optionally enable *per-taxon summaries* for `<input>-seq-summary.txt` files. | ||
| * **convert** — re-encode each alignment as fasta/phylip/nexus (sequential or interleaved) using AMAS’ native `<input>-out.<ext>` naming. | ||
| * **replicate** — generate phylogenetic jack-knife replicates by sampling loci; users set the number of replicates and loci per replicate. | ||
| * **split** — divide a concatenated alignment using a partitions file (format `unspecified`, `nexus`, or `raxml`) and optionally drop taxa that are entirely missing within a partition. | ||
| * **remove** — drop one or more taxa by name, prefixing resulting files with `reduced_` unless a custom prefix is supplied. | ||
| * **translate** — translate DNA alignments to amino acids with the chosen NCBI genetic code (1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 16, 21, 22, 23, 24, 25, 26) and reading frame. | ||
| * **trim** — prune columns with occupancy below the nominated threshold and optionally retain only parsimony-informative sites (`AMAS trim` behaviour). Output files are named `trimmed_<input>-out.<ext>`. | ||
|
|
||
| Common arguments: | ||
|
|
||
| 1. `--in-files` (multi-select parameter in Galaxy). | ||
| 2. `--in-format` — `fasta`, `phylip`, `nexus`, `phylip-int`, or `nexus-int`. | ||
| 3. `--data-type` — `dna` or `aa`. | ||
| 4. `--check-align` verifies that sequences are aligned (disabled by default for performance). | ||
| 5. `--cores` controls the multiprocessing pool used when summarising alignments (set to 1 for deterministic Galaxy tests). | ||
|
|
||
| Regression expectations from the upstream `amas/tests/tests.py` suite (e.g., `summary.txt`, `fasta1.fas-seq-summary.txt`) are bundled under `test-data/outputs/` so this wrapper stays in lock-step with the reference implementation. | ||
|
|
||
| AMAS source code and manual: https://github.com/marekborowiec/AMAS | ||
| ]]></help> | ||
| </xml> | ||
|
|
||
| <xml name="citations"> | ||
| <citations> | ||
| <citation type="doi">10.7717/peerj.1660</citation> | ||
| </citations> | ||
| </xml> | ||
| </macros> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| <?xml version="1.0" encoding="UTF-8"?> | ||
| <macros> | ||
| <xml name="requirements"> | ||
| <requirements> | ||
| <requirement type="package" version="@TOOL_VERSION@">amas</requirement> | ||
| </requirements> | ||
| </xml> | ||
|
|
||
| <xml name="version_command"> | ||
| <version_command>python -m amas.AMAS -h</version_command> | ||
jchchiu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| </xml> | ||
|
|
||
| <xml name="command"> | ||
| <command detect_errors="exit_code"><![CDATA[ | ||
| set -eu; | ||
|
|
||
| rm -rf "run_dir/${action.action_selector}"; | ||
| mkdir -p "run_dir/${action.action_selector}"; | ||
|
|
||
| ## Create symlinks with original filename for consistent tests because | ||
| ## input filenames are used as str vars in the partitions.txt output | ||
| #for $f in $input_files | ||
| ln -s '${f}' "run_dir/${action.action_selector}/${f.element_identifier}"; | ||
jchchiu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| #end for | ||
|
|
||
| cd "run_dir/${action.action_selector}"; | ||
|
|
||
| ## Build the AMAS call: sub-command and its options first, then the options shared by all actions. | ||
| ## Parameters declared inside the "action" conditional are addressed through the conditional | ||
| ## (action.NAME); a bare NAME is not in the Cheetah namespace and aborts command templating. | ||
| python -m amas.AMAS | ||
| #if $action.action_selector == "concat": | ||
| concat | ||
| --concat-part partitions.txt | ||
| --concat-out concatenated.out | ||
| --part-format $action.part_format | ||
| --out-format $action.concat_out_format | ||
| #elif $action.action_selector == "convert": | ||
| convert | ||
| --out-format $action.convert_out_format | ||
| #elif $action.action_selector == "replicate": | ||
| replicate | ||
| --rep-aln $action.replicate_replicates $action.replicate_loci | ||
| --out-format $action.replicate_out_format | ||
| #elif $action.action_selector == "split": | ||
| split | ||
| --split-by '$action.split_by' | ||
| $action.remove_empty | ||
| --out-format $action.split_out_format | ||
| #elif $action.action_selector == "summary": | ||
| summary | ||
| --summary-out summary.txt | ||
| #if str($action.by_taxon) == "true": | ||
| --by-taxon | ||
| #end if | ||
| #elif $action.action_selector == "remove": | ||
| remove | ||
| --out-prefix '$action.out_prefix' | ||
| --out-format $action.remove_out_format | ||
| --taxa-to-remove | ||
| #for $taxon in $action.taxa_to_remove | ||
| '$taxon.taxon' | ||
| #end for | ||
| #elif $action.action_selector == "translate": | ||
| translate | ||
| --code $action.genetic_code | ||
| --reading-frame $action.reading_frame | ||
| --out-format $action.translate_out_format | ||
| #elif $action.action_selector == "trim": | ||
| trim | ||
| --trim-fraction $action.trim_fraction | ||
| #if str($action.retain_parsimony) == "true": | ||
| --retain-only-parsimony-sites | ||
| #end if | ||
| --out-format $action.trim_out_format | ||
| #end if | ||
| ## Inputs are passed by element identifier: they were symlinked under that name into the working directory. | ||
| --in-files | ||
| #for $f in $input_files | ||
| '${f.element_identifier}' | ||
| #end for | ||
| --in-format $in_format | ||
| --data-type $data_type | ||
| --cores $cores | ||
| $check_align | ||
| ]]></command> | ||
| </xml> | ||
| </macros> | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| >Spec1 | ||
| ACDEFGHIKLMN | ||
| >Spec2 | ||
| ACDEYGHIKLMN | ||
| >Spec3 | ||
| ACDEFGHIKLMQ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| 3 12 | ||
|
|
||
| Spec1 ACDEFGHIKLMN | ||
| Spec2 ACDEYGHIKLMN | ||
| Spec3 ACDEFGHIKLMQ | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| >SpecA | ||
| MSTNPKPQRKST | ||
| >SpecB | ||
| MSTNPKPQRKSA | ||
| >SpecC | ||
| MSTDPKPQRKST |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| 3 12 | ||
|
|
||
| SpecA MSTNPKPQRKST | ||
| SpecB MSTNPKPQRKSA | ||
| SpecC MSTDPKPQRKST | ||
|
|
Uh oh!
There was an error while loading. Please reload this page.