diff --git a/.github/workflows/github_build.yml b/.github/workflows/github_build.yml index 10127d8..856dbfb 100644 --- a/.github/workflows/github_build.yml +++ b/.github/workflows/github_build.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8] + python-version: [3.12] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} @@ -44,17 +44,17 @@ jobs: echo "TEST fdog.setup" fdog.setup -d /home/runner/work/fDOG/fDOG/dt --woFAS echo "TEST fdog.checkData" - fdog.checkData -s /home/runner/work/fDOG/fDOG/dt/searchTaxa_dir -c /home/runner/work/fDOG/fDOG/dt/coreTaxa_dir -a /home/runner/work/fDOG/fDOG/dt/annotation_dir --reblast + fdog.checkData -s /home/runner/work/fDOG/fDOG/dt/searchTaxa_dir -c /home/runner/work/fDOG/fDOG/dt/coreTaxa_dir -a /home/runner/work/fDOG/fDOG/dt/annotation_dir --reblast --ignoreAnno echo "TEST fdog.showTaxa" fdog.showTaxa echo "TEST fdog.run" - fdog.run --seqFile infile.fa --jobName test --refspec HUMAN@9606@3 --fasOff --group mammalia + fdog.run --seqFile infile.fa --jobName test --refspec HUMAN@9606@qfo24_02 --fasOff --group mammalia mkdir seeds path=$(fdog.setup -d ./ --getSourcepath); a="1 2 3"; for i in ${a[@]}; do cp $path/data/infile.fa seeds/$i.fa; done echo "TEST fdogs.run" - fdogs.run --seqFolder seeds --jobName test_multi --refspec HUMAN@9606@3 --fasOff --searchTaxa PARTE@5888@3,THAPS@35128@3 --hmmScoreType sequence + fdogs.run --seqFolder seeds --jobName test_multi --refspec HUMAN@9606@qfo24_02 --fasOff --searchTaxa PARTE@5888@qfo24_02,THAPS@35128@qfo24_02 --hmmScoreType sequence echo "TEST fdog.addTaxon" - head /home/runner/work/fDOG/fDOG/dt/searchTaxa_dir/HUMAN@9606@3/HUMAN@9606@3.fa > hm.fa + head /home/runner/work/fDOG/fDOG/dt/searchTaxa_dir/HUMAN@9606@qfo24_02/HUMAN@9606@qfo24_02.fa > hm.fa fdog.addTaxon -f hm.fa -i 9606 -o ./ -c -a ls - name: Deploy diff --git a/README.md b/README.md index 67b64db..d51cd4d 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ 
# fDOG - Feature-aware Directed OrtholoG search +[![published in: MBE](https://img.shields.io/badge/published%20in-MBE-ff69b4)](https://doi.org/10.1093/molbev/msaf120) [![PyPI version](https://badge.fury.io/py/fdog.svg)](https://pypi.org/project/fdog/) [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) -[![Build Status](https://travis-ci.com/BIONF/fDOG.svg?branch=master)](https://travis-ci.com/BIONF/fDOG) ![Github Build](https://github.com/BIONF/fDOG/workflows/build/badge.svg) # Table of Contents @@ -19,7 +19,7 @@ # How to install -*fDOG* tool is distributed as a python package called *fdog*. It is compatible with [Python ≥ v3.7](https://www.python.org/downloads/). +*fDOG* tool is distributed as a python package called *fdog*. It is compatible with [Python ≥ v3.12](https://www.python.org/downloads/). ## Install the fDOG package You can install *fdog* using `pip`: @@ -59,7 +59,7 @@ You will get a warning if any of the dependencies are not ready to use, please s *fdog* will run smoothly with the provided sample input file 'infile.fa' if everything is set correctly. ``` -fdog.run --seqFile infile.fa --jobName test --refspec HUMAN@9606@3 +fdog.run --seqFile infile.fa --jobName test --refspec HUMAN@9606@qfo24_02 ``` The output files with the prefix `test` will be saved at your current working directory. You can have an overview about all available options with the command @@ -71,7 +71,7 @@ Please find more information in [our wiki](https://github.com/BIONF/fDOG/wiki) t # fDOG data set -Within the data package we provide a set of 78 reference taxa. They can be automatically downloaded during the setup. This data comes "ready to use" with the *fdog* framework. Species data must be present in the three directories listed below: +Within the data package we provide a set of [81 reference taxa](https://ftp.ebi.ac.uk/pub/databases/reference_proteomes/QfO/QfO_release_2024_02.tar.gz). 
They will be automatically downloaded during the setup. This data comes "ready to use" with the *fdog* framework. Species data must be present in the three directories listed below: * searchTaxa_dir (Contains sub-directories for proteome fasta files for each species) * coreTaxa_dir (Contains sub-directories for BLAST databases made with `makeblastdb` out of your proteomes) @@ -79,7 +79,7 @@ Within the data package we provide a set of 78 reference taxa. They can be autom For each species/taxon there is a sub-directory named in accordance to the naming schema ([Species acronym]@[NCBI ID]@[Proteome version]) -*fdog* is not limited to those 78 taxa. If needed the user can manually add further gene sets (multiple fasta format) using provided functions. +*fdog* is not limited to those 81 reference taxa. If needed the user can manually add further gene sets (multiple fasta format) using provided functions. ## Adding a new gene set into fDOG For adding **one gene set**, please use the `fdog.addTaxon` function: @@ -112,7 +112,7 @@ _**NOTE:** After adding new taxa into *fdog*, you should [check for the validity Any bug reports or comments, suggestions are highly appreciated. Please [open an issue on GitHub](https://github.com/BIONF/fDOG/issues/new) or be in touch via email. # How to cite -Ebersberger, I., Strauss, S. & von Haeseler, A. HaMStR: Profile hidden markov model based search for orthologs in ESTs. BMC Evol Biol 9, 157 (2009), [doi:10.1186/1471-2148-9-157](https://doi.org/10.1186/1471-2148-9-157) +Tran V, Langschied F, Muelbaier H, Dosch J, Arthen F, Balint M, Ebersberger I. 2025. Feature architecture-aware ortholog search with fDOG reveals the distribution of plant cell wall-degrading enzymes across life. Molecular Biology and Evolution:msaf120. 
https://doi.org/10.1093/molbev/msaf120 # Contributors - [Ingo Ebersberger](https://github.com/ebersber) diff --git a/fdog/addTaxa.py b/fdog/addTaxa.py index c03a9a8..e5982c8 100644 --- a/fdog/addTaxa.py +++ b/fdog/addTaxa.py @@ -26,12 +26,11 @@ from Bio import SeqIO import multiprocessing as mp from tqdm import tqdm -from ete3 import NCBITaxa import re import shutil from datetime import datetime import time -from pkg_resources import get_distribution +from importlib.metadata import version, PackageNotFoundError from collections import OrderedDict import fdog.libs.zzz as general_fn @@ -66,8 +65,8 @@ def parse_map_file(mapping_file, folIn): def main(): - version = get_distribution('fdog').version - parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(version) + '.') + fdog_version = version("fdog") + parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(fdog_version) + '.') required = parser.add_argument_group('required arguments') optional = parser.add_argument_group('optional arguments') required.add_argument('-i', '--input', help='Path to input folder', action='store', default='', required=True) diff --git a/fdog/addTaxon.py b/fdog/addTaxon.py index bd17fe0..3751f2e 100755 --- a/fdog/addTaxon.py +++ b/fdog/addTaxon.py @@ -26,7 +26,7 @@ import shutil import multiprocessing as mp from datetime import datetime -from pkg_resources import get_distribution +from importlib.metadata import version, PackageNotFoundError import fdog.libs.zzz as general_fn import fdog.libs.tree as tree_fn @@ -34,8 +34,8 @@ def main(): - version = get_distribution('fdog').version - parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(version) + '.') + fdog_version = version("fdog") + parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(fdog_version) + '.') required = parser.add_argument_group('required arguments') optional = parser.add_argument_group('optional arguments') 
required.add_argument('-f', '--fasta', help='FASTA file of input taxon', action='store', default='', required=True) diff --git a/fdog/checkData.py b/fdog/checkData.py index 74b3b93..755240b 100644 --- a/fdog/checkData.py +++ b/fdog/checkData.py @@ -26,14 +26,12 @@ import subprocess import shutil from Bio import SeqIO -from ete3 import NCBITaxa +from ete4 import NCBITaxa import re from datetime import datetime import multiprocessing as mp from tqdm import tqdm -from pkg_resources import get_distribution -from Bio.Blast.Applications import NcbiblastpCommandline - +from importlib.metadata import version, PackageNotFoundError import fdog.libs.zzz as general_fn import fdog.libs.blast as blast_fn @@ -176,13 +174,18 @@ def run_check_fasta(checkDir, replace, delete, concat): def check_blastdb(args): """ Check for outdated blastdb """ - (query, taxon, coreTaxa_dir, searchTaxa_dir) = args - blast_db = '%s/%s/%s' % (coreTaxa_dir, taxon, taxon) + query, taxon, coreTaxa_dir, searchTaxa_dir = args + blast_db = f"{coreTaxa_dir}/{taxon}/{taxon}" + try: - blastp_cline = NcbiblastpCommandline(query = query, db = blast_db) - stdout, stderr = blastp_cline() - except: + subprocess.run( + ["blastp", "-query", query, "-db", blast_db], + capture_output=True, text=True, check=True + ) + except (subprocess.CalledProcessError, OSError): + # blastp failed or could not be started: report this DB to the caller return([query, blast_db]) + fai_in_genome = "%s/%s/%s.fa.fai" % (searchTaxa_dir, taxon, taxon) fai_in_blast = "%s/%s/%s.fa.fai" % (coreTaxa_dir, taxon, taxon) # check if fai_in_blast is a valid symlink @@ -418,8 +421,8 @@ def run_check(args): return(caution) def main(): - version = get_distribution('fdog').version - parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(version) + '.') + fdog_version = version("fdog") + parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(fdog_version) + '.') parser.add_argument('-s', '--searchTaxa_dir', help='Path to search taxa directory 
(e.g. fdog_dataPath/searchTaxa_dir)', action='store', default='') parser.add_argument('-c', '--coreTaxa_dir', help='Path to blastDB directory (e.g. fdog_dataPath/coreTaxa_dir)', action='store', default='') parser.add_argument('-a', '--annotation_dir', help='Path to feature annotation directory (e.g. fdog_dataPath/annotation_dir)', action='store', default='') diff --git a/fdog/libs/addtaxon.py b/fdog/libs/addtaxon.py index 995eeb1..2da4066 100644 --- a/fdog/libs/addtaxon.py +++ b/fdog/libs/addtaxon.py @@ -20,7 +20,6 @@ from pathlib import Path from Bio import SeqIO import subprocess -from ete3 import NCBITaxa import re from datetime import datetime from collections import OrderedDict diff --git a/fdog/libs/blast.py b/fdog/libs/blast.py index 2cb4609..e40532a 100644 --- a/fdog/libs/blast.py +++ b/fdog/libs/blast.py @@ -17,7 +17,6 @@ import os import sys -from Bio.Blast.Applications import NcbiblastpCommandline import xml.etree.ElementTree as ET import subprocess @@ -29,21 +28,21 @@ def do_blastsearch( """ Perform blastp search for a query fasta file Return an XML string contains blast result """ - filter = 'no' - if lowComplexityFilter == True: - filter = 'yes' + filter_value = "yes" if lowComplexityFilter else "no" try: - blastp_cline = NcbiblastpCommandline( - query = query, db = blast_db, evalue = evalBlast, seg = filter, - max_target_seqs = 10, outfmt = 5) - stdout, stderr = blastp_cline() - return(stdout) - except: - sys.exit( - 'ERROR: Error running blastp search for %s against %s\n%s' - % (query, blast_db, NcbiblastpCommandline( - query = query, db = blast_db, evalue = evalBlast, seg = filter, - max_target_seqs = 10, outfmt = 5))) + cmd = [ + "blastp", + "-query", query, + "-db", blast_db, + "-evalue", str(evalBlast), + "-seg", filter_value, + "-max_target_seqs", "10", + "-outfmt", "5" + ] + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + return result.stdout + except subprocess.CalledProcessError as e: + sys.exit(f"ERROR: Error 
running BLASTP search for {query} against {blast_db}\n{e.stderr}") def parse_blast_xml(blast_output): diff --git a/fdog/libs/corecompile.py b/fdog/libs/corecompile.py index bd57e07..99e9bbd 100644 --- a/fdog/libs/corecompile.py +++ b/fdog/libs/corecompile.py @@ -19,7 +19,7 @@ import os import shutil from pathlib import Path -from ete3 import NCBITaxa +from ete4 import NCBITaxa from Bio import SeqIO import time diff --git a/fdog/libs/preparation.py b/fdog/libs/preparation.py index 2d79f82..f18e141 100644 --- a/fdog/libs/preparation.py +++ b/fdog/libs/preparation.py @@ -17,10 +17,10 @@ import sys import os +import subprocess from pathlib import Path from Bio import SeqIO -from Bio.Blast.Applications import NcbiblastpCommandline -from ete3 import NCBITaxa +from ete4 import NCBITaxa import fdog.libs.zzz as general_fn import fdog.libs.fasta as fasta_fn @@ -107,17 +107,15 @@ def check_input(args): def check_blast_version(corepath, refspec): """ Check if blast DBs in corepath is compatible with blastp version """ - fdog_path = os.path.realpath(__file__).replace('/libs/preparation.py','') - query = fdog_path + '/data/infile.fa' - blast_db = '%s/%s/%s' % (corepath, refspec, refspec) + fdog_path = os.path.realpath(__file__).replace('/libs/preparation.py', '') + query = os.path.join(fdog_path, 'data', 'infile.fa') + blast_db = os.path.join(corepath, refspec, refspec) try: - blastp_cline = NcbiblastpCommandline( - query = query, db = blast_db) - stdout, stderr = blastp_cline() - except: - sys.exit( - 'ERROR: Error running blast (probably conflict with BLAST DBs versions)\n%s' - % (NcbiblastpCommandline(query = query, db = blast_db))) + cmd = ["blastp", "-query", query, "-db", blast_db] + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + except subprocess.CalledProcessError as e: + sys.exit(f"ERROR: Error running BLAST (probably conflict with BLAST DB versions)\n{e.stderr}") + def check_ranks_core_taxa(corepath, refspec, minDist, maxDist): """ Check if 
refspec (or all core taxa) have a valid minDist and maxDist tax ID diff --git a/fdog/libs/tree.py b/fdog/libs/tree.py index b2de19f..57efcaf 100644 --- a/fdog/libs/tree.py +++ b/fdog/libs/tree.py @@ -16,7 +16,7 @@ ####################################################################### import re -from ete3 import NCBITaxa +from ete4 import NCBITaxa import fdog.libs.zzz as general_fn @@ -57,8 +57,9 @@ def get_ancestor(id1, id2, ncbi): Return dictionary {ancestor_id: ancestor_rank} """ tree = ncbi.get_topology([id1, id2], intermediate_nodes = False) - ancestor = tree.get_common_ancestor(id1, id2).name - return(ncbi.get_rank([ancestor])) + ancestor_name = tree.common_ancestor(id1, id2) + ancestor_id = int(ancestor_name.name) + return(ncbi.get_rank([ancestor_id])) def check_common_ancestor(ref_id, ancestor, minDist, maxDist, ncbi): @@ -68,6 +69,7 @@ def check_common_ancestor(ref_id, ancestor, minDist, maxDist, ncbi): """ ref_lineage = ncbi.get_lineage(ref_id) (min_ref, max_ref) = get_rank_range(ref_lineage, minDist, maxDist, ncbi) + ancestor = int(ancestor) if not ancestor in ref_lineage: return(0) ancestor_index = len(ref_lineage) - ref_lineage.index(ancestor) - 1 @@ -78,7 +80,7 @@ def check_common_ancestor(ref_id, ancestor, minDist, maxDist, ncbi): def remove_clade(tree, node_id): """ Remove a clade from a tree """ - removed_clade = tree.search_nodes(name = str(node_id))[0] + removed_clade = list(tree.search_nodes(name = str(node_id)))[0] removed_node = removed_clade.detach() return(tree) @@ -96,12 +98,12 @@ def get_leaves_dict(spec_lineage, tree, min_index, max_index): for i in range(len(spec_lineage)): if i >= min_index and i <= max_index: curr_node = spec_lineage[i] - node = tree.search_nodes(name = str(curr_node)) + node = list(tree.search_nodes(name = str(curr_node))) if len(node) > 0: for leaf in node: node_dict[spec_lineage[i]] = [] for t in leaf.traverse(): - if t.is_leaf(): + if t.is_leaf: if not t.name in already_added: already_added.append(t.name) 
node_dict[spec_lineage[i]].append(t.name) diff --git a/fdog/mergeOutput.py b/fdog/mergeOutput.py index b5c5006..9e7d0c0 100644 --- a/fdog/mergeOutput.py +++ b/fdog/mergeOutput.py @@ -21,7 +21,7 @@ from os import listdir as ldir import argparse import yaml -from pkg_resources import get_distribution +from importlib.metadata import version, PackageNotFoundError from Bio import SeqIO def createConfigPP(phyloprofile, domains_0, ex_fasta, directory, out): @@ -37,8 +37,8 @@ def createConfigPP(phyloprofile, domains_0, ex_fasta, directory, out): def main(): - version = get_distribution('fdog').version - parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(version) + '.') + fdog_version = version("fdog") + parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(fdog_version) + '.') parser.add_argument('-i', '--input', help='Input directory, where all single output (.extended.fa, .phyloprofile, _forward.domains, _reverse.domains) can be found', action='store', default='', required=True) diff --git a/fdog/removefDog.py b/fdog/removefDog.py index 9a28b41..bd34c90 100644 --- a/fdog/removefDog.py +++ b/fdog/removefDog.py @@ -20,7 +20,7 @@ import argparse import subprocess import shutil -from pkg_resources import get_distribution +from importlib.metadata import version, PackageNotFoundError import fdog.setupfDog as setupfDog_fn @@ -48,8 +48,8 @@ def query_yes_no(question, default='yes'): def main(): - version = get_distribution('fdog').version - parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(version) + '.') + fdog_version = version("fdog") + parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(fdog_version) + '.') parser.add_argument('--all', help='Remove fdog together with all files/data within the installed fdog directory', action='store_true', default=False) args = parser.parse_args() data = args.all diff --git a/fdog/runMulti.py b/fdog/runMulti.py index 
1bce014..b1b3587 100644 --- a/fdog/runMulti.py +++ b/fdog/runMulti.py @@ -25,8 +25,8 @@ import shutil import multiprocessing as mp from tqdm import tqdm -from ete3 import NCBITaxa -from pkg_resources import get_distribution +from ete4 import NCBITaxa +from importlib.metadata import version, PackageNotFoundError import time import fdog.libs.zzz as general_fn @@ -161,8 +161,8 @@ def join_outputs(outpath, jobName, seeds, keep, silentOff): def main(): - version = get_distribution('fdog').version - parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(version) + '.', + fdog_version = version("fdog") + parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(fdog_version) + '.', epilog="For more information on certain options, please refer to the wiki pages " "on github: https://github.com/BIONF/fDOG/wiki") required = parser.add_argument_group('Required arguments') diff --git a/fdog/runSingle.py b/fdog/runSingle.py index a781a63..9520249 100644 --- a/fdog/runSingle.py +++ b/fdog/runSingle.py @@ -19,8 +19,8 @@ import os import argparse import subprocess -from ete3 import NCBITaxa -from pkg_resources import get_distribution +from ete4 import NCBITaxa +from importlib.metadata import version, PackageNotFoundError import time import fdog.libs.zzz as general_fn @@ -33,8 +33,8 @@ def main(): - version = get_distribution('fdog').version - parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(version) + '.', + fdog_version = version("fdog") + parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(fdog_version) + '.', epilog="For more information on certain options, please refer to the wiki pages " "on github: https://github.com/BIONF/fDOG/wiki") required = parser.add_argument_group('Required arguments') diff --git a/fdog/setPaths.py b/fdog/setPaths.py index 3b6e501..7271427 100644 --- a/fdog/setPaths.py +++ b/fdog/setPaths.py @@ -19,7 +19,7 @@ import os import argparse 
-from pkg_resources import get_distribution +from importlib.metadata import version, PackageNotFoundError import fdog.libs.zzz as general_fn import fdog.checkData as check_data_fn @@ -65,8 +65,8 @@ def check_data(searchpath, corepath, annopath): def main(): - version = get_distribution('fdog').version - parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(version) + '.') + fdog_version = version("fdog") + parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(fdog_version) + '.') required = parser.add_argument_group('required arguments') optional = parser.add_argument_group('optional arguments') required.add_argument('--searchpath', help='Path to search taxa folder (e.g. fdog_data/searchTaxa_dir)', action='store', default='', required=True) diff --git a/fdog/setupfDog.py b/fdog/setupfDog.py index 2de9c06..184de6f 100644 --- a/fdog/setupfDog.py +++ b/fdog/setupfDog.py @@ -22,9 +22,9 @@ import argparse import subprocess import shutil -from ete3 import NCBITaxa +from ete4 import NCBITaxa from pathlib import Path -from pkg_resources import get_distribution +from importlib.metadata import version, PackageNotFoundError import fdog.libs.zzz as general_fn import fdog.libs.fas as fas_fn @@ -147,25 +147,24 @@ def check_dependencies(fdogPath): def download_data(dataPath, resetData): """ Downloade pre-calculated fDOG data """ - data_fdog_file = "data_HaMStR-2019c.tar.gz" + data_fdog_file = "data_fDOG_2024.tar.gz" checksum_data = "1748371655 621731824 $data_fdog_file" genome_path = '%s/searchTaxa_dir' % dataPath Path(genome_path).mkdir(parents = True, exist_ok = True) - if len(general_fn.read_dir(genome_path)) < 1 or resetData: data_url = 'https://applbio.biologie.uni-frankfurt.de/download/hamstr_qfo' if os.path.exists(data_fdog_file) and resetData: os.remove(data_fdog_file) - # general_fn.download_file(data_url, data_fdog_file) - ####### temporary solution while the uni network does not work ######### - wgetCmd = 'wget 
"https://www.dropbox.com/scl/fi/t2ln18k0jthc3y74s591q/data_HaMStR-2019c.tar.gz?rlkey=c66nc3eslqyn2a6k6ey4e678r&st=plzvbllv&dl=0"' - try: - subprocess.run([wgetCmd], shell=True, check=True) - shutil.move("data_HaMStR-2019c.tar.gz?rlkey=c66nc3eslqyn2a6k6ey4e678r&st=plzvbllv&dl=0", "data_HaMStR-2019c.tar.gz") - except: - print('Problem occurred while download demo data from dropbox') - ######################################################################## + general_fn.download_file(data_url, data_fdog_file) + # ####### temporary solution while the uni network does not work ######### + # wgetCmd = 'wget "https://www.dropbox.com/scl/fi/t2ln18k0jthc3y74s591q/data_HaMStR-2019c.tar.gz?rlkey=c66nc3eslqyn2a6k6ey4e678r&st=plzvbllv&dl=0"' + # try: + # subprocess.run([wgetCmd], shell=True, check=True) + # shutil.move("data_HaMStR-2019c.tar.gz?rlkey=c66nc3eslqyn2a6k6ey4e678r&st=plzvbllv&dl=0", "data_HaMStR-2019c.tar.gz") + # except: + # print('Problem occurred while download demo data from dropbox') + # ######################################################################## try: print('Extracting %s...' 
% data_fdog_file) shutil.unpack_archive(data_fdog_file, dataPath, 'gztar') @@ -175,7 +174,7 @@ def download_data(dataPath, resetData): os.rename('%s/genome_dir' % dataPath, '%s/searchTaxa_dir' % dataPath) os.rename('%s/blast_dir' % dataPath, '%s/coreTaxa_dir' % dataPath) os.rename('%s/weight_dir' % dataPath, '%s/annotation_dir' % dataPath) - check_cmd = 'fdog.checkData -s %s/searchTaxa_dir -c %s/coreTaxa_dir -a %s/annotation_dir --reblast' % (dataPath, dataPath, dataPath) + check_cmd = 'fdog.checkData -s %s/searchTaxa_dir -c %s/coreTaxa_dir -a %s/annotation_dir --reblast --ignoreAnno' % (dataPath, dataPath, dataPath) try: print('Checking downloaded data...') subprocess.run([check_cmd], stdout = subprocess.DEVNULL, check = True, shell = True) @@ -201,8 +200,8 @@ def write_pathconfig(fdogPath, dataPath): def main(): - version = get_distribution('fdog').version - parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(version) + '.') + fdog_version = version("fdog") + parser = argparse.ArgumentParser(description='You are running fDOG version ' + str(fdog_version) + '.') required = parser.add_argument_group('required arguments') optional = parser.add_argument_group('optional arguments') required.add_argument('-d', '--dataPath', help='Output path for fDOG data', action='store', default='', required=True) @@ -234,7 +233,7 @@ def main(): ### check if pathconfig file exists pathconfigFile = '%s/bin/pathconfig.yml' % fdogPath - demo_cmd = 'fdog.run --seqFile infile.fa --jobName test --refspec HUMAN@9606@3' + demo_cmd = 'fdog.run --seqFile infile.fa --jobName test --refspec HUMAN@9606@qfo24_02' if os.path.exists(pathconfigFile) and not force: check_fas = 1 if not woFAS: @@ -247,7 +246,7 @@ def main(): print('You can test fDOG using the following command:\n%s --fasOff' % demo_cmd) sys.exit() - ### get ncbi taxonomy database for ete3 + ### get ncbi taxonomy database for ete4 print('*** Creating local NCBI taxonomy database...') ncbi = NCBITaxa() @@ 
-262,11 +261,18 @@ def main(): if check_conda_env() == True: req_file = '%s/data/conda_requirements.yml' % fdogPath print('=> Dependencies in %s' % req_file) - conda_install_cmd = 'conda install -c bioconda --file %s -y' % (req_file) + + install_cmd = f'install -c bioconda --file {req_file} -y' + if shutil.which("micromamba"): + install_cmd = f'micromamba {install_cmd}' + elif shutil.which("mamba"): + install_cmd = f'mamba {install_cmd}' + else: + install_cmd = f'conda {install_cmd}' try: - subprocess.call([conda_install_cmd], shell = True) + subprocess.call(install_cmd, shell=True) except: - sys.exit('\033[91mERROR: Cannot install conda packages in %s!\033[0m' % req_file) + sys.exit(f'\033[91mERROR: Cannot install conda packages in {req_file}!\033[0m') else: install_cmd = 'sudo apt-get install -y -qq ' sys.exit('\033[91mERROR: Please install these tools manually:\n%s\nusing the command: %s!\033[0m' % (', '.join(missing_tools), install_cmd)) diff --git a/fdog/showTaxa.py b/fdog/showTaxa.py index 7bb27c4..fd41ada 100644 --- a/fdog/showTaxa.py +++ b/fdog/showTaxa.py @@ -17,7 +17,7 @@ import sys import os -from ete3 import NCBITaxa +from ete4 import NCBITaxa import fdog.libs.zzz as general_fn diff --git a/setup.py b/setup.py index 717be14..a35b396 100644 --- a/setup.py +++ b/setup.py @@ -26,8 +26,8 @@ setup( name="fdog", - version="0.1.35", - python_requires='>=3.7.0', + version="1.0.0", + python_requires='>=3.12.0', description="Feature-aware Directed OrtholoG search tool", long_description=long_description, long_description_content_type="text/markdown", @@ -39,7 +39,7 @@ install_requires=[ 'biopython', 'tqdm', - 'ete3', + 'ete4', 'six', 'PyYAML', 'pyhmmer',