From 7f73cf6693395ed3d5b0a60f7450acb7f824c6c4 Mon Sep 17 00:00:00 2001 From: magicprotoss Date: Wed, 14 Aug 2024 22:36:55 +0800 Subject: [PATCH 1/6] =?UTF-8?q?purge=20=F0=9F=92=A9;=20add=20-id=20option?= =?UTF-8?q?=20when=20building=20zotu=20tab?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- q2_usearch/_illumina_pipeline.py | 176 ++++++++++--------------------- q2_usearch/plugin_setup.py | 7 ++ 2 files changed, 62 insertions(+), 121 deletions(-) diff --git a/q2_usearch/_illumina_pipeline.py b/q2_usearch/_illumina_pipeline.py index b8cb43a..0d263b2 100644 --- a/q2_usearch/_illumina_pipeline.py +++ b/q2_usearch/_illumina_pipeline.py @@ -52,6 +52,18 @@ def py_to_cli_interface(cmd, verbose=True): return log_lines_lst +def validate_threads_count(user_threads, verbose=True): + if user_threads == "auto": + return os.cpu_count() - 3 + threads = user_threads + node_thread_count = os.cpu_count() + if user_threads > node_thread_count: + if verbose: + print("Number of threads specified higher than max available on node...") + print("Setting threads to max available on node...") + return node_thread_count + return threads + # Pool All Samples into a single fastq @@ -112,57 +124,22 @@ def _pool_samples(demultiplexed_sequences_dirpath, working_dir, keep_annotations else: print("Now Working on sample: " + sample_id) - # Could not find -fastx_relabel equivalent in vsearch - if use_vsearch: - dna_seqs_gen = skbio.io.registry.read( - gzip_reader, format="fastq", verify=True, variant=variant) - # skbio's fastq writer is way too slow - i = 0 - for seq in dna_seqs_gen: - i = i + 1 - seq.metadata["id"] = sample_id + "." + str(i) - if not keep_annotations: - seq.metadata["description"] = "" - else: - seq.metadata["description"] = seq.metadata['description'].replace( - "\t", " ") - # pooled seqs use 1.8 encoding - seq.write(pooled_seqs_fh, format="fastq", variant="illumina1.8") - - # Usearch is much faster at relabeling seqs - else: - - uzipped_seq_fp = os.path.join( - unzipped_seqs_dirpath, sample_id + ".fastq") - relabed_seq_fp = os.path.join( - relabed_seqs_dirpath, sample_id + ".fastq") - - with open(uzipped_seq_fp, 'wt') as f: - f.write(gzip_reader.read()) - f.flush() - - # get input seqs count - dna_seqs_gen = skbio.io.registry.read( - uzipped_seq_fp, format="fastq", verify=True, variant=variant) - i = len([seq for seq in dna_seqs_gen]) - - # build relab command - cmd = ["usearch", - "-fastx_relabel", uzipped_seq_fp, - "-prefix", sample_id + ".", - "-fastqout", relabed_seq_fp - ] - - relab_log = py_to_cli_interface(cmd, False) - - os.remove(uzipped_seq_fp) - - # merge all relabed seqs into one file - - with open(relabed_seq_fp, 'rt') as seq: - shutil.copyfileobj(seq, pooled_seqs_fh) - # del relabed seq to save space - os.remove(relabed_seq_fp) + # -fastx_relabel was deprecated in v12 + + dna_seqs_gen = skbio.io.registry.read( + gzip_reader, format="fastq", verify=True, variant=variant) + # skbio's fastq writer is way too slow + i = 0 + for seq in dna_seqs_gen: + i = i + 1 + seq.metadata["id"] = sample_id + "." + str(i) + if not keep_annotations: + seq.metadata["description"] = "" + else: + seq.metadata["description"] = seq.metadata['description'].replace( + "\t", " ") + # pooled seqs use 1.8 encoding + seq.write(pooled_seqs_fh, format="fastq", variant="illumina1.8") # write input seqs count to stats_df pipeout_denoise_stats_df.loc[index, 'prior_to_maxee_filt'] = i @@ -262,6 +239,7 @@ def _quality_control_cli(working_dir, min_qscore=None, # Since Usearch Retains LQ reads during the final otutab stage, don't perform anthing other than maxee filtering max_ee=1.0, trim_left=0, + trim_right=0, # add this parameter to allow position-based trimming on merged reads trunc_right=0, min_len=50, # Length filter is nessesary when dealing with valid data max_ns=None, @@ -299,7 +277,7 @@ def _quality_control_cli(working_dir, cmd += ["-fastq_maxns", str(max_ns)] # add threads settings if threads != "auto": - cmd += ["-threads", str(threads)] + cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] # ring notification bell if verbose: @@ -325,6 +303,10 @@ def _quality_control_cli(working_dir, 0] stats_df = stats_df.groupby('sample-id').count() + + # conserve disk space + if use_vsearch: + os.remove(pooled_seqs_fp) return stats_df @@ -360,7 +342,7 @@ def _dereplicate_cli(working_dir, if strand == "both": cmd += ["-strand", "both"] if threads != "auto": - cmd += ["-threads", str(threads)] + cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] # run command and get stats derep_log = py_to_cli_interface(cmd, verbose) @@ -379,6 +361,10 @@ def _dereplicate_cli(working_dir, filtered_reads = int(derep_stats_lst[derep_stats_lst.index("seqs") - 1]) unique_reads = int(derep_stats_lst[derep_stats_lst.index("uniques") - 1]) singletons = int(derep_stats_lst[derep_stats_lst.index("singletons") - 1]) + + # conserve disk space + if not use_vsearch: + os.remove(filtered_reads_fp) return filtered_reads, unique_reads, singletons @@ -419,10 +405,10 @@ def _unoise_cli(working_dir, ] if min_size != 8: - unoise_cmd += ["-minsize", str(min_size)] + unoise_cmd += ["--minsize", str(min_size)] if unoise_alpha != 2.0: - unoise_cmd += ["-alpha", str(unoise_alpha)] + unoise_cmd += ["--alpha", str(unoise_alpha)] py_to_cli_interface(unoise_cmd, verbose) @@ -433,6 +419,9 @@ def _unoise_cli(working_dir, ] silence = py_to_cli_interface(uchime_cmd, verbose) + + # conserve disk space + os.remove(unique_reads_fp) def _split_zotu_chimera(working_dir, use_vsearch: bool = False, @@ -525,6 +514,7 @@ def _cluster_zotus_cli(working_dir, return otus def _build_zotu_tab_cli(working_dir, + identity=0.97, threads="auto", chimera_map="vsearch", use_vsearch: bool = False, @@ -538,7 +528,6 @@ def _build_zotu_tab_cli(working_dir, tsv_chimeratab_fp = os.path.join(working_dir, "chimera_tab.tsv") unmapped_reads_fp = os.path.join(working_dir, "unmapped.fasta") log_fp = os.path.join(working_dir, "otutab.log") - node_thread_count = os.cpu_count() # Building otu table command if use_vsearch: @@ -558,20 +547,11 @@ def _build_zotu_tab_cli(working_dir, cmd += [ "-otutabout", tsv_otutab_fp, "-notmatched", unmapped_reads_fp, - "-id", "1.0", + "-id", str(identity), "-log", log_fp ] - if threads != "auto": - if threads > node_thread_count: - if verbose: - print("Number of threads specified higher than max available on node...") - print("Setting threads to max available on node...") - cmd += ["-threads", str(node_thread_count)] - else: - cmd += ["-threads", str(threads)] - else: - cmd += ["-threads", str(node_thread_count - 3)] + cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] # run command # we can do stats in another function @@ -591,16 +571,7 @@ def _build_zotu_tab_cli(working_dir, "-otutabout", tsv_chimeratab_fp ] - if threads != "auto": - if threads > node_thread_count: - if verbose: - print("Number of threads specified higher than max available on node...") - print("Setting threads to max available on node...") - chimera_cmd += ["-threads", str(node_thread_count)] - else: - chimera_cmd += ["-threads", str(threads)] - else: - chimera_cmd += ["-threads", str(node_thread_count - 3)] + chimera_cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] else: chimera_cmd = ["vsearch", "--usearch_global", unmapped_reads_fp, @@ -609,16 +580,7 @@ def _build_zotu_tab_cli(working_dir, "--strand", "both", "--otutabout", tsv_chimeratab_fp, ] - if threads != "auto": - if threads > node_thread_count: - if verbose: - print("Number of threads specified higher than max available on node...") - print("Setting threads to max available on node...") - chimera_cmd += ["--threads", str(node_thread_count)] - else: - chimera_cmd += ["--threads", str(threads)] - else: - chimera_cmd += ["--threads", str(node_thread_count - 3)] + chimera_cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] # run command # we can do stats in another function @@ -701,7 +663,6 @@ def _build_otu_tab_cli(working_dir, tsv_chimeratab_fp = os.path.join(working_dir, "chimera_tab.tsv") unmapped_reads_fp = os.path.join(working_dir, "unmapped.fasta") log_fp = os.path.join(working_dir, "otutab.log") - node_thread_count = os.cpu_count() # Building otu table command if use_vsearch: @@ -725,16 +686,7 @@ def _build_otu_tab_cli(working_dir, "-log", log_fp ] - if threads != "auto": - if threads > node_thread_count: - if verbose: - print("Number of threads specified higher than max available on node...") - print("Setting threads to max available on node...") - cmd += ["-threads", str(node_thread_count)] - else: - cmd += ["-threads", str(threads)] - else: - cmd += ["-threads", str(node_thread_count - 3)] + cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] # run command # we can do stats in another function @@ -754,16 +706,7 @@ def _build_otu_tab_cli(working_dir, "-otutabout", tsv_chimeratab_fp ] - if threads != "auto": - if threads > node_thread_count: - if verbose: - print("Number of threads specified higher than max available on node...") - print("Setting threads to max available on node...") - chimera_cmd += ["-threads", str(node_thread_count)] - else: - chimera_cmd += ["-threads", str(threads)] - else: - chimera_cmd += ["-threads", str(node_thread_count - 3)] + chimera_cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] else: chimera_cmd = ["vsearch", "--usearch_global", unmapped_reads_fp, @@ -772,17 +715,7 @@ def _build_otu_tab_cli(working_dir, "--strand", "both", "--otutabout", tsv_chimeratab_fp, ] - if threads != "auto": - if threads > node_thread_count: - if verbose: - print("Number of threads specified higher than max available on node...") - print("Setting threads to max available on node...") - chimera_cmd += ["--threads", str(node_thread_count)] - else: - chimera_cmd += ["--threads", str(threads)] - else: - chimera_cmd += ["--threads", str(node_thread_count - 3)] - + chimera_cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] # run command # we can do stats in another function if os.path.exists(chimeras_fp): @@ -880,6 +813,7 @@ def denoise_no_primer_pooled(demultiplexed_sequences: SingleLanePerSampleSingleE max_ee: float = 1.0, n_threads: str = "auto", min_size: int = 8, + min_zotu_mapping_identity: float = 1.00, # change to 0.97 in later release unoise_alpha: float = 2.0, use_vsearch: bool = False, ) -> (biom.Table, pd.Series, qiime2.Metadata): @@ -904,7 +838,7 @@ def denoise_no_primer_pooled(demultiplexed_sequences: SingleLanePerSampleSingleE " ;Singletons: " + str(singletons_count) + " ;Amplicons: " + \ str(amplicons_count) + " ;zOTUs: " + str(zotus_count) - _build_zotu_tab_cli(usearch_wd, use_vsearch=use_vsearch, + _build_zotu_tab_cli(usearch_wd, use_vsearch=use_vsearch, min_zotu_mapping_identity = min_zotu_mapping_identity, threads=n_threads, verbose=verbose) table, representative_sequences, reads_mapped_to_zotus_df, reads_mapped_to_chimeras_df = _prep_results_for_artifact_api( usearch_wd, verbose=verbose) @@ -1043,7 +977,7 @@ def denoise_then_cluster_no_primer_pooled(demultiplexed_sequences: SingleLanePer denoise_stats_str = "Total Reads: " + str(filtered_reads_count) + " ;Unique Reads :" + str(unique_reads_count) + \ " ;Singletons: " + str(singletons_count) + " ;Amplicons: " + \ - str(amplicons_count) + " ;ZOTUs: " + str(zotus_count) + " ;OTUs: " + str(otus_count) + str(amplicons_count) + " ;zOTUs: " + str(zotus_count) + " ;OTUs: " + str(otus_count) _build_otu_tab_cli(usearch_wd, identity=perc_identity, use_vsearch=use_vsearch, threads=n_threads, verbose=verbose) diff --git a/q2_usearch/plugin_setup.py b/q2_usearch/plugin_setup.py index f7d2cb6..f4f5fa7 100644 --- a/q2_usearch/plugin_setup.py +++ b/q2_usearch/plugin_setup.py @@ -49,6 +49,7 @@ 'min_len': Int % Range(0, None), 'max_ee': Float % Range(0.0, None), 'min_size': Int % Range(1, None), + 'min_zotu_mapping_identity': Float % Range(0.97, 1.00), 'unoise_alpha': Float % Range(0.0, None), 'n_threads': Int % Range(1, None) | Str % Choices(['auto']), 'use_vsearch': Bool, @@ -74,6 +75,12 @@ 'min_size': ('The minimum abundance of input reads to be retained. ' 'For higher sensivity, reducing minsize to 4 is reasonable. ' 'Note: with smaller minsize, there tends to be more errors in low-abundance zotus. '), + 'min_zotu_mapping_identity': ('When building zotu tab, the minimum identity to map a given read to a zotu. ' + 'This parameter is set to filter out all reads that contains too much errors, ' + 'Which supposedly, would effect the their pair-wise alignment against zOTUs when building zOTU table. ' + 'In dada2, this is avoided by performing aggressive max_ee filtering based on q-scores to remove those reads.' + "Then the 'KDIST_CUTOFF' and 'BAND_SIZE' parameter further restricts weak alignment. " + 'The default value is 0.97, which is the canonical radius of OTU clustering in the past. '), 'unoise_alpha': 'See UNOISE2 paper for definition', 'n_threads': ('The number of threads to use for computation. ' 'If set to auto, the plug-in will use (all vcores - 3) present on the node.'), From d8c655a8a3c4f461b851b7633fda63be2e1a3fff Mon Sep 17 00:00:00 2001 From: magicprotoss Date: Thu, 15 Aug 2024 22:21:30 +0800 Subject: [PATCH 2/6] =?UTF-8?q?fix=20=F0=9F=90=9B;=20change=20api=20to=20m?= =?UTF-8?q?atch=20q2-dada2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- q2_usearch/_illumina_pipeline.py | 6 +++--- q2_usearch/plugin_setup.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/q2_usearch/_illumina_pipeline.py b/q2_usearch/_illumina_pipeline.py index 0d263b2..2808f41 100644 --- a/q2_usearch/_illumina_pipeline.py +++ b/q2_usearch/_illumina_pipeline.py @@ -806,7 +806,7 @@ def _prep_results_for_artifact_api(working_dir, return table, rep_sequences, reads_mapped_to_features_df, reads_mapped_to_chimeras_df -def denoise_no_primer_pooled(demultiplexed_sequences: SingleLanePerSampleSingleEndFastqDirFmt, +def denoise_no_primer_pooled(demultiplexed_seqs: SingleLanePerSampleSingleEndFastqDirFmt, trim_left: int = 0, trunc_len: int = 0, min_len: int = 50, @@ -820,7 +820,7 @@ def denoise_no_primer_pooled(demultiplexed_sequences: SingleLanePerSampleSingleE verbose = True - demultiplexed_sequences_dirpath = str(demultiplexed_sequences) + demultiplexed_sequences_dirpath = str(demultiplexed_seqs) with tempfile.TemporaryDirectory() as usearch_wd: input_stats_df = _pool_samples( demultiplexed_sequences_dirpath, usearch_wd, use_vsearch=use_vsearch, verbose=verbose) @@ -838,7 +838,7 @@ def denoise_no_primer_pooled(demultiplexed_sequences: SingleLanePerSampleSingleE " ;Singletons: " + str(singletons_count) + " ;Amplicons: " + \ str(amplicons_count) + " ;zOTUs: " + str(zotus_count) - _build_zotu_tab_cli(usearch_wd, use_vsearch=use_vsearch, min_zotu_mapping_identity = min_zotu_mapping_identity, + _build_zotu_tab_cli(usearch_wd, use_vsearch=use_vsearch, identity = min_zotu_mapping_identity, threads=n_threads, verbose=verbose) table, representative_sequences, reads_mapped_to_zotus_df, reads_mapped_to_chimeras_df = _prep_results_for_artifact_api( usearch_wd, verbose=verbose) diff --git a/q2_usearch/plugin_setup.py b/q2_usearch/plugin_setup.py index f4f5fa7..e3e89e1 100644 --- a/q2_usearch/plugin_setup.py +++ b/q2_usearch/plugin_setup.py @@ -49,7 +49,7 @@ 'min_len': Int % Range(0, None), 'max_ee': Float % Range(0.0, None), 'min_size': Int % Range(1, None), - 'min_zotu_mapping_identity': Float % Range(0.97, 1.00), + 'min_zotu_mapping_identity': Float % Range(0.97, 1, inclusive_start=True, inclusive_end=True), 'unoise_alpha': Float % Range(0.0, None), 'n_threads': Int % Range(1, None) | Str % Choices(['auto']), 'use_vsearch': Bool, @@ -87,9 +87,9 @@ 'use_vsearch': 'Use vsearch instead of usearch for computation . ' }, inputs={ - 'demultiplexed_sequences': SampleData[SequencesWithQuality] | SampleData[JoinedSequencesWithQuality]}, + 'demultiplexed_seqs': SampleData[SequencesWithQuality] | SampleData[JoinedSequencesWithQuality]}, input_descriptions={ - 'demultiplexed_sequences': 'Quality screened, Adapter stripped, Joined(paired-end) sequences.'}, + 'demultiplexed_seqs': 'Quality screened, Adapter stripped, Joined(paired-end) sequences.'}, outputs=[('table', FeatureTable[Frequency]), ('representative_sequences', FeatureData[Sequence]), ('denoising_stats', SampleData[USEARCHStats])], From c0fcc35e4b29ca57bcefbd5955176abd50ee6460 Mon Sep 17 00:00:00 2001 From: magicprotoss Date: Fri, 16 Aug 2024 17:44:54 +0800 Subject: [PATCH 3/6] =?UTF-8?q?purge=20=F0=9F=92=A9;=20update=20api?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 8 +- README_ZH_CN.md | 8 +- q2_usearch/_illumina_pipeline.py | 487 +++++++++++++++---------------- q2_usearch/plugin_setup.py | 8 +- 4 files changed, 240 insertions(+), 271 deletions(-) diff --git a/README.md b/README.md index b18a8a8..b102b87 100644 --- a/README.md +++ b/README.md @@ -216,7 +216,7 @@ cd .. && rm -rf q2-usearch ``` bash qiime usearch denoise-no-primer-pooled \ - --i-demultiplexed-sequences fastq-seqs.qza \ + --i-demultiplexed-seqs fastq-seqs.qza \ --p-min-size 4 \ --o-representative-sequences rep-seqs-unoise3.qza \ --o-table table-unoise3.qza \ @@ -231,7 +231,7 @@ cd .. && rm -rf q2-usearch ``` bash qiime usearch denoise-no-primer-pooled \ - --i-demultiplexed-sequences ddbj_dl.qza \ + --i-demultiplexed-seqs ddbj_dl.qza \ --p-min-size 4 \ --o-representative-sequences rep-seqs-unoise3.qza \ --o-table table-unoise3.qza \ @@ -275,7 +275,7 @@ cd .. && rm -rf q2-usearch ``` bash qiime usearch denoise-no-primer-pooled \ - --i-demultiplexed-sequences demux.qza \ + --i-demultiplexed-seqs demux.qza \ --p-min-size 4 \ --p-trunc-len 120 \ --o-representative-sequences rep-seqs-unoise3.qza \ @@ -325,7 +325,7 @@ cd .. && rm -rf q2-usearch ``` bash qiime usearch denoise-no-primer-pooled \ - --i-demultiplexed-sequences merged.qza \ + --i-demultiplexed-seqs merged.qza \ --p-min-size 4 \ --o-representative-sequences rep-seqs-unoise3.qza \ --o-table table-unoise3.qza \ diff --git a/README_ZH_CN.md b/README_ZH_CN.md index 83fe495..6d50131 100644 --- a/README_ZH_CN.md +++ b/README_ZH_CN.md @@ -199,7 +199,7 @@ cd .. && rm -rf q2-usearch ``` bash qiime usearch denoise-no-primer-pooled \ - --i-demultiplexed-sequences fastq-seqs.qza \ + --i-demultiplexed-seqs fastq-seqs.qza \ --p-min-size 4 \ --o-representative-sequences rep-seqs-unoise3.qza \ --o-table table-unoise3.qza \ @@ -213,7 +213,7 @@ cd .. && rm -rf q2-usearch ``` bash qiime usearch denoise-no-primer-pooled \ - --i-demultiplexed-sequences ddbj_dl.qza \ + --i-demultiplexed-seqs ddbj_dl.qza \ --p-min-size 4 \ --o-representative-sequences rep-seqs-unoise3.qza \ --o-table table-unoise3.qza \ @@ -253,7 +253,7 @@ cd .. && rm -rf q2-usearch ``` bash qiime usearch denoise-no-primer-pooled \ - --i-demultiplexed-sequences demux.qza \ + --i-demultiplexed-seqs demux.qza \ --p-min-size 4 \ --p-trunc-len 120 \ --o-representative-sequences rep-seqs-unoise3.qza \ @@ -298,7 +298,7 @@ cd .. && rm -rf q2-usearch ``` bash qiime usearch denoise-no-primer-pooled \ - --i-demultiplexed-sequences merged.qza \ + --i-demultiplexed-seqs merged.qza \ --p-min-size 4 \ --o-representative-sequences rep-seqs-unoise3.qza \ --o-table table-unoise3.qza \ diff --git a/q2_usearch/_illumina_pipeline.py b/q2_usearch/_illumina_pipeline.py index 2808f41..bf73f08 100644 --- a/q2_usearch/_illumina_pipeline.py +++ b/q2_usearch/_illumina_pipeline.py @@ -52,6 +52,7 @@ def py_to_cli_interface(cmd, verbose=True): return log_lines_lst + def validate_threads_count(user_threads, verbose=True): if user_threads == "auto": return os.cpu_count() - 3 @@ -67,14 +68,14 @@ def validate_threads_count(user_threads, verbose=True): # Pool All Samples into a single fastq -def _pool_samples(demultiplexed_sequences_dirpath, working_dir, keep_annotations: bool = False, use_vsearch: bool = False, debug = False, verbose: bool = True): +def _pool_samples(demultiplexed_sequences_dirpath, working_dir, keep_annotations: bool = False, use_vsearch: bool = False, debug=False, verbose: bool = True): input_manifest_df = pd.read_csv(os.path.join( demultiplexed_sequences_dirpath, 'MANIFEST'), index_col=0, comment='#') - + use_temp_sample_ids = False - + # check if all input sample_ids meet usearch sample identifier requirements - if len(input_manifest_df) != len([ sample_id for sample_id in input_manifest_df.index.to_list() if re.match(r'^[a-zA-Z0-9_]+$', sample_id) ]): + if len(input_manifest_df) != len([sample_id for sample_id in input_manifest_df.index.to_list() if re.match(r'^[a-zA-Z0-9_]+$', sample_id)]): use_temp_sample_ids = True pipeout_denoise_stats_df = pd.DataFrame( index=input_manifest_df.index, columns=['prior_to_maxee_filt', 'fixed_sample_id']) @@ -88,7 +89,7 @@ def _pool_samples(demultiplexed_sequences_dirpath, working_dir, keep_annotations seqs_stats_dfs_dirpath = os.path.join(working_dir, "seqs_stats_dfs") os.mkdir(seqs_stats_dfs_dirpath) pooled_seqs_fp = os.path.join(working_dir, "merged.fastq") - + if debug: # just keep this part for debugging purpose # not sure if it's necessary @@ -99,13 +100,14 @@ def _pool_samples(demultiplexed_sequences_dirpath, working_dir, keep_annotations variant = "illumina1.8" elif phred_offset == 64: variant = "illumina1.3" - + if verbose: if use_temp_sample_ids: - print("Relabeling input seqs with unique sample identifiers, this will take a while...\n") + print( + "Relabeling input seqs with unique sample identifiers, this will take a while...\n") else: print("Adding sample-id to input seqs identifiers, this will take a while...\n") - + with open(pooled_seqs_fp, 'wt') as pooled_seqs_fh: sample_num = 1 for index, row in input_manifest_df.iterrows(): @@ -116,14 +118,14 @@ def _pool_samples(demultiplexed_sequences_dirpath, working_dir, keep_annotations fn = str(row['filename']) gzip_reader = gzip.open( os.path.join(demultiplexed_sequences_dirpath, fn), 'rt') - + if verbose: if use_temp_sample_ids: print("Now Working on sample: " + str(index)) print("Temporarily Relabeling this sample to: " + sample_id) else: print("Now Working on sample: " + sample_id) - + # -fastx_relabel was deprecated in v12 dna_seqs_gen = skbio.io.registry.read( @@ -140,15 +142,15 @@ def _pool_samples(demultiplexed_sequences_dirpath, working_dir, keep_annotations "\t", " ") # pooled seqs use 1.8 encoding seq.write(pooled_seqs_fh, format="fastq", variant="illumina1.8") - + # write input seqs count to stats_df pipeout_denoise_stats_df.loc[index, 'prior_to_maxee_filt'] = i - + if use_temp_sample_ids: # keep track of samples pipeout_denoise_stats_df.loc[index, 'fixed_sample_id'] = sample_id sample_num = sample_num + 1 - + # finally swap the index to fixed ids pipeout_denoise_stats_df.reset_index(inplace=True) @@ -156,74 +158,75 @@ def _pool_samples(demultiplexed_sequences_dirpath, working_dir, keep_annotations columns={'sample-id': 'original_sample_id'}, inplace=True) pipeout_denoise_stats_df.set_index('fixed_sample_id', inplace=True) pipeout_denoise_stats_df.index.name = "sample-id" - - + # seqkit makes this go burrrrr...... else: - + if verbose: - print("Adding sample-id to input seqs identifiers, seqkit makes things go burrrrrrrrrr...\n") - + print( + "Adding sample-id to input seqs identifiers, seqkit makes things go burrrrrrrrrr...\n") + with open(pooled_seqs_fp, 'wb') as pooled_seqs_fh: - + sample_num = 1 - + for index, row in input_manifest_df.iterrows(): - + if use_temp_sample_ids: sample_id = "S" + str(sample_num) else: sample_id = str(index) fn = str(row['filename']) input_fp = os.path.join(demultiplexed_sequences_dirpath, fn) - relabed_seq_fp = os.path.join(relabed_seqs_dirpath, sample_id + ".fastq") - seqs_stats_df_fp = os.path.join(seqs_stats_dfs_dirpath, sample_id + "_stats.tsv") - - + relabed_seq_fp = os.path.join( + relabed_seqs_dirpath, sample_id + ".fastq") + seqs_stats_df_fp = os.path.join( + seqs_stats_dfs_dirpath, sample_id + "_stats.tsv") + if verbose: if use_temp_sample_ids: print("Now Working on sample: " + str(index)) print("Temporarily Relabeling this sample to: " + sample_id) else: print("Now Working on sample: " + sample_id) - + # build relabel command relab_cmd = ["seqkit", - "replace", - "-i", input_fp, - "-o", relabed_seq_fp, - "-p", ".+", - "-r", sample_id + ".{nr}"] - + "replace", + "-i", input_fp, + "-o", relabed_seq_fp, + "-p", ".+", + "-r", sample_id + ".{nr}"] + silence = py_to_cli_interface(relab_cmd, verbose=False) - + # append to pooled_seqs and then del with open(relabed_seq_fp, 'rb') as seq: shutil.copyfileobj(seq, pooled_seqs_fh) - + os.remove(relabed_seq_fp) - + # build count seq num command stats_cmd = ["seqkit", - "stats", - "-T", - input_fp, - "-o", seqs_stats_df_fp] - + "stats", + "-T", + input_fp, + "-o", seqs_stats_df_fp] + silence = py_to_cli_interface(stats_cmd, verbose=False) - + # read stats df and append to pipeout_denoise_stats_df - stats_df = pd.read_csv(seqs_stats_df_fp, sep="\t", index_col=None, header=0) - pipeout_denoise_stats_df.loc[index, 'prior_to_maxee_filt'] = int(stats_df.at[0, 'num_seqs']) - + stats_df = pd.read_csv(seqs_stats_df_fp, sep="\t", + index_col=None, header=0) + pipeout_denoise_stats_df.loc[index, 'prior_to_maxee_filt'] = int( + stats_df.at[0, 'num_seqs']) - if use_temp_sample_ids: # keep track of samples pipeout_denoise_stats_df.loc[index, 'fixed_sample_id'] = sample_id sample_num = sample_num + 1 - - # finally swap the index to fixed ids + + # finally swap the index to fixed ids if use_temp_sample_ids: pipeout_denoise_stats_df.reset_index(inplace=True) @@ -236,10 +239,10 @@ def _pool_samples(demultiplexed_sequences_dirpath, working_dir, keep_annotations def _quality_control_cli(working_dir, - min_qscore=None, # Since Usearch Retains LQ reads during the final otutab stage, don't perform anthing other than maxee filtering + min_qscore=None, # Since Usearch Retains LQ reads during the final otutab stage, don't perform anthing other than maxee filtering max_ee=1.0, trim_left=0, - trim_right=0, # add this parameter to allow position-based trimming on merged reads + trim_right=0, # add this parameter to allow position-based trimming on merged reads trunc_right=0, min_len=50, # Length filter is nessesary when dealing with valid data max_ns=None, @@ -303,7 +306,7 @@ def _quality_control_cli(working_dir, 0] stats_df = stats_df.groupby('sample-id').count() - + # conserve disk space if use_vsearch: os.remove(pooled_seqs_fp) @@ -361,7 +364,7 @@ def _dereplicate_cli(working_dir, filtered_reads = int(derep_stats_lst[derep_stats_lst.index("seqs") - 1]) unique_reads = int(derep_stats_lst[derep_stats_lst.index("uniques") - 1]) singletons = int(derep_stats_lst[derep_stats_lst.index("singletons") - 1]) - + # conserve disk space if not use_vsearch: os.remove(filtered_reads_fp) @@ -419,10 +422,11 @@ def _unoise_cli(working_dir, ] silence = py_to_cli_interface(uchime_cmd, verbose) - + # conserve disk space os.remove(unique_reads_fp) + def _split_zotu_chimera(working_dir, use_vsearch: bool = False, verbose=True): @@ -430,7 +434,7 @@ def _split_zotu_chimera(working_dir, zotus_fp = os.path.join(working_dir, "zotus.fasta") chimeras_fp = os.path.join(working_dir, "chimeras.fasta") vsearch_amplicon_fp = os.path.join(working_dir, "vsearch_amps.fasta") - + # init counters amplicons = 0 zotus = 0 @@ -438,16 +442,19 @@ def _split_zotu_chimera(working_dir, if not use_vsearch: # used skbio and hashlib to hash the zotu ids # convert lower case to upper here - dna_seqs_gen = skbio.io.registry.read(amplicons_fp, format="fasta", verify=True, lowercase=True) + dna_seqs_gen = skbio.io.registry.read( + amplicons_fp, format="fasta", verify=True, lowercase=True) with open(chimeras_fp, "wt") as chimeras_fh: with open(zotus_fp, "wt") as zotus_fh: # input seqs already sorted by decreasing abundance by usearch for seq in dna_seqs_gen: if "amptype=chimera" in seq.metadata["id"]: - seq.metadata["id"] = hashlib.md5(str(seq).upper().encode('utf-8')).hexdigest() + seq.metadata["id"] = hashlib.md5( + str(seq).upper().encode('utf-8')).hexdigest() seq.write(chimeras_fh, format="fasta", max_width=80) else: - seq.metadata["id"] = hashlib.md5(str(seq).upper().encode('utf-8')).hexdigest() + seq.metadata["id"] = hashlib.md5( + str(seq).upper().encode('utf-8')).hexdigest() seq.write(zotus_fh, format="fasta", max_width=80) zotus += 1 amplicons += 1 @@ -474,19 +481,20 @@ def _split_zotu_chimera(working_dir, with open(chimeras_fp, "wt") as f: for seq in chimera_seqs_lst: seq.write(f, format="fasta", max_width=80) - + if verbose: print("Successfully split zotus and chimeras and converted to hashed ids") - + return amplicons, zotus + def _cluster_zotus_cli(working_dir, identity=0.99, use_vsearch: bool = False, verbose=True): zotus_fp = os.path.join(working_dir, "zotus.fasta") otus_fp = os.path.join(working_dir, "otus.fasta") - + # building uclust cmd # during the denoise step the output is sorted by size, seqs with higer abundance tend to be with lower noise if not use_vsearch: @@ -503,49 +511,71 @@ def _cluster_zotus_cli(working_dir, "--centroids", otus_fp, "--usersort" ] - + silence = py_to_cli_interface(cmd, verbose) - - otus = len([seq for seq in skbio.io.registry.read(otus_fp, format="fasta", verify=True)]) - + + otus = len([seq for seq in skbio.io.registry.read( + otus_fp, format="fasta", verify=True)]) + if verbose: print("Successfully clustered zotus into otus") - + return otus -def _build_zotu_tab_cli(working_dir, - identity=0.97, - threads="auto", - chimera_map="vsearch", - use_vsearch: bool = False, - verbose=True): - raw_reads_fp = os.path.join(working_dir, "merged.fastq") - filtered_reads_fp = os.path.join(working_dir, "filtered.fasta") - zotus_fp = os.path.join(working_dir, "zotus.fasta") +def _build_feature_tab_cli(working_dir, + feature_type, + identity=0.97, + threads="auto", + chimera_map="vsearch", + use_vsearch: bool = False, + verbose=True): + + if feature_type == "zOTU": + features_fp = os.path.join(working_dir, "zotus.fasta") + tsv_feature_tab_fp = os.path.join(working_dir, "zotu_tab.tsv") + elif feature_type == "OTU": + features_fp = os.path.join(working_dir, "otus.fasta") + tsv_feature_tab_fp = os.path.join(working_dir, "otu_tab.tsv") + else: + raise ValueError("Invalid feature type. Must be either 'zOTU' or 'OTU'.") + + if use_vsearch: + reads_in_fp = os.path.join(working_dir, "filtered.fasta") + else: + reads_in_fp = os.path.join(working_dir, "merged.fastq") + chimeras_fp = os.path.join(working_dir, "chimeras.fasta") - tsv_otutab_fp = os.path.join(working_dir, "zotu_tab.tsv") - tsv_chimeratab_fp = os.path.join(working_dir, "chimera_tab.tsv") + tsv_chimera_tab_fp = os.path.join(working_dir, "chimera_tab.tsv") unmapped_reads_fp = os.path.join(working_dir, "unmapped.fasta") log_fp = os.path.join(working_dir, "otutab.log") # Building otu table command if use_vsearch: + # https://github.com/torognes/vsearch/issues/552 + # vsearch now supports fastq inputs + # still need to validate clustering strategy (id or size) when id != 1 + # once clarified, we can update the pipeline to provide length trimmed reads as input + # for now, stick to the old pipeline cmd = ["vsearch", - "--usearch_global", filtered_reads_fp, - "--db", zotus_fp, - # https://github.com/torognes/vsearch/issues/552 + "--usearch_global", reads_in_fp, + "--db", features_fp, "--strand", "plus" ] else: cmd = ["usearch", - "-otutab", raw_reads_fp, - "-zotus", zotus_fp + "-otutab", reads_in_fp, ] + # identity first + if feature_type == "zOTU": + cmd += ["-zotus", features_fp] + # size first + if feature_type == "OTU": + cmd += ["-otus", features_fp] cmd += [ - "-otutabout", tsv_otutab_fp, + "-otutabout", tsv_feature_tab_fp, "-notmatched", unmapped_reads_fp, "-id", str(identity), "-log", log_fp @@ -560,36 +590,33 @@ def _build_zotu_tab_cli(working_dir, # step2 search chimeras against unmatched fasta # build cmd if chimera_map == "usearch": - if verbose: - print("This step is performed just to keep a mental check...") - print("Consider using vsearch to speed up this step...") + # Consider using vsearch to speed up this step... chimera_cmd = ["usearch", "-search_global", unmapped_reads_fp, "-db", chimeras_fp, "-id", "1.0", "-strand", "both", - "-otutabout", tsv_chimeratab_fp + "-otutabout", tsv_chimera_tab_fp ] - - chimera_cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] else: chimera_cmd = ["vsearch", "--usearch_global", unmapped_reads_fp, "--db", chimeras_fp, "--id", "1.0", "--strand", "both", - "--otutabout", tsv_chimeratab_fp, + "--otutabout", tsv_chimera_tab_fp, ] - chimera_cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] + chimera_cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] # run command # we can do stats in another function if os.path.exists(chimeras_fp): # 2nd layer of insurance - if len([ chimera for chimera in skbio.io.registry.read(chimeras_fp, format="fasta", verify=True)]) != 0: + if len([chimera for chimera in skbio.io.registry.read(chimeras_fp, format="fasta", verify=True)]) != 0: + print("This step is performed just to keep a mental check...") chimera_log = py_to_cli_interface(chimera_cmd, verbose) - - + + def _uparse_cli(working_dir, min_size=2, verbose=True): @@ -598,7 +625,7 @@ def _uparse_cli(working_dir, chimeras_fp = os.path.join(working_dir, "chimeras.fasta") uparse_tab_fp = os.path.join(working_dir, "uparse_out.tsv") log_fp = os.path.join(working_dir, "uparse.log") - + # building uparse command cmd = ["usearch", "-cluster_otus", unique_reads_fp, @@ -606,38 +633,43 @@ def _uparse_cli(working_dir, "-uparseout", uparse_tab_fp, "-log", log_fp ] - + if min_size != 2: cmd += ["-minsize", str(min_size)] - + silence = py_to_cli_interface(cmd, verbose) - + # get otu count and reformat otu_ids to qiime2 format # convert to uppercase just in case - dna_seqs_gen = skbio.io.registry.read(otus_fp, format="fasta", verify=True, lowercase=True) - otu_seqs = [ seq for seq in dna_seqs_gen ] + dna_seqs_gen = skbio.io.registry.read( + otus_fp, format="fasta", verify=True, lowercase=True) + otu_seqs = [seq for seq in dna_seqs_gen] otu_seq_count = len(otu_seqs) os.remove(otus_fp) with open(otus_fp, "wt") as f: for seq in otu_seqs: - skbio.DNA(str(seq), metadata = {'id': hashlib.md5(str(seq).upper().encode('utf-8')).hexdigest()}).write(f, format="fasta", max_width=80) + skbio.DNA(str(seq), metadata={'id': hashlib.md5(str(seq).upper().encode( + 'utf-8')).hexdigest()}).write(f, format="fasta", max_width=80) # indentify and retrive chimeras seqs # get chimera reads ids - + uparse_tab = pd.read_csv(uparse_tab_fp, sep="\t", header=None) - + try: - chimera_seqs_id_list = uparse_tab.loc[uparse_tab[1] == "noisy_chimera", 0].str.split(";", expand = True)[0].to_list() + chimera_seqs_id_list = uparse_tab.loc[uparse_tab[1] == "noisy_chimera", 0].str.split( + ";", expand=True)[0].to_list() chimera_seq_count = len(chimera_seqs_id_list) - dna_seqs_gen = skbio.io.registry.read(unique_reads_fp, format="fasta", verify=True) - chimera_seqs_lst = [ seq for seq in dna_seqs_gen if seq.metadata["id"].split(";")[0] in chimera_seqs_id_list ] - + dna_seqs_gen = skbio.io.registry.read( + unique_reads_fp, format="fasta", verify=True) + chimera_seqs_lst = [seq for seq in dna_seqs_gen if seq.metadata["id"].split(";")[ + 0] in chimera_seqs_id_list] + # Write chimeras with open(chimeras_fp, "wt") as f: for seq in chimera_seqs_lst: seq.write(f, format="fasta", max_width=80) - + except KeyError: chimera_seq_count = 0 if verbose: @@ -645,83 +677,10 @@ def _uparse_cli(working_dir, if verbose: print("Denovo OTU clustering completed, chimeras splitted") - - return otu_seq_count , chimera_seq_count -def _build_otu_tab_cli(working_dir, - identity = 0.97, - threads="auto", - chimera_map="vsearch", - use_vsearch: bool = False, - verbose=True): -################################################################################ - raw_reads_fp = os.path.join(working_dir, "merged.fastq") - filtered_reads_fp = os.path.join(working_dir, "filtered.fasta") - otus_fp = os.path.join(working_dir, "otus.fasta") - chimeras_fp = os.path.join(working_dir, "chimeras.fasta") - tsv_otutab_fp = os.path.join(working_dir, "otu_tab.tsv") - tsv_chimeratab_fp = os.path.join(working_dir, "chimera_tab.tsv") - unmapped_reads_fp = os.path.join(working_dir, "unmapped.fasta") - log_fp = os.path.join(working_dir, "otutab.log") + return otu_seq_count, chimera_seq_count - # Building otu table command - if use_vsearch: - cmd = ["vsearch", - "--usearch_global", filtered_reads_fp, - "--db", otus_fp, - # https://github.com/torognes/vsearch/issues/552 - "--strand", "plus" - ] - else: - cmd = ["usearch", - "-otutab", raw_reads_fp, - "-otus", otus_fp - ] - - cmd += [ - "-id", str(identity), - "-otutabout", tsv_otutab_fp, - "-notmatched", unmapped_reads_fp, - "-log", log_fp - ] - - cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] - - # run command - # we can do stats in another function - otutab_log = py_to_cli_interface(cmd, verbose) - - # step2 search chimeras against unmatched fasta - # build cmd - if chimera_map == "usearch": - if verbose: - print("This step is performed just to keep a mental check...") - print("Consider using vsearch to speed up this step...") - chimera_cmd = ["usearch", - "-search_global", unmapped_reads_fp, - "-db", chimeras_fp, - "-id", "1.0", - "-strand", "both", - "-otutabout", tsv_chimeratab_fp - ] - - chimera_cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] - else: - chimera_cmd = ["vsearch", - "--usearch_global", unmapped_reads_fp, - "--db", chimeras_fp, - "--id", "1.0", - "--strand", "both", - "--otutabout", tsv_chimeratab_fp, - ] - chimera_cmd += ["-threads", str(validate_threads_count(threads, verbose=verbose))] - # run command - # we can do stats in another function - if os.path.exists(chimeras_fp): - # 2nd layer of insurance - if len([ chimera for chimera in skbio.io.registry.read(chimeras_fp, format="fasta", verify=True)]) != 0: - chimera_log = py_to_cli_interface(chimera_cmd, verbose) ################################################################################ @@ -771,9 +730,10 @@ def _prep_results_for_artifact_api(working_dir, reads_mapped_to_chimeras_df = chimera_df.sum().to_frame().astype('int') reads_mapped_to_chimeras_df.columns = ["reads_mapped_to_chimeras"] reads_mapped_to_chimeras_df.index.name = "sample-id" - + else: - reads_mapped_to_chimeras_df = pd.DataFrame({"reads_mapped_to_chimeras": 0}, index=reads_mapped_to_features_df.index) + reads_mapped_to_chimeras_df = pd.DataFrame( + {"reads_mapped_to_chimeras": 0}, index=reads_mapped_to_features_df.index) if verbose: print("Now sorting features accroding to feature tab...") @@ -786,9 +746,10 @@ def _prep_results_for_artifact_api(working_dir, # if the dataset is big enough, usearch will return mapped zotus and zotutab not 1: 1 # furthermore, in usearch12, it seemed the mapped out output was broken, i.e. zotu in tab missing in mapped_zotus.fa # will raise a github issue to Edgar, meanwhile enforce fix here and in the table gen step - rep_seqs_lst = [ skbio.DNA(str(seq).upper(), metadata = {'id': seq.metadata['id']}) for seq in skbio.io.registry.read(features_fp, format="fasta", verify=True) ] - rep_seqs_id_lst = [ seq.metadata["id"] for seq in rep_seqs_lst ] - rep_sequences = pd.Series(rep_seqs_lst, index = rep_seqs_id_lst) + rep_seqs_lst = [skbio.DNA(str(seq).upper(), metadata={ + 'id': seq.metadata['id']}) for seq in skbio.io.registry.read(features_fp, format="fasta", verify=True)] + rep_seqs_id_lst = [seq.metadata["id"] for seq in rep_seqs_lst] + rep_sequences = pd.Series(rep_seqs_lst, index=rep_seqs_id_lst) rep_seqs_dropped = rep_sequences[~rep_sequences.index.isin(tab_df.index)] rep_sequences = rep_sequences[rep_sequences.index.isin(tab_df.index)] rep_sequences = rep_sequences.reindex(tab_df.index) @@ -797,8 +758,10 @@ def _prep_results_for_artifact_api(working_dir, if verbose: if not rep_seqs_dropped.empty: - print('The following zOTUs were extracted by not mapped to zOTU table: ', rep_seqs_dropped.index, sep = '\nzOTU_ID: ') - + print('The following features were extracted by not mapped to the feature table: ') + for item in rep_seqs_dropped.index: + print('Feature_ID: ', item) + if dt_type == "zotu": print("Successfully sorted zotutab and zotus...") elif dt_type == "otu": @@ -806,18 +769,19 @@ def _prep_results_for_artifact_api(working_dir, return table, rep_sequences, reads_mapped_to_features_df, reads_mapped_to_chimeras_df + def denoise_no_primer_pooled(demultiplexed_seqs: SingleLanePerSampleSingleEndFastqDirFmt, - trim_left: int = 0, - trunc_len: int = 0, - min_len: int = 50, - max_ee: float = 1.0, - n_threads: str = "auto", - min_size: int = 8, - min_zotu_mapping_identity: float = 1.00, # change to 0.97 in later release - unoise_alpha: float = 2.0, - use_vsearch: bool = False, - ) -> (biom.Table, pd.Series, qiime2.Metadata): - + trim_left: int = 0, + trunc_len: int = 0, + min_len: int = 50, + max_ee: float = 1.0, + n_threads: str = "auto", + min_size: int = 8, + min_zotu_mapping_identity: float = 1.00, # change to 0.97 in later release + unoise_alpha: float = 2.0, + use_vsearch: bool = False, + ) -> (biom.Table, pd.Series, qiime2.Metadata): + verbose = True demultiplexed_sequences_dirpath = str(demultiplexed_seqs) @@ -827,22 +791,23 @@ def denoise_no_primer_pooled(demultiplexed_seqs: SingleLanePerSampleSingleEndFas # need to sep for each sample as well filter_stats_df = _quality_control_cli(usearch_wd, trim_left=trim_left, trunc_right=trunc_len, min_len=min_len, max_ee=max_ee, use_vsearch=use_vsearch, threads=n_threads, verbose=verbose) - + filtered_reads_count, unique_reads_count, singletons_count, = _dereplicate_cli( usearch_wd, use_vsearch=use_vsearch, threads=n_threads, verbose=verbose) _unoise_cli( usearch_wd, min_size=min_size, unoise_alpha=unoise_alpha, use_vsearch=use_vsearch, verbose=verbose) - amplicons_count, zotus_count, = _split_zotu_chimera(usearch_wd, use_vsearch=use_vsearch, verbose=verbose) - + amplicons_count, zotus_count, = _split_zotu_chimera( + usearch_wd, use_vsearch=use_vsearch, verbose=verbose) + denoise_stats_str = "Total Reads: " + str(filtered_reads_count) + " ;Unique Reads :" + str(unique_reads_count) + \ " ;Singletons: " + str(singletons_count) + " ;Amplicons: " + \ str(amplicons_count) + " ;zOTUs: " + str(zotus_count) - _build_zotu_tab_cli(usearch_wd, use_vsearch=use_vsearch, identity = min_zotu_mapping_identity, - threads=n_threads, verbose=verbose) + _build_feature_tab_cli(usearch_wd, feature_type="zOTU", use_vsearch=use_vsearch, identity=min_zotu_mapping_identity, + threads=n_threads, chimera_map="vsearch", verbose=verbose) table, representative_sequences, reads_mapped_to_zotus_df, reads_mapped_to_chimeras_df = _prep_results_for_artifact_api( usearch_wd, verbose=verbose) - + # finally prep denoise stats df denoise_stats_df = input_stats_df.merge( filter_stats_df, how="left", left_index=True, right_index=True) @@ -857,41 +822,43 @@ def denoise_no_primer_pooled(demultiplexed_seqs: SingleLanePerSampleSingleEndFas denoise_stats_df['percent_of_input_mapped_to_chimeras'] = ( denoise_stats_df['reads_mapped_to_chimeras'] / denoise_stats_df['prior_to_maxee_filt']) * 100 denoise_stats_df["denoise_stats_pooled_mode"] = denoise_stats_str - + # if sample ids were swapped during the run, we need to swap the sample ids back if 'original_sample_id' in denoise_stats_df.columns: - + id_map_dict = denoise_stats_df['original_sample_id'].to_dict() denoise_stats_df.index = denoise_stats_df['original_sample_id'] denoise_stats_df.index.name = 'sample-id' denoise_stats_df.drop(columns=['original_sample_id'], inplace=True) table.update_ids(id_map_dict, axis='sample', inplace=True) - + denoise_stats_df.fillna(0, inplace=True) - + denoising_stats = qiime2.Metadata(denoise_stats_df) - + return table, representative_sequences, denoising_stats # do we need to expose additional uparse parameters here? -def cluster_no_primer_pooled(demultiplexed_sequences: SingleLanePerSampleSingleEndFastqDirFmt, - trim_left: int = 0, - trunc_len: int = 0, - min_len: int = 50, - max_ee: float = 1.0, - n_threads: str = "auto", - min_size: int = 2, - ) -> (biom.Table, pd.Series, qiime2.Metadata): - + + +def cluster_no_primer_pooled(demultiplexed_seqs: SingleLanePerSampleSingleEndFastqDirFmt, + trim_left: int = 0, + trunc_len: int = 0, + min_len: int = 50, + max_ee: float = 1.0, + n_threads: str = "auto", + min_size: int = 2, + ) -> (biom.Table, pd.Series, qiime2.Metadata): + verbose = True - + if verbose: print("Since usearch version 9.0.2132, the ability to directly cluster OTUs to custom identity threshould had been removed. ") print("The reason for this is that indentity threshold other than 0.97 mess up the chimera detection step. ") print("Further expalnation can be found here: https://drive5.com/usearch/manual/uparse_otu_radius.html") print("BTW uparse is also usearch exclusive, no vsearch support here.") - demultiplexed_sequences_dirpath = str(demultiplexed_sequences) + demultiplexed_sequences_dirpath = str(demultiplexed_seqs) with tempfile.TemporaryDirectory() as usearch_wd: input_stats_df = _pool_samples( demultiplexed_sequences_dirpath, usearch_wd, verbose=verbose) @@ -904,16 +871,16 @@ def cluster_no_primer_pooled(demultiplexed_sequences: SingleLanePerSampleSingleE usearch_wd, threads=n_threads, verbose=verbose) otu_seq_count, chimera_seq_count, = _uparse_cli( usearch_wd, min_size=min_size, verbose=verbose) - + denoise_stats_str = "Total Reads: " + str(filtered_reads_count) + " ;Unique Reads :" + str(unique_reads_count) + \ " ;Singletons: " + str(singletons_count) + " ;OTUs: " + str(otu_seq_count) + \ " :Chimeras: " + str(chimera_seq_count) - _build_otu_tab_cli(usearch_wd, - threads=n_threads, verbose=verbose) + _build_feature_tab_cli(usearch_wd, feature_type="OTU", + threads=n_threads, verbose=verbose, chimera_map="vsearch", use_vsearch=False, identity=0.97) table, representative_sequences, reads_mapped_to_otus_df, reads_mapped_to_chimeras_df = _prep_results_for_artifact_api( usearch_wd, verbose=verbose) - + # finally prep denoise stats df denoise_stats_df = input_stats_df.merge( filter_stats_df, how="left", left_index=True, right_index=True) @@ -928,62 +895,65 @@ def cluster_no_primer_pooled(demultiplexed_sequences: SingleLanePerSampleSingleE denoise_stats_df['percent_of_input_mapped_to_chimeras'] = ( denoise_stats_df['reads_mapped_to_chimeras'] / denoise_stats_df['prior_to_maxee_filt']) * 100 denoise_stats_df["denoise_stats_pooled_mode"] = denoise_stats_str - + # if sample ids were swapped during the run, we need to swap the sample ids back if 'original_sample_id' in denoise_stats_df.columns: - + id_map_dict = denoise_stats_df['original_sample_id'].to_dict() denoise_stats_df.index = denoise_stats_df['original_sample_id'] denoise_stats_df.index.name = 'sample-id' denoise_stats_df.drop(columns=['original_sample_id'], inplace=True) table.update_ids(id_map_dict, axis='sample', inplace=True) - + denoise_stats_df.fillna(0, inplace=True) - + denoising_stats = qiime2.Metadata(denoise_stats_df) - + return table, representative_sequences, denoising_stats -def denoise_then_cluster_no_primer_pooled(demultiplexed_sequences: SingleLanePerSampleSingleEndFastqDirFmt, - trim_left: int = 0, - trunc_len: int = 0, - min_len: int = 50, - max_ee: float = 1.0, - perc_identity: float = 0.99, - n_threads: str = "auto", - min_size: int = 8, - unoise_alpha: float = 2.0, - use_vsearch: bool = False, - ) -> (biom.Table, pd.Series, qiime2.Metadata): - + +def denoise_then_cluster_no_primer_pooled(demultiplexed_seqs: SingleLanePerSampleSingleEndFastqDirFmt, + trim_left: int = 0, + trunc_len: int = 0, + min_len: int = 50, + max_ee: float = 1.0, + perc_identity: float = 0.99, + n_threads: str = "auto", + min_size: int = 8, + unoise_alpha: float = 2.0, + use_vsearch: bool = False, + ) -> (biom.Table, pd.Series, qiime2.Metadata): + verbose = True - demultiplexed_sequences_dirpath = str(demultiplexed_sequences) + demultiplexed_sequences_dirpath = str(demultiplexed_seqs) with tempfile.TemporaryDirectory() as usearch_wd: input_stats_df = _pool_samples( demultiplexed_sequences_dirpath, usearch_wd, use_vsearch=use_vsearch, verbose=verbose) # need to sep for each sample as well filter_stats_df = _quality_control_cli(usearch_wd, trim_left=trim_left, trunc_right=trunc_len, min_len=min_len, max_ee=max_ee, use_vsearch=use_vsearch, threads=n_threads, verbose=verbose) - + filtered_reads_count, unique_reads_count, singletons_count, = _dereplicate_cli( usearch_wd, use_vsearch=use_vsearch, threads=n_threads, verbose=verbose) _unoise_cli( usearch_wd, min_size=min_size, unoise_alpha=unoise_alpha, use_vsearch=use_vsearch, verbose=verbose) - - amplicons_count, zotus_count, = _split_zotu_chimera(usearch_wd, use_vsearch=use_vsearch, verbose=verbose) - - otus_count = _cluster_zotus_cli(usearch_wd, identity=perc_identity, use_vsearch=use_vsearch, verbose=verbose) - + + amplicons_count, zotus_count, = _split_zotu_chimera( + usearch_wd, use_vsearch=use_vsearch, verbose=verbose) + + otus_count = _cluster_zotus_cli( + usearch_wd, identity=perc_identity, use_vsearch=use_vsearch, verbose=verbose) + denoise_stats_str = "Total Reads: " + str(filtered_reads_count) + " ;Unique Reads :" + str(unique_reads_count) + \ " ;Singletons: " + str(singletons_count) + " ;Amplicons: " + \ - str(amplicons_count) + " ;zOTUs: " + str(zotus_count) + " ;OTUs: " + str(otus_count) - - _build_otu_tab_cli(usearch_wd, identity=perc_identity, use_vsearch=use_vsearch, - threads=n_threads, verbose=verbose) + str(amplicons_count) + " ;zOTUs: " + \ + str(zotus_count) + " ;OTUs: " + str(otus_count) + _build_feature_tab_cli(usearch_wd, feature_type="OTU", + threads=n_threads, verbose=verbose, chimera_map="vsearch", use_vsearch=use_vsearch, identity=perc_identity) table, representative_sequences, reads_mapped_to_otus_df, reads_mapped_to_chimeras_df = _prep_results_for_artifact_api( usearch_wd, verbose=verbose) - + # finally prep denoise stats df denoise_stats_df = input_stats_df.merge( filter_stats_df, how="left", left_index=True, right_index=True) @@ -998,19 +968,18 @@ def denoise_then_cluster_no_primer_pooled(demultiplexed_sequences: SingleLanePer denoise_stats_df['percent_of_input_mapped_to_chimeras'] = ( denoise_stats_df['reads_mapped_to_chimeras'] / denoise_stats_df['prior_to_maxee_filt']) * 100 denoise_stats_df["denoise_stats_pooled_mode"] = denoise_stats_str - + # if sample ids were swapped during the run, we need to swap the sample ids back if 'original_sample_id' in denoise_stats_df.columns: - + id_map_dict = denoise_stats_df['original_sample_id'].to_dict() denoise_stats_df.index = denoise_stats_df['original_sample_id'] denoise_stats_df.index.name = 'sample-id' denoise_stats_df.drop(columns=['original_sample_id'], inplace=True) table.update_ids(id_map_dict, axis='sample', inplace=True) - + denoise_stats_df.fillna(0, inplace=True) - + denoising_stats = qiime2.Metadata(denoise_stats_df) - - return table, representative_sequences, denoising_stats + return table, representative_sequences, denoising_stats diff --git a/q2_usearch/plugin_setup.py b/q2_usearch/plugin_setup.py index e3e89e1..5755b20 100644 --- a/q2_usearch/plugin_setup.py +++ b/q2_usearch/plugin_setup.py @@ -137,9 +137,9 @@ 'If set to auto, the plug-in will use (all vcores - 3) present on the node.'), }, inputs={ - 'demultiplexed_sequences': SampleData[SequencesWithQuality] | SampleData[JoinedSequencesWithQuality]}, + 'demultiplexed_seqs': SampleData[SequencesWithQuality] | SampleData[JoinedSequencesWithQuality]}, input_descriptions={ - 'demultiplexed_sequences': 'Quality screened, Adapter stripped, Joined(paired-end) sequences.'}, + 'demultiplexed_seqs': 'Quality screened, Adapter stripped, Joined(paired-end) sequences.'}, outputs=[('table', FeatureTable[Frequency]), ('representative_sequences', FeatureData[Sequence]), ('stats', SampleData[USEARCHStats])], @@ -195,9 +195,9 @@ 'use_vsearch': 'Use vsearch instead of usearch for computation . ' }, inputs={ - 'demultiplexed_sequences': SampleData[SequencesWithQuality] | SampleData[JoinedSequencesWithQuality]}, + 'demultiplexed_seqs': SampleData[SequencesWithQuality] | SampleData[JoinedSequencesWithQuality]}, input_descriptions={ - 'demultiplexed_sequences': 'Quality screened, Adapter stripped, Joined(paired-end) sequences.'}, + 'demultiplexed_seqs': 'Quality screened, Adapter stripped, Joined(paired-end) sequences.'}, outputs=[('table', FeatureTable[Frequency]), ('representative_sequences', FeatureData[Sequence]), ('stats', SampleData[USEARCHStats])], From 7193788dd3119b0fce589f24d5ba2185666f0720 Mon Sep 17 00:00:00 2001 From: magicprotoss Date: Fri, 16 Aug 2024 17:49:00 +0800 Subject: [PATCH 4/6] =?UTF-8?q?purge=20=F0=9F=92=A9;=20update=20api?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- q2_usearch/_examples.py | 97 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 q2_usearch/_examples.py diff --git a/q2_usearch/_examples.py b/q2_usearch/_examples.py new file mode 100644 index 0000000..e660435 --- /dev/null +++ b/q2_usearch/_examples.py @@ -0,0 +1,97 @@ +# ---------------------------------------------------------------------------- +# Modified from q2-dada2 +# +# Copyright (c) 2016-2024, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- + + +demux_single_url = \ + 'https://data.qiime2.org/usage-examples/moving-pictures/demux.qza' + +demux_paired_url = \ + 'https://data.qiime2.org/usage-examples/atacama-soils/demux-full.qza' + + +def denoise_no_primer_pooled(use): + demux_single = use.init_artifact_from_url('demux_single', demux_single_url) + + rep_seqs, table_unoise3, denoise_stats = use.action( + use.UsageAction('usearch', 'denoise_no_primer_pooled'), + use.UsageInputs( + demultiplexed_seqs=demux_single, + trunc_len=120 + ), + use.UsageOutputNames( + representative_sequences='zOTUs', + table='zOTU_Table', + stats='denoising_stats' + ) + ) + + rep_seqs.assert_output_type('FeatureData[Sequence]') + table_unoise3.assert_output_type('FeatureTable[Frequency]') + denoise_stats.assert_output_type('SampleData[USEARCHStats]') + + +def denoise_then_cluster_no_primer_pooled(use): + demux_single = use.init_artifact_from_url('demux_single', demux_single_url) + + rep_seqs, table_uclust, denoise_stats = use.action( + use.UsageAction('usearch', 'denoise_then_cluster_no_primer_pooled'), + use.UsageInputs( + demultiplexed_seqs=demux_paired, + trunc_len=120, + min_size=4 + ), + use.UsageOutputNames( + representative_sequences='99_OTUs', + table='OTU_Table', + stats='denoising_and_uclusting_stats' + ) + ) + + rep_seqs.assert_output_type('FeatureData[Sequence]') + table_uclust.assert_output_type('FeatureTable[Frequency]') + denoise_stats.assert_output_type('SampleData[USEARCHStats]') + + +def cluster_no_primer_pooled(use): + demux_single = use.init_artifact_from_url('demux_single', demux_single_url) + + rep_seqs, table_uparse, denoise_stats = use.action( + use.UsageAction('usearch', 'cluster_no_primer_pooled'), + use.UsageInputs( + demultiplexed_seqs=demux_paired, + trunc_len=120 + ), + use.UsageOutputNames( + representative_sequences='97_OTUs', + table='OTU_Table', + stats='clustering_stats' + ) + ) + + rep_seqs.assert_output_type('FeatureData[Sequence]') + table_uparse.assert_output_type('FeatureTable[Frequency]') + stats.assert_output_type('SampleData[USEARCHStats]') + +def merge_pairs(use): + demux_paired = use.init_artifact_from_url('demux_paired', demux_paired_url) + + merged_sequences, unmerged_sequences = use.action( + use.UsageAction('usearch', 'merge-pairs'), + use.UsageInputs( + demultiplexed_seqs=demux_paired + ), + use.UsageOutputNames( + merged_sequences='merged_seqs', + unmerged_sequences='unmerged_seqs' + ) + ) + + merged_sequences.assert_output_type('SampleData[JoinedSequencesWithQuality]') + unmerged_sequences.assert_output_type('SampleData[PairedEndSequencesWithQuality]') From 15e0dd849ea526b399ced9953518bb90cc4f51cf Mon Sep 17 00:00:00 2001 From: magicprotoss Date: Fri, 16 Aug 2024 18:42:08 +0800 Subject: [PATCH 5/6] update tests --- q2_usearch/_examples.py | 24 ++++++++++++------------ q2_usearch/plugin_setup.py | 14 +++++++++++++- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/q2_usearch/_examples.py b/q2_usearch/_examples.py index e660435..5a08342 100644 --- a/q2_usearch/_examples.py +++ b/q2_usearch/_examples.py @@ -11,7 +11,6 @@ demux_single_url = \ 'https://data.qiime2.org/usage-examples/moving-pictures/demux.qza' - demux_paired_url = \ 'https://data.qiime2.org/usage-examples/atacama-soils/demux-full.qza' @@ -28,7 +27,7 @@ def denoise_no_primer_pooled(use): use.UsageOutputNames( representative_sequences='zOTUs', table='zOTU_Table', - stats='denoising_stats' + denoising_stats='denoising_stats' ) ) @@ -40,10 +39,10 @@ def denoise_no_primer_pooled(use): def denoise_then_cluster_no_primer_pooled(use): demux_single = use.init_artifact_from_url('demux_single', demux_single_url) - rep_seqs, table_uclust, denoise_stats = use.action( + rep_seqs, table_uclust, denoise_uclust_stats = use.action( use.UsageAction('usearch', 'denoise_then_cluster_no_primer_pooled'), use.UsageInputs( - demultiplexed_seqs=demux_paired, + demultiplexed_seqs=demux_single, trunc_len=120, min_size=4 ), @@ -56,16 +55,16 @@ def denoise_then_cluster_no_primer_pooled(use): rep_seqs.assert_output_type('FeatureData[Sequence]') table_uclust.assert_output_type('FeatureTable[Frequency]') - denoise_stats.assert_output_type('SampleData[USEARCHStats]') - - + denoise_uclust_stats.assert_output_type('SampleData[USEARCHStats]') + + def cluster_no_primer_pooled(use): demux_single = use.init_artifact_from_url('demux_single', demux_single_url) - rep_seqs, table_uparse, denoise_stats = use.action( + rep_seqs, table_uparse, cluster_stats = use.action( use.UsageAction('usearch', 'cluster_no_primer_pooled'), use.UsageInputs( - demultiplexed_seqs=demux_paired, + demultiplexed_seqs=demux_single, trunc_len=120 ), use.UsageOutputNames( @@ -77,13 +76,14 @@ def cluster_no_primer_pooled(use): rep_seqs.assert_output_type('FeatureData[Sequence]') table_uparse.assert_output_type('FeatureTable[Frequency]') - stats.assert_output_type('SampleData[USEARCHStats]') - + cluster_stats.assert_output_type('SampleData[USEARCHStats]') + + def merge_pairs(use): demux_paired = use.init_artifact_from_url('demux_paired', demux_paired_url) merged_sequences, unmerged_sequences = use.action( - use.UsageAction('usearch', 'merge-pairs'), + use.UsageAction('usearch', 'merge_pairs'), use.UsageInputs( demultiplexed_seqs=demux_paired ), diff --git a/q2_usearch/plugin_setup.py b/q2_usearch/plugin_setup.py index 5755b20..afcc197 100644 --- a/q2_usearch/plugin_setup.py +++ b/q2_usearch/plugin_setup.py @@ -13,7 +13,6 @@ from q2_types.feature_table import FeatureTable, Frequency from q2_types.sample_data import SampleData from q2_types.per_sample_sequences import SequencesWithQuality, Sequences, JoinedSequencesWithQuality, PairedEndSequencesWithQuality - from qiime2.plugin import plugin @@ -22,6 +21,7 @@ # Register Usearch stats fmt from q2_usearch._format import USEARCHStats, USEARCHStatsFormat, USEARCHStatsDirFmt +import q2_usearch._examples as examples citations = Citations.load("citations.bib", package="q2_usearch") @@ -60,6 +60,9 @@ 'You MUST Also MERGE Your Reads If You are Using PAIRED-END Sequncing Protocol. \n' + "You Can Directly Use the 'Valid-Data' Provided by the Sequencing Center. \n" + 'Vsearch was supported in early development but became deprecated for shipment.', + examples={ + 'denoise_no_primer_pooled': examples.denoise_no_primer_pooled + }, citations=[citations['edgar2016unoise2']], parameter_descriptions={ 'trim_left': ("Position at which sequences should be trimmed due to low quality. " @@ -119,6 +122,9 @@ 'You MUST Also MERGE Your Reads If You are Using PAIRED-END Sequncing Protocol. \n' + "You Can Directly Use the 'Valid-Data' Provided by the Sequencing Center. \n" + "Note: Nowadays 97% OTUs are Mostly Considered Mostly OBSELETE. ", + examples={ + 'cluster_no_primer_pooled': examples.cluster_no_primer_pooled + }, citations=[citations['edgar2013uparse']], parameter_descriptions={ 'trim_left': ("Position at which sequences should be trimmed due to low quality. " @@ -173,6 +179,9 @@ 'You MUST Also MERGE Your Reads If You are Using PAIRED-END Sequncing Protocol ' + "You Can Directly Use the 'Valid-Data' Provided by the Sequencing Center " + 'Using Vsearch as a drop-in Replcacement is supported But with some CAVEATS, see https://github/xxx for details. ', + examples={ + 'denoise_then_cluster_no_primer_pooled': examples.denoise_then_cluster_no_primer_pooled + }, citations=[citations['edgar2016unoise2']], parameter_descriptions={ 'trim_left': ("Position at which sequences should be trimmed due to low quality. " @@ -301,6 +310,9 @@ 'merge_pairs function. See the usearch documentation for ' 'details on how paired-end merging is performed, and for ' 'more information on the parameters to this method.'), + examples={ + 'merge_pairs': examples.merge_pairs + }, citations=[citations['edgar2010usearch']] ) From 0e0c5fd1649811daf094330ba640f885dac8fe7c Mon Sep 17 00:00:00 2001 From: magicprotoss Date: Tue, 20 May 2025 16:55:25 +0800 Subject: [PATCH 6/6] Transition from setup.py & friends to pyproject.toml --- MANIFEST.in | 2 - Makefile | 2 +- README.md | 21 +- README_ZH_CN.md | 24 +- pyproject.toml | 50 + q2_usearch/__init__.py | 8 +- q2_usearch/_sintax.py | 2 +- q2_usearch/_version.py | 526 -------- setup.cfg | 11 - setup.py | 30 - versioneer.py | 1857 -------------------------- "\342\200\216conda-recipe/meta.yaml" | 37 + 12 files changed, 112 insertions(+), 2458 deletions(-) delete mode 100644 MANIFEST.in create mode 100644 pyproject.toml delete mode 100644 q2_usearch/_version.py delete mode 100644 setup.cfg delete mode 100644 setup.py delete mode 100644 versioneer.py create mode 100644 "\342\200\216conda-recipe/meta.yaml" diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index a80efde..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include versioneer.py -include q2_plugin_name/_version.py diff --git a/Makefile b/Makefile index 0c7555d..1c33853 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ test-cov: all py.test --cov=q2_usearch install: all - $(PYTHON) setup.py install + $(PYTHON) -m pip install -v . dev: all pip install pre-commit diff --git a/README.md b/README.md index b102b87..1bd7c04 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,3 @@ ---- -editor_options: - markdown: - wrap: 72 ---- - # q2-usearch QIIME2 plug-in for [USEARCH](https://github.com/rcedgar/usearch12/) @@ -43,7 +37,7 @@ git clone https://github.com/magicprotoss/q2-usearch . Step 2: Activate the QIIME2 conda enviroment you wish to install to ``` bash -# conda activate qiime2-amplicon-2024.5 +# conda activate qiime2-amplicon-2025.4 conda activate ``` @@ -57,23 +51,26 @@ conda env list | grep qiime2 The env's name should appear in your terminal ``` bash -# qiime2-amplicon-2024.5 /home/navi/miniconda3/envs/qiime2-amplicon-2024.5 +# qiime2-amplicon-2025.4 /home/navi/miniconda3/envs/qiime2-amplicon-2025.4 ``` Step 3: Change directory to the project folder and execute the following command ``` bash -cd q2-usearch && python ./setup.py install +cd q2-usearch && pip install . qiime dev refresh-cache ``` Step 4: Install [seqkit2](https://bioinf.shenwei.me/seqkit/) and -[usearch12](https://github.com/rcedgar/usearch12) using mamba/conda +[usearch12](https://github.com/rcedgar/usearch12) using conda + +Note: the q2-usearch plug-in is compatible with usearch11 as well. If you wish to use usearch11, just skip installing usearch using conda. You can manually download the binary file from [GitHub](https://github.com/rcedgar/usearch_old_binaries), rename it to "usearch", add execute permission to it and move it to one of the executable paths on your system. ``` bash -mamba install -c bioconda seqkit">=2.0.0" usearch -# conda install -c bioconda seqkit">=2.0.0" usearch +conda install -c bioconda --no-deps seqkit">=2.0.0" usearch +# if you were using an older version of conda and got stuck at the "solving environment" stage for too long, consider using mamba +# mamba install -c bioconda seqkit">=2.0.0" usearch ``` If every thing went smoothly, you should be seeing sth. like this diff --git a/README_ZH_CN.md b/README_ZH_CN.md index 6d50131..b6e4797 100644 --- a/README_ZH_CN.md +++ b/README_ZH_CN.md @@ -1,9 +1,3 @@ ---- -editor_options: - markdown: - wrap: 72 ---- - # q2-usearch 这是一个把 [USEARCH](https://github.com/rcedgar/usearch12/) 装进 @@ -30,7 +24,7 @@ editor_options: 第一步: 激活我们想要安装到的 QIIME2 conda 环境 ``` bash -# conda activate qiime2-amplicon-2024.5 +# conda activate qiime2-amplicon-2025.4 conda activate <目标q2环境名称> ``` @@ -43,7 +37,7 @@ conda env list | grep qiime2 这样当前conda下所有环境名中带有qiime2字符的虚拟环境都会被检索出来 ``` bash -# qiime2-amplicon-2024.5 /home/navi/miniconda3/envs/qiime2-amplicon-2024.5 +# qiime2-amplicon-2025.4 /home/navi/miniconda3/envs/qiime2-amplicon-2025.4 ``` 第二步: 下载插件 @@ -56,18 +50,20 @@ git clone https://github.com/magicprotoss/q2-usearch . ``` bash cd q2-usearch # 切换路径 -python ./setup.py install # 安装插件 +pip install . # 安装插件 qiime dev refresh-cache # 刷新QIIME2命令行界面缓存 ``` 第四步: -使用mamba或者conda安装[seqkit2](https://mp.weixin.qq.com/s/ucCA4-eZINkppdyQFItXHw)和[usearch12](https://mp.weixin.qq.com/s/i0zzOP5IRNdY9PfqHbpDEQ) +使用conda安装[seqkit2](https://mp.weixin.qq.com/s/ucCA4-eZINkppdyQFItXHw)和[usearch12](https://mp.weixin.qq.com/s/i0zzOP5IRNdY9PfqHbpDEQ) + +注意: +本插件也支持usearch11,如果要用老版本的话跳过conda安装usearch,去[GitHub](https://github.com/rcedgar/usearch_old_binaries)上下载可执行文件,重命名成"usearch"后加权并放到$PATH的文件夹里. ``` bash -# 建议使用mamba -# 国内网络不稳conda圈圈转到一半容易报断连错误 -mamba install -c bioconda seqkit">=2.0.0" usearch -# conda install -c bioconda seqkit">=2.0.0" usearch +conda install -c bioconda --no-deps seqkit">=2.0.0" usearch +# 如果电脑上conda版本太老卡“solving environment”的话,可以使用mamba +# mamba install -c bioconda --no-deps seqkit">=2.0.0" usearch ``` 测试一下usearch是否可以被正常调用 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a4b4eb6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,50 @@ +[project] +name = "q2-usearch" +authors = [ + { name = "RobertEdgar, magicprotoss and biodps", email = "magicprotoss@hotmail.com" } +] +description = "This is a plug-in for USEARCH integration into QIIME2." +readme = {file = "README.md", content-type = "text/markdown"} +license = {file = "LICENSE"} +dynamic = ["version"] + +[project.urls] +Homepage = "https://github.com/magicprotoss/q2-usearch" +Repository = "https://github.com/magicprotoss/q2-usearch" + +[project.entry-points.'qiime2.plugins'] +"q2-usearch" = "q2_usearch.plugin_setup:plugin" + +[build-system] +requires = [ + "setuptools", + "versioningit", + "wheel" +] +build-backend = "setuptools.build_meta" + +[tool.versioningit.vcs] +method = "git-archive" +describe-subst = "$Format:%(describe)$" +default-tag = "0.0.1" + +[tool.versioningit.next-version] +method = "minor" + +[tool.versioningit.format] +distance = "{base_version}+{distance}.{vcs}{rev}" +dirty = "{base_version}+{distance}.{vcs}{rev}.dirty" +distance-dirty = "{base_version}+{distance}.{vcs}{rev}.dirty" + +[tool.versioningit.write] +file = "q2_usearch/_version.py" + +[tool.setuptools] +include-package-data = false + +[tool.setuptools.packages.find] +where = ["."] +include = ["q2_usearch*"] + +[tool.setuptools.package-data] +q2_usearch = ["**/*"] \ No newline at end of file diff --git a/q2_usearch/__init__.py b/q2_usearch/__init__.py index bcdfd0c..27176df 100644 --- a/q2_usearch/__init__.py +++ b/q2_usearch/__init__.py @@ -12,10 +12,10 @@ # modified from q2-vsearch from ._merge_pairs import merge_pairs -from ._version import get_versions - -__version__ = get_versions()["version"] -del get_versions +try: + from ._version import __version__ +except ModuleNotFoundError: + __version__ = '0.0.0+notfound' __all__ = ['denoise_no_primer_pooled', 'cluster_no_primer_pooled', 'denoise_then_cluster_no_primer_pooled', 'merge_pairs', 'sintax'] diff --git a/q2_usearch/_sintax.py b/q2_usearch/_sintax.py index d9fbf65..dd73008 100644 --- a/q2_usearch/_sintax.py +++ b/q2_usearch/_sintax.py @@ -50,7 +50,7 @@ def _split_tax_into_ranks_and_get_max_levels(tax_df_in, sep): def _replace_q2_split_w_usearch_split_and_remove_leading_trailing_blanks(rank_in): - rank_out = re.sub(r"(?<=\b[dpcofgs])\w*__", ':', str(rank_in).strip()) + rank_out = re.sub(r"(?<=\b[dkpcofgs])\w*__", ':', str(rank_in).strip()) return rank_out diff --git a/q2_usearch/_version.py b/q2_usearch/_version.py deleted file mode 100644 index e514b17..0000000 --- a/q2_usearch/_version.py +++ /dev/null @@ -1,526 +0,0 @@ -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer) - -"""Git implementation of _version.py.""" - -# fmt: off - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). - git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "pep440" - cfg.tag_prefix = "" - cfg.parentdir_prefix = "q2-plugin-name-" - cfg.versionfile_source = "q2_plugin_name/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = p.communicate()[0].strip().decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post0.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post0.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index dd963ba..0000000 --- a/setup.cfg +++ /dev/null @@ -1,11 +0,0 @@ -[versioneer] -VCS=git -style=pep440 -versionfile_source = q2_usearch/_version.py -versionfile_build = q2_usearch/_version.py -tag_prefix = -parentdir_prefix = q2-usearch- - -[flake8] -max-line-length = 88 -extend-ignore = E203 diff --git a/setup.py b/setup.py deleted file mode 100644 index 4cde218..0000000 --- a/setup.py +++ /dev/null @@ -1,30 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2024, magicprotoss;biodps. -# -# Distributed under the terms of the Modified BSD License. -# -# The full license is in the file LICENSE, distributed with this software. -# ---------------------------------------------------------------------------- - -from setuptools import find_packages, setup - -import versioneer - -setup( - name="q2-usearch", - version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), - license="BSD-3-Clause", - packages=find_packages(), - author="magicprotoss; biodps", - author_email="magicprotoss@hotmail.com", - description="This is a plug-in for USEARCH integration into QIIME2.", - url="https://github.com/magicprotoss/q2-usearch", - entry_points={ - "qiime2.plugins": ["q2-usearch=q2_usearch.plugin_setup:plugin"] - }, - package_data={ - "q2_usearch": ["citations.bib"], - }, - zip_safe=False, -) diff --git a/versioneer.py b/versioneer.py deleted file mode 100644 index 08a4cec..0000000 --- a/versioneer.py +++ /dev/null @@ -1,1857 +0,0 @@ -# Version: 0.19 -# flake8: noqa -# fmt: off - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! -* https://github.com/python-versioneer/python-versioneer -* Brian Warner -* License: Public Domain -* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 -* [![Latest Version][pypi-image]][pypi-url] -* [![Build Status][travis-image]][travis-url] - -This is a tool for managing a recorded version number in distutils-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -* `pip install versioneer` to somewhere in your $PATH -* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) -* run `versioneer install` in your source tree, commit the results -* Verify version information with `python setup.py version` - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes). - -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. - -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. - -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/python-versioneer/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other languages) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. -[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. - -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. - -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. - -[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg`, if necessary, to include any new configuration settings - indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - -## Similar projects - -* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time - dependency -* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of - versioneer - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . - -[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg -[pypi-url]: https://pypi.python.org/pypi/versioneer/ -[travis-image]: -https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg -[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer - -""" - -import configparser -import errno -import json -import os -import re -import subprocess -import sys - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_root(): - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . - """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. - me = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(me)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(me), versioneer_py)) - except NameError: - pass - return root - - -def get_config_from_root(root): - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise EnvironmentError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . - setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.ConfigParser() - with open(setup_cfg, "r") as f: - parser.read_file(f) - VCS = parser.get("versioneer", "VCS") # mandatory - - def get(parser, name): - if parser.has_option("versioneer", name): - return parser.get("versioneer", name) - return None - cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = get(parser, "style") or "" - cfg.versionfile_source = get(parser, "versionfile_source") - cfg.versionfile_build = get(parser, "versionfile_build") - cfg.tag_prefix = get(parser, "tag_prefix") - if cfg.tag_prefix in ("''", '""'): - cfg.tag_prefix = "" - cfg.parentdir_prefix = get(parser, "parentdir_prefix") - cfg.verbose = get(parser, "verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = p.communicate()[0].strip().decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -LONG_VERSION_PY['git'] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). - git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = p.communicate()[0].strip().decode() - if p.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%%s*" %% tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], - cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post0.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post0.dev%%d" %% pieces["distance"] - else: - # exception #1 - rendered = "0.post0.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(manifest_in, versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] - if ipy: - files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except EnvironmentError: - pass - if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.19) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. - -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except EnvironmentError: - raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post0.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post0.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. - """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. - - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - print("unable to compute version") - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} - - -def get_version(): - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(cmdclass=None): - """Get the custom setuptools/distutils subclasses used by Versioneer. - - If the package uses a different cmdclass (e.g. one from numpy), it - should be provide as an argument. - """ - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. - # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - - cmds = {} if cmdclass is None else cmdclass.copy() - - # we add "version" to both distutils and setuptools - from distutils.core import Command - - class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - cmds["version"] = cmd_version - - # we override "build_py" in both distutils and setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? - # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? - - # we override different "build_py" commands for both environments - if 'build_py' in cmds: - _build_py = cmds['build_py'] - elif "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py - else: - from distutils.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_py"] = cmd_build_py - - if "setuptools" in sys.modules: - from setuptools.command.build_ext import build_ext as _build_ext - else: - from distutils.command.build_ext import build_ext as _build_ext - - class cmd_build_ext(_build_ext): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_ext.run(self) - if self.inplace: - # build_ext --inplace will only build extensions in - # build/lib<..> dir with no _version.py to write to. - # As in place builds will already have a _version.py - # in the module dir, we do not need to write one. - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_ext"] = cmd_build_ext - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if 'py2exe' in sys.modules: # py2exe enabled? - from py2exe.distutils_buildexe import py2exe as _py2exe - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["py2exe"] = cmd_py2exe - - # we override different "sdist" commands for both environments - if 'sdist' in cmds: - _sdist = cmds['sdist'] - elif "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist - else: - from distutils.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. - -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -INIT_PY_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - - -def do_setup(): - """Do main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except (EnvironmentError, configparser.NoSectionError, - configparser.NoOptionError) as e: - if isinstance(e, (EnvironmentError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except EnvironmentError: - old = "" - if INIT_PY_SNIPPET not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except EnvironmentError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % - cfg.versionfile_source) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. - do_vcs_install(manifest_in, cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1) diff --git "a/\342\200\216conda-recipe/meta.yaml" "b/\342\200\216conda-recipe/meta.yaml" new file mode 100644 index 0000000..70e1be0 --- /dev/null +++ "b/\342\200\216conda-recipe/meta.yaml" @@ -0,0 +1,37 @@ +package: + name: q2-usearch + version: {{ PLUGIN_VERSION }} +source: + path: .. +build: + script: make install +requirements: + host: + - python {{ python }} + - setuptools + - versioningit + - wheel + run: + - python {{ python }} + - seqkit >= 2.0.0 + - usearch >= 12.0 + - xlsxwriter + - openpyxl + - qiime2 {{ qiime2_epoch }}.* + - q2-types {{ qiime2_epoch }}.* + build: + - setuptools + - versioningit +test: + requires: + - qiime2 >={{ qiime2 }} + - q2-types >={{ q2_types }} + - pytest + imports: + - q2_usearch + - qiime2.plugins.usearch + commands: + - py.test --pyargs usearch +about: + home: https://github.com/magicprotoss/q2-usearch + license: BSD-3-Clause \ No newline at end of file