@@ -32,12 +32,9 @@ def get_queries_pairwise(
3232 df = pandas .read_csv (input_path , sep = sep )
3333 assert "id" in df .columns and "sequence" in df .columns
3434 queries = [
35- (str (df ["id" ][0 ])+ '&' + str (seq_id ), [ df [ " sequence" ][ 0 ] .upper (), sequence . upper ()] , None )
36- for i , (seq_id , sequence ) in enumerate (df [["id" , "sequence" ]].itertuples (index = False )) if i != 0
35+ (str (df ["id" ][0 ])+ '&' + str (seq_id ), sequence .upper (), None )
36+ for i , (seq_id , sequence ) in enumerate (df [["id" , "sequence" ]].itertuples (index = False ))
3737 ]
38- for i in range (len (queries )):
39- if len (queries [i ][1 ]) == 1 :
40- queries [i ] = (queries [i ][0 ], queries [i ][1 ][0 ], None )
4138 elif input_path .suffix == ".a3m" :
4239 raise NotImplementedError ()
4340 elif input_path .suffix in [".fasta" , ".faa" , ".fa" ]:
@@ -47,9 +44,7 @@ def get_queries_pairwise(
4744 sequence = sequence .upper ()
4845 if sequence .count (":" ) == 0 :
4946 # Single sequence
50- if i == 0 :
51- continue
52- queries .append ((headers [0 ]+ '&' + header , [sequences [0 ],sequence ], None ))
47+ queries .append ((header , sequence , None ))
5348 else :
5449 # Complex mode
5550 queries .append ((header , sequence .upper ().split (":" ), None ))
@@ -449,9 +444,9 @@ def main():
449444 args = parser .parse_args ()
450445
451446 if args .interaction_scan :
452- queries , is_complex = get_queries_pairwise (args .query , None )
447+ queries , is_complex = get_queries_pairwise (args .query )
453448 else :
454- queries , is_complex = get_queries (args .query , None )
449+ queries , is_complex = get_queries (args .query )
455450
456451 queries_unique = []
457452 for job_number , (raw_jobname , query_sequences , a3m_lines ) in enumerate (queries ):
@@ -481,10 +476,9 @@ def main():
481476 query_seqs_cardinality ,
482477 ) in enumerate (queries_unique ):
483478 if job_number == 0 :
484- f .write (f">{ raw_jobname } _0\n { query_sequences [0 ]} \n " )
485- f .write (f">{ raw_jobname } \n { query_sequences [1 ]} \n " )
479+ f .write (f">{ raw_jobname } _0\n { query_sequences } \n " )
486480 else :
487- f .write (f">{ raw_jobname } \n { query_sequences [ 1 ] } \n " )
481+ f .write (f">{ queries_unique [ 0 ][ 0 ] + '&' + raw_jobname } \n { query_sequences } \n " )
488482 else :
489483 with query_file .open ("w" ) as f :
490484 for job_number , (
@@ -498,18 +492,6 @@ def main():
498492 args .mmseqs ,
499493 ["createdb" , query_file , args .base .joinpath ("qdb" ), "--shuffle" , "0" ],
500494 )
501- with args .base .joinpath ("qdb.lookup" ).open ("w" ) as f :
502- id = 0
503- file_number = 0
504- for job_number , (
505- raw_jobname ,
506- query_sequences ,
507- query_seqs_cardinality ,
508- ) in enumerate (queries_unique ):
509- for seq in query_sequences :
510- f .write (f"{ id } \t { raw_jobname } \t { file_number } \n " )
511- id += 1
512- file_number += 1
513495
514496 mmseqs_search_monomer (
515497 mmseqs = args .mmseqs ,
@@ -542,30 +524,66 @@ def main():
542524 interaction_scan = args .interaction_scan ,
543525 )
544526
527+ if args .interaction_scan :
528+ if len (queries_unique ) > 1 :
529+ for i in range (len (queries_unique )- 2 ):
530+ idx = 2 + i * 2
531+ ## delete the duplicated query files: 2.paired.a3m, 4.paired.a3m, ...
532+ os .remove (args .base .joinpath (f"{ idx } .paired.a3m" ))
533+ for j in range (len (queries_unique )- 2 ):
534+ # rename the remaining target files to consecutive indices (3.paired.a3m -> 2.paired.a3m, 5.paired.a3m -> 3.paired.a3m, ...)
535+ id1 = j * 2 + 3
536+ id2 = j + 2
537+ os .replace (args .base .joinpath (f"{ id1 } .paired.a3m" ), args .base .joinpath (f"{ id2 } .paired.a3m" ))
538+
545539 id = 0
546- for job_number , (
547- raw_jobname ,
548- query_sequences ,
549- query_seqs_cardinality ,
550- ) in enumerate (queries_unique ):
551- unpaired_msa = []
552- paired_msa = None
553- if len (query_seqs_cardinality ) > 1 :
540+ if not args .interaction_scan :
541+ for job_number , (
542+ raw_jobname ,
543+ query_sequences ,
544+ query_seqs_cardinality ,
545+ ) in enumerate (queries_unique ):
546+ unpaired_msa = []
547+ paired_msa = None
548+ if len (query_seqs_cardinality ) > 1 :
549+ paired_msa = []
550+ else :
551+ for seq in query_sequences :
552+ with args .base .joinpath (f"{ id } .a3m" ).open ("r" ) as f :
553+ unpaired_msa .append (f .read ())
554+ args .base .joinpath (f"{ id } .a3m" ).unlink ()
555+ if len (query_seqs_cardinality ) > 1 :
556+ with args .base .joinpath (f"{ id } .paired.a3m" ).open ("r" ) as f :
557+ paired_msa .append (f .read ())
558+ args .base .joinpath (f"{ id } .paired.a3m" ).unlink ()
559+ id += 1
560+ msa = msa_to_str (
561+ unpaired_msa , paired_msa , query_sequences , query_seqs_cardinality
562+ )
563+ args .base .joinpath (f"{ job_number } .a3m" ).write_text (msa )
564+ else :
565+ for job_number , _ in enumerate (queries_unique [:- 1 ]):
566+ query_sequences = [queries_unique [0 ][1 ], queries_unique [job_number + 1 ][1 ]]
567+ unpaired_msa = []
554568 paired_msa = []
555- for seq in query_sequences :
556- with args .base .joinpath (f"{ id } .a3m" ).open ("r" ) as f :
569+ with args .base .joinpath (f"0.a3m" ).open ("r" ) as f :
570+ unpaired_msa .append (f .read ())
571+ with args .base .joinpath (f"{ job_number + 1 } .a3m" ).open ("r" ) as f :
557572 unpaired_msa .append (f .read ())
558- args .base .joinpath (f"{ id } .a3m" ).unlink ()
559- if len (query_seqs_cardinality ) > 1 :
560- with args .base .joinpath (f"{ id } .paired.a3m" ).open ("r" ) as f :
561- paired_msa .append (f .read ())
562- args .base .joinpath (f"{ id } .paired.a3m" ).unlink ()
563- id += 1
564- msa = msa_to_str (
565- unpaired_msa , paired_msa , query_sequences , query_seqs_cardinality
566- )
567- args .base .joinpath (f"{ job_number } .a3m" ).write_text (msa )
568573
574+ with args .base .joinpath (f"0.paired.a3m" ).open ("r" ) as f :
575+ paired_msa .append (f .read ())
576+ with args .base .joinpath (f"{ job_number + 1 } .paired.a3m" ).open ("r" ) as f :
577+ paired_msa .append (f .read ())
578+ msa = msa_to_str (
579+ unpaired_msa , paired_msa , query_sequences , [1 ,1 ]
580+ )
581+ args .base .joinpath (f"{ job_number } _final.a3m" ).write_text (msa )
582+ for job_number , _ in enumerate (queries_unique ):
583+ args .base .joinpath (f"{ job_number } .a3m" ).unlink ()
584+ args .base .joinpath (f"{ job_number } .paired.a3m" ).unlink ()
585+ for job_number , _ in enumerate (queries_unique [:- 1 ]):
586+ os .replace (args .base .joinpath (f"{ job_number } _final.a3m" ), args .base .joinpath (f"{ job_number } .a3m" ))
569587 query_file .unlink ()
570588 run_mmseqs (args .mmseqs , ["rmdb" , args .base .joinpath ("qdb" )])
571589 run_mmseqs (args .mmseqs , ["rmdb" , args .base .joinpath ("qdb_h" )])
0 commit comments