@@ -630,6 +630,7 @@ def recover_any_missing_chain_ids(interim_dataset_dir: str, new_pdb_filepath: st
630630 orig_pdb_name = db .get_pdb_name (orig_pdb_filepath )
631631 orig_pdb_df = PandasPdb ().read_pdb (new_pdb_filepath ).df ['ATOM' ]
632632 unique_chain_ids = np .unique (orig_pdb_df ['chain_id' ].values )
633+
633634 """Ascertain the chain ID corresponding to the original PDB file, using one of two available methods.
634635 Method 1: Used with datasets such as EVCoupling adopting .atom filename extensions (e.g., 4DI3C.atom)
635636 Method 2: Used with datasets such as DeepHomo adopting regular .pdb filename extensions (e.g., 2FNUA.pdb)"""
@@ -643,9 +644,24 @@ def recover_any_missing_chain_ids(interim_dataset_dir: str, new_pdb_filepath: st
643644 else : # Method 2: Try to use unique chain IDs
644645 # Assume the first/second index is the first non-empty chain ID (e.g., 'A')
645646 orig_pdb_chain_id = unique_chain_ids [0 ] if (unique_chain_ids [0 ] != '' ) else unique_chain_ids [1 ]
647+
648+ # Update existing parsed chains to contain the newly-recovered chain ID
649+ parsed_dir = os .path .join (interim_dataset_dir , 'parsed' , pdb_code )
650+ parsed_filenames = [
651+ os .path .join (parsed_dir , filename ) for filename in os .listdir (parsed_dir ) if new_pdb_code in filename
652+ ]
653+ parsed_filenames .sort ()
654+ # Load in the existing parsed chain
655+ chain_df = pd .read_pickle (parsed_filenames [chain_number - 1 ])
656+ # Update the corresponding chain ID
657+ chain_df .chain = orig_pdb_chain_id
658+ # Save the updated parsed chain
659+ chain_df .to_pickle (parsed_filenames [chain_number - 1 ])
660+
646661 # Update the existing Pair to contain the newly-recovered chain ID
647662 pair_dir = os .path .join (interim_dataset_dir , 'pairs' , pdb_code )
648663 pair_filenames = [os .path .join (pair_dir , filename ) for filename in os .listdir (pair_dir ) if new_pdb_code in filename ]
664+ pair_filenames .sort ()
649665 # Load in the existing Pair
650666 with open (pair_filenames [0 ], 'rb' ) as f :
651667 pair = dill .load (f )
0 commit comments