33import tarfile
44import subprocess
55import pandas as pd
6- import filecmp
6+ from tqdm import tqdm
7+ import glob
78
89from utils .vasp .database import find_vasp_directories , check_convergence
910from utils .generic import get_latest_file_iteration
1011from utils .jobfile import jobfile
11- from tqdm import tqdm
1212
1313def get_slurm_jobs_working_directories (username = "hmai" ):
1414 command = f'squeue -u { username } -o "%i %Z"'
@@ -78,7 +78,7 @@ def reconverge_all(
7878 # Prioritize directories without vasp.log
7979 dirs_to_check = dirs_without_log + dirs_with_log
8080
81- for i , dir in enumerate (dirs_to_check ):
81+ for i , dir in enumerate (tqdm ( dirs_to_check , desc = "Reconverging Directories" ) ):
8282 if not check_convergence (dir ):
8383 if i + len (running_queued_job_directories ) > self .max_submissions :
8484 leftover_calcs_exceeding_queue_limit .append (dir )
@@ -87,9 +87,6 @@ def reconverge_all(
8787 dir , calc_type , HPC , VASP_version , CPU , walltime , cpu_per_node
8888 )
8989 dirs_to_search_next_time .append (dir )
90- print (f"RERUNNING: { dir } " )
91- else :
92- print (f"CONVERGED: { dir } " )
9390
9491 self .update_resubmit_log (
9592 dirs_to_search_next_time
@@ -99,16 +96,19 @@ def reconverge_all(
9996 return dirs_to_search_next_time
10097
def load_non_converged_paths(self, from_dataframe_path):
    """Return the directories that should be checked for reconvergence.

    When ``from_dataframe_path`` is given, the pickled DataFrame at that
    path is read and its ``"filepath"`` column is returned as a list, with a
    trailing ``<os.sep>OUTCAR`` component removed from each entry. If no
    path is given, or reading/processing the DataFrame fails, the result of
    ``self.reconverge_from_log_file()`` is returned instead.

    Args:
        from_dataframe_path: Path to a pickled pandas DataFrame that has a
            ``"filepath"`` column, or ``None`` to go straight to the log
            file.

    Returns:
        A list of path strings taken from the DataFrame, or whatever
        ``self.reconverge_from_log_file()`` returns on fallback.
    """
    if from_dataframe_path is not None:
        outcar_suffix = os.sep + "OUTCAR"
        try:
            # NOTE: read_pickle unpickles arbitrary objects; only point this
            # at DataFrame files from a trusted source.
            df = pd.read_pickle(from_dataframe_path)
            return [
                # Slice the exact suffix off. str.rstrip() would treat the
                # argument as a set of characters and also eat trailing
                # "/", "O", "U", "T", "C", "A", "R" characters belonging to
                # the parent directory name.
                path[: -len(outcar_suffix)]
                if path.endswith(outcar_suffix)
                else path
                for path in df["filepath"].tolist()
            ]
        except Exception as e:
            # Deliberate best-effort: report the problem and fall back to
            # the log file below rather than aborting the run.
            print(f"Failed to read dataframe: {from_dataframe_path} with exception {e}")
    return self.reconverge_from_log_file()
113113
114114 def update_resubmit_log (self , dirs_to_search_next_time ):
@@ -126,6 +126,9 @@ def reconverge(
126126 walltime = 24 ,
127127 cpu_per_node = 128 ,
128128 ):
129+ if glob .fnmatch .fnmatch (os .path .basename (dirpath ), 'error_run*' ):
130+ #print(f"Skipping {dirpath} as it contains an error_run folder")
131+ return
129132 self .handle_error_run_files (dirpath )
130133 reconverge_methods = {
131134 "static" : self .reconverge_static ,
@@ -163,7 +166,7 @@ def handle_error_run_files(self, dirpath, filename_to_compare="vasp.log"):
163166 )
164167 os .makedirs (error_run_folder_path , exist_ok = True )
165168 self .move_files_to_error_run_folder (dirpath , error_run_folder_path )
166-
169+
167170 def move_files_to_error_run_folder (self , dirpath , error_run_folder_path ):
168171 for f in os .listdir (dirpath ):
169172 if ("error" in f and "tar" in f ) or f .endswith (".sh" ):
@@ -283,7 +286,7 @@ def reconverge_generic(
283286 template_type = template_filename .split ('_' )[1 ].split ('.py' )[0 ]
284287 script_name = os .path .join (
285288 self .script_template_dir ,
286- f"{ template_type } _Custodian_{ HPC } .sh" ,
289+ f"{ template_filename . split ( '_' )[ 1 ]. split ( '.py' )[ 0 ] } _Custodian_{ HPC } .sh"
287290 )
288291 job = jobfile (
289292 file_path = script_name ,
0 commit comments