Skip to content

Commit 0fe481f

Browse files
author
Han Lin Mai
committed
2 parents 0ae4a05 + 8180379 commit 0fe481f

3 files changed

Lines changed: 25 additions & 22 deletions

File tree

jobscript_templates/CustodianScripts/Static_Custodian_Setonix.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,8 @@
66
#SBATCH --time={WALLTIMESTRING}
77
#SBATCH --partition=work
88
#SBATCH --export=NONE
9-
#SBATCH --exclusive
10-
9+
##SBATCH --exclusive
10+
#SBATCH --mem=32GB
1111
module load vasp/5.4.4
1212
cd "$PBS_O_WORKDIR"
1313

jobscript_templates/jobfile-Setonix-StaticImage-DDEC6

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,9 +27,9 @@ handlers = [VaspErrorHandler(output_filename=output_filename), UnconvergedErrorH
2727
jobs = [VaspJob(sys.argv[1:], output_file=output_filename, suffix = "",
2828
settings_override = [{"dict": "INCAR", "action": {"_set": {"NSW": 1, "LAECHG": True, "LCHARGE": True, "NELM": 500, "EDIFF": 1E-5}}}])]
2929
c = Custodian(handlers, jobs, max_errors=10)
30-
c.run()' > StaticImage-DDEC6-custodian.py
30+
c.run()' > custodian.py
3131

32-
python StaticImage-DDEC6-custodian.py $run_cmd vasp_std &> vasp.log
32+
python custodian.py $run_cmd vasp_std &> vasp.log
3333

3434
echo '<net charge>
3535
0.0 <-- specifies the net charge of the unit cell (defaults to 0.0 if nothing specified)

utils/vasp/resubmitter.py

Lines changed: 21 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,12 @@
33
import tarfile
44
import subprocess
55
import pandas as pd
6-
import filecmp
6+
from tqdm import tqdm
7+
import glob
78

89
from utils.vasp.database import find_vasp_directories, check_convergence
910
from utils.generic import get_latest_file_iteration
1011
from utils.jobfile import jobfile
11-
from tqdm import tqdm
1212

1313
def get_slurm_jobs_working_directories(username="hmai"):
1414
command = f'squeue -u {username} -o "%i %Z"'
@@ -78,7 +78,7 @@ def reconverge_all(
7878
# Prioritize directories without vasp.log
7979
dirs_to_check = dirs_without_log + dirs_with_log
8080

81-
for i, dir in enumerate(dirs_to_check):
81+
for i, dir in enumerate(tqdm(dirs_to_check, desc="Reconverging Directories")):
8282
if not check_convergence(dir):
8383
if i + len(running_queued_job_directories) > self.max_submissions:
8484
leftover_calcs_exceeding_queue_limit.append(dir)
@@ -87,9 +87,6 @@ def reconverge_all(
8787
dir, calc_type, HPC, VASP_version, CPU, walltime, cpu_per_node
8888
)
8989
dirs_to_search_next_time.append(dir)
90-
print(f"RERUNNING: {dir}")
91-
else:
92-
print(f"CONVERGED: {dir}")
9390

9491
self.update_resubmit_log(
9592
dirs_to_search_next_time
@@ -99,16 +96,19 @@ def reconverge_all(
9996
return dirs_to_search_next_time
10097

10198
def load_non_converged_paths(self, from_dataframe_path):
102-
if from_dataframe_path:
103-
df = pd.read_pickle(from_dataframe_path)
104-
return [
105-
(
106-
path.rstrip(os.sep + "OUTCAR")
107-
if path.endswith(os.sep + "OUTCAR")
108-
else path
109-
)
110-
for path in df["filepath"].tolist()
111-
]
99+
if from_dataframe_path is not None:
100+
try:
101+
df = pd.read_pickle(from_dataframe_path)
102+
return [
103+
(
104+
path.rstrip(os.sep + "OUTCAR")
105+
if path.endswith(os.sep + "OUTCAR")
106+
else path
107+
)
108+
for path in df["filepath"].tolist()
109+
]
110+
except Exception as e:
111+
print(f"Failed to read dataframe: {from_dataframe_path} with exception {e}")
112112
return self.reconverge_from_log_file()
113113

114114
def update_resubmit_log(self, dirs_to_search_next_time):
@@ -126,6 +126,9 @@ def reconverge(
126126
walltime=24,
127127
cpu_per_node=128,
128128
):
129+
if glob.fnmatch.fnmatch(os.path.basename(dirpath), 'error_run*'):
130+
#print(f"Skipping {dirpath} as it contains an error_run folder")
131+
return
129132
self.handle_error_run_files(dirpath)
130133
reconverge_methods = {
131134
"static": self.reconverge_static,
@@ -163,7 +166,7 @@ def handle_error_run_files(self, dirpath, filename_to_compare="vasp.log"):
163166
)
164167
os.makedirs(error_run_folder_path, exist_ok=True)
165168
self.move_files_to_error_run_folder(dirpath, error_run_folder_path)
166-
169+
167170
def move_files_to_error_run_folder(self, dirpath, error_run_folder_path):
168171
for f in os.listdir(dirpath):
169172
if ("error" in f and "tar" in f) or f.endswith(".sh"):
@@ -283,7 +286,7 @@ def reconverge_generic(
283286
template_type = template_filename.split('_')[1].split('.py')[0]
284287
script_name = os.path.join(
285288
self.script_template_dir,
286-
f"{template_type}_Custodian_{HPC}.sh",
289+
f"{template_filename.split('_')[1].split('.py')[0]}_Custodian_{HPC}.sh"
287290
)
288291
job = jobfile(
289292
file_path=script_name,

0 commit comments

Comments
 (0)