Skip to content

Commit c975139

Browse files
committed
Use env variables set by varipeps_slurm_manager
1 parent f980566 commit c975139

File tree

2 files changed

+44
-11
lines changed

2 files changed

+44
-11
lines changed

varipeps/config.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,12 @@ class Slurm_Restart_Mode(IntEnum):
4444
DISABLED = (
4545
auto()
4646
) #: Disable automatic restart of the slurm job if the maximum runtime limit is reached
47+
WRITE_NEED_RESTART_FILE = (
48+
auto()
49+
) #: Write a file indicating that a restart is needed, but do not write any slurm scripts
4750
WRITE_RESTART_SCRIPT = (
4851
auto()
49-
) #: Write restart script but do not submit new slurm job
52+
) #: Write slurm restart script but do not submit new slurm job
5053
AUTOMATIC_RESTART = auto() #: Write restart script and start new slurm job with it
5154

5255

varipeps/optimization/optimizer.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import datetime
44
from functools import partial
55
import importlib
6+
import os
67
from os import PathLike
8+
import pathlib
79
import time
810

911
from scipy.optimize import OptimizeResult
@@ -226,9 +228,10 @@ def autosave_function_restartable(
226228
projector_method,
227229
signal_reset_descent_dir,
228230
) -> None:
229-
with h5py.File(
230-
f"{str(filename)}.restartable", "w", libver=("earliest", "v110")
231-
) as f:
231+
state_filename = os.environ.get("VARIPEPS_STATE_FILE")
232+
if state_filename is None:
233+
state_filename = f"{str(filename)}.restartable"
234+
with h5py.File(state_filename, "w", libver=("earliest", "v110")) as f:
232235
grp = f.create_group("unitcell")
233236
unitcell.save_to_group(grp, True)
234237

@@ -1098,19 +1101,46 @@ def random_noise(a):
10981101
runtime_std = np.std(flatten_runtime)
10991102

11001103
remaining_slurm_time = slurm_data["TimeLimit"] - slurm_data["RunTime"]
1104+
1105+
if (
1106+
remaining_time_correction := os.environ.get(
1107+
"VARIPEPS_REMAINING_TIME_CORRECTION"
1108+
)
1109+
) is not None:
1110+
try:
1111+
remaining_time_correction = int(remaining_time_correction)
1112+
remaining_slurm_time -= datetime.timedelta(
1113+
seconds=remaining_time_correction
1114+
)
1115+
except (TypeError, ValueError):
1116+
pass
1117+
11011118
time_of_one_step = datetime.timedelta(
11021119
seconds=runtime_mean + 3 * runtime_std
11031120
)
11041121

11051122
if remaining_slurm_time < time_of_one_step:
1106-
SlurmUtils.generate_restart_scripts(
1107-
f"{str(autosave_filename)}.restart.slurm",
1108-
f"{str(autosave_filename)}.restart.py",
1109-
f"{str(autosave_filename)}.restartable",
1110-
slurm_data,
1111-
)
1123+
if (
1124+
restart_needed_filename := os.environ.get(
1125+
"VARIPEPS_NEED_RESTART_FILE"
1126+
)
1127+
) is not None:
1128+
pathlib.Path(restart_needed_filename).touch()
1129+
1130+
if (
1131+
varipeps_config.slurm_restart_mode
1132+
is Slurm_Restart_Mode.WRITE_RESTART_SCRIPT
1133+
or varipeps_config.slurm_restart_mode
1134+
is Slurm_Restart_Mode.AUTOMATIC_RESTART
1135+
):
1136+
SlurmUtils.generate_restart_scripts(
1137+
f"{str(autosave_filename)}.restart.slurm",
1138+
f"{str(autosave_filename)}.restart.py",
1139+
f"{str(autosave_filename)}.restartable",
1140+
slurm_data,
1141+
)
11121142

1113-
slurm_restart_written = True
1143+
slurm_restart_written = True
11141144

11151145
if (
11161146
varipeps_config.slurm_restart_mode

0 commit comments

Comments
 (0)