|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +from argparse import ArgumentParser |
| 4 | +from pathlib import Path |
| 5 | +import subprocess |
| 6 | +import sys |
| 7 | +import time |
| 8 | +from hyperopt import fmin, hp, STATUS_OK, tpe, Trials |
| 9 | + |
| 10 | + |
def function(params):
    """Objective function for hyperopt: run one PBS job and report its runtime.

    Submits ``julia.pbs`` with ``qsub``, passing ``schedule``, ``chunk`` and
    ``OMP_NUM_THREADS`` to the job via ``-v``, then polls every 3 seconds
    until the job's output file ``julia.pbs.o<job_id>`` exists.  The loss is
    the mean of the values found in ``julia.pbs.time<job_id>`` (one number
    per line).

    Args:
        params: ``(schedule, chunk, ppn)`` tuple.  ``chunk`` may be a float
            and is truncated to an int; ``ppn`` is zero-based and is
            incremented by one to obtain the number of cores requested.

    Returns:
        A dict with ``loss`` (mean runtime) and ``status`` for hyperopt,
        plus ``schedule``, ``chunk``, ``ppn``, ``job_id`` and a ``time``
        stamp taken when the result was collected.

    Raises:
        subprocess.CalledProcessError: if ``qsub`` exits with a non-zero
            status.
        RuntimeError: if ``qsub`` printed no job ID.
        ValueError: if the time file holds no runtimes or a line that is
            not a number.
    """
    schedule, chunk, ppn = params
    # chunk is given as a floating point number
    chunk = int(chunk)
    # ppn is sampled zero-based, so shift it to request at least one core
    ppn = 1 + int(ppn)
    omp_env = f'schedule={schedule},chunk={chunk},OMP_NUM_THREADS={ppn}'
    cmd = ['qsub', '-l', f'nodes=1:ppn={ppn}:haswell',
           '-v', omp_env, 'julia.pbs']
    # check=True: a failed submission would otherwise yield an empty job ID
    # and the polling loop below would never terminate.
    process = subprocess.run(cmd, stdout=subprocess.PIPE,
                             encoding='utf8', check=True)
    # The job ID is everything before the first '.'; strip the trailing
    # newline in case the output contains no '.' at all.
    job_id, *_ = process.stdout.strip().split('.')
    if not job_id:
        raise RuntimeError('qsub did not report a job ID')
    print(f'### info: submitted job {job_id}', file=sys.stderr)
    # The appearance of the job's output file is used as completion signal.
    output_file = Path(f'julia.pbs.o{job_id}')
    while not output_file.exists():
        time.sleep(3)
    print(f'### info: job {job_id} finished', file=sys.stderr)
    time_file_name = f'julia.pbs.time{job_id}'
    with open(time_file_name, 'r') as time_file:
        # Parse the line produced by the iteration itself; calling
        # readline() inside the loop would skip every other measurement.
        runtimes = [float(line) for line in time_file if line.strip()]
    if not runtimes:
        raise ValueError(f'no runtimes found in {time_file_name}')
    runtime = sum(runtimes)/len(runtimes)
    return {
        'loss': runtime, 'schedule': schedule, 'chunk': chunk,
        'ppn': ppn, 'job_id': job_id, 'status': STATUS_OK,
        'time': time.strftime('%Y-%m-%d %H:%M:%S'),
    }
| 39 | + |
| 40 | + |
def optimize(max_evals, max_ppn):
    """Minimize ``function`` over schedule, chunk size and core count.

    The search space is a choice between the 'static', 'dynamic' and
    'guided' schedules.  Each branch has its own chunk-size and ppn
    hyperparameters, labelled with the first letter of the schedule name.

    Args:
        max_evals: maximum number of evaluations passed on to ``fmin``.
        max_ppn: upper bound handed to ``hp.randint`` for the ppn value.

    Returns:
        A ``(best, trials)`` tuple: the value returned by ``fmin`` and the
        ``Trials`` object that recorded the evaluations.
    """
    branches = []
    for kind in ('static', 'dynamic', 'guided'):
        suffix = kind[0]
        chunk_dist = hp.qloguniform(f'chunk_{suffix}', 2, 11, 10)
        ppn_dist = hp.randint(f'ppn_{suffix}', max_ppn)
        branches.append((kind, chunk_dist, ppn_dist))
    search_space = hp.choice('schedule', branches)
    history = Trials()
    best_point = fmin(function, space=search_space, algo=tpe.suggest,
                      max_evals=max_evals, trials=history)
    return best_point, history
| 54 | + |
| 55 | + |
def main():
    """Parse the command line, run the optimization and save the trials.

    Options: ``--max-ppn`` (int, default 20), ``--max-evals`` (int,
    default 100) and the required ``--trials`` output file name.  The
    trials file gets a header line followed by one comma-separated line
    per trial result.
    """
    parser = ArgumentParser(description='optimize external process')
    parser.add_argument('--max-ppn', type=int, default=20,
                        help='maximum number of cores to use')
    parser.add_argument('--max-evals', type=int, default=100,
                        help='maximum evals')
    parser.add_argument('--trials', required=True,
                        help='file to save trials')
    args = parser.parse_args()
    # Only the recorded trials are needed here; the best point is ignored.
    trials = optimize(args.max_evals, args.max_ppn)[1]
    row_template = '{schedule},{chunk:d},{ppn:d},({job_id}),{loss}'
    with open(args.trials, 'w') as out:
        print('schedule,chunk,ppn,job_id,runtime', file=out)
        for result in trials.results:
            print(row_template.format_map(result), file=out)
| 78 | + |
| 79 | + |
# Run main() only when executed as a script; its return value is used as
# the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
0 commit comments