Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions input-files/tyk2_qvina/config.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
receptor=../input-files/tyk2_qvina/tyk2_protein_clean.pdbqt
center_x=-4.0
center_y=26.6
center_z=-30.4
size_x=22
size_y=22
size_z=22
exhaustiveness=8
15 changes: 14 additions & 1 deletion tools/afvs_prepare_folders.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,21 @@ def check_parameters(config):
if(empty_value(config, 'bash_template')):
print("* 'bash_template' must be set if batchsystem is 'bash'")
error = 1
elif(config['batchsystem'] == "lsf"):
if(empty_value(config, 'lsf_template')):
print("* 'lsf_template' must be set if batchsystem is 'lsf'")
error = 1
if(empty_value(config, 'lsf_cpus')):
print("* 'lsf_cpus' must be set if batchsystem is 'lsf'")
error = 1
if(empty_value(config, 'lsf_queue')):
print("* 'lsf_queue' must be set if batchsystem is 'lsf'")
error = 1
if(empty_value(config, 'lsf_job_submission_timeout')):
print("* 'lsf_job_submission_timeout' must be set if batchsystem is 'lsf'")
error = 1
else:
print(f"* batchsystem '{config['batchsystem']}' is not supported. Only awsbatch and slurm are supported")
print(f"* batchsystem '{config['batchsystem']}' is not supported. Only awsbatch, slurm, lsf and bash are supported")


if(empty_value(config, 'ligand_library_format')):
Expand Down
2 changes: 2 additions & 0 deletions tools/afvs_prepare_workunits.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,8 @@ def process(ctx):
max_array_job_size = int(config['aws_batch_array_job_size'])
elif(config['batchsystem'] == "slurm"):
max_array_job_size = int(config['slurm_array_job_size'])
elif(config['batchsystem'] == "lsf"):
max_array_job_size = int(config['lsf_array_job_size'])
elif(config['batchsystem'] == "bash"):
max_array_job_size = int(config['bash_array_job_size'])

Expand Down
78 changes: 78 additions & 0 deletions tools/afvs_submit_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,82 @@ def submit_slurm(config, client, current_workunit, jobline):
# Slow ourselves down a bit
time.sleep(0.1)

def submit_lsf(config, client, current_workunit, jobline):
    """Submit every subjob of a workunit to LSF by piping a script to ``bsub``.

    For each subjob a submit script is rendered from the Jinja2 template
    named by ``config['lsf_template']``, written to
    ``<sharedfs_workunit_path>/<jobline>/submit_<subjob_id>.lsf`` and fed to
    ``bsub`` on stdin. The numeric job id that ``bsub`` prints is recorded
    per subjob.

    Args:
        config: Mapping of job settings. Reads 'lsf_template',
            'sharedfs_workunit_path', 'job_letter', 'job_name',
            'threads_to_use', 'lsf_cpus', 'lsf_queue', 'job_storage_mode'
            and 'lsf_job_submission_timeout'. 'lsf_account' is optional and
            falls back to an empty string when the key is absent.
        client: Not used by this function; kept so the signature matches
            the other submit functions.
        current_workunit: Workunit dict. 'subjobs' and 'download_path' are
            read; on success 'status' is replaced with the submission
            record (state, job name and the subjob -> LSF job id map).
        jobline: Workunit identifier; converted with ``str()`` for use in
            paths and job names.

    Raises:
        IOError: If the template cannot be read or a submit file cannot be
            written.
        Exception: If ``bsub`` times out, exits non-zero, or prints output
            from which no job id can be parsed.
    """

    # Get the template
    template_path = config['lsf_template']
    try:
        with open(template_path) as f:
            lsf_template = jinja2.Template(f.read())
    except IOError:
        print(f"Cannot open the lsf_template ({template_path})")
        raise

    jobline_str = str(jobline)

    # How many jobs are there that we need to submit?
    subjobs_count = len(current_workunit['subjobs'])

    # Where are we putting the submit files?
    batch_workunit_base = Path(config['sharedfs_workunit_path']) / jobline_str
    batch_workunit_base.mkdir(parents=True, exist_ok=True)

    # Values that are the same for every subjob are computed once.
    # 'lsf_account' is optional, so a missing key must not abort the
    # submission; the template only needs a falsy value to skip "-P".
    common_values = {
        "job_letter": config['job_letter'],
        "job_name": config['job_name'],
        "threads_to_use": config['threads_to_use'],
        "lsf_cpus": config['lsf_cpus'],
        "lsf_account": config.get('lsf_account', ''),
        "lsf_queue": config['lsf_queue'],
        "workunit_id": jobline_str,
        "job_storage_mode": config['job_storage_mode'],
        "job_tgz": current_workunit['download_path'],
        "batch_workunit_base": batch_workunit_base.resolve().as_posix()
    }
    submit_timeout = int(config['lsf_job_submission_timeout'])

    # bsub reads the job script (including its #BSUB lines) from stdin
    cmd = ["bsub"]

    # Each subjob is submitted as its own LSF job rather than as one array
    # job, so the job id of every subjob is tracked here.
    job_ids = {}

    for subjob_id in range(subjobs_count):

        batch_submit_file = batch_workunit_base / f"submit_{subjob_id}.lsf"

        render_output = lsf_template.render(
            {**common_values, "subjob_id": subjob_id})

        try:
            with open(batch_submit_file, "w") as f:
                f.write(render_output)
        except IOError:
            print(f"Cannot write the workunit lsf file ({batch_submit_file})")
            raise

        try:
            with open(batch_submit_file, "r") as stdin_file:
                ret = subprocess.run(cmd, stdin=stdin_file,
                                     capture_output=True, text=True,
                                     timeout=submit_timeout)
        except subprocess.TimeoutExpired as err:
            # Chain the cause so the traceback shows the real timeout details
            raise Exception("timeout on submission to bsub") from err

        if ret.returncode != 0:
            raise Exception(f"bsub did not return successfully: {ret.stderr}")

        match = re.search(
            r'Job <(?P<value>\d+)> is submitted', ret.stdout)
        if not match:
            raise Exception("bsub returned, but cannot parse output")
        job_ids[str(subjob_id)] = int(match.group('value'))

        # Slow ourselves down a bit
        time.sleep(0.1)

    current_workunit['status'] = {
        'af_job_status': 'SUBMITTED',
        'job_name': f"afvs-{config['job_letter']}-{jobline_str}",
        'job_ids': job_ids
    }

def submit_aws_batch(config, client, current_workunit, jobline):

Expand Down Expand Up @@ -313,6 +389,8 @@ def process(config, start, stop):
submit_slurm(config, client, current_workunit, jobline)
elif(submit_type == "bash"):
run_bash(config, current_workunit, jobline)
elif(submit_type == "lsf"):
submit_lsf(config, client, current_workunit, jobline)
else:
print(f"Unknown submit type {submit_type}")

Expand Down
52 changes: 36 additions & 16 deletions tools/templates/all.ctrl
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
****** Job Resource Configuration

job_name=test
job_name=tyk2_lsf_test
# alphabetic characters (i.e. letters from a-z or A-Z)
# Used to describe distinct runs (using the same name will
# overwrite data if using S3!)

threads_per_docking=1
# How many threads should be used for each docking program.

threads_to_use=16
threads_to_use=8
# This sets how many processes the main execution loop should be using
# to process. This is generally 2x the number of vCPUs or hyperthreads
# available on the system it is being run on
Expand All @@ -21,8 +21,8 @@ program_timeout=90
** Batch system configuration
************************************************

batchsystem=awsbatch
# Possible values: awsbatch, slurm
batchsystem=lsf
# Possible values: awsbatch, slurm, lsf, bash

****** AWS Batch Options (if batchsystem=awsbatch)

Expand Down Expand Up @@ -92,6 +92,26 @@ slurm_array_job_size=100
slurm_job_submission_timeout=10
# Timeout for submission of slurm jobs

****** LSF Options (if batchsystem=lsf)

lsf_template=./templates/template1.lsf.sh
# Template for the LSF job

lsf_account=
# LSF project/account to use (-P). If not set, default is used

lsf_queue=standard
# Queue to submit the job (-q)

lsf_cpus=8
# Number of CPUs per job (-n)

lsf_job_submission_timeout=10
# Timeout in seconds for each individual bsub call

lsf_array_job_size=100
# Maximum number of subjobs to group under a single workunit

****** Bash Options (if batchsystem=bash)

bash_template=./templates/template1.bash
Expand All @@ -105,7 +125,7 @@ bash_array_job_size=100
** Storage configuration
************************************************

data_storage_mode=s3
data_storage_mode=sharedfs
# This mode determines where data is retrieved as part of AFVS
# * s3: Job data is stored on S3 object store, which is the required
# mode if using AWS Batch. Items under the "S3 Object Store"
Expand All @@ -115,7 +135,7 @@ data_storage_mode=s3
# same shared filesystem that will allow for both input and output
# of data. This is only allowed if Slurm is set as the scheduler

job_storage_mode=s3
job_storage_mode=sharedfs
# This mode determines where data is stored after a run
# * s3: Job data is stored on S3 object store, which is the required
# mode if using AWS Batch. Items under the "S3 Object Store"
Expand All @@ -126,7 +146,7 @@ job_storage_mode=s3
# of data. This is required if using Slurm or bash


data_collection_addressing_mode=hash
data_collection_addressing_mode=metatranche
# If input is placed with the hash addressing mode, then use 'hash'.
# otherwise use "metatranche" for the classic addressing mode

Expand Down Expand Up @@ -167,7 +187,7 @@ object_store_data_collection_prefix=Enamine_REAL_Space_2022q12

****** Shared Filesystem Settings

collection_folder=/home/ec2-user/collections
collection_folder=/scratch_space/YOUR_CLUSTER_USERNAME/collections
# Path to where the collection file (ready-to-dock ligands/docking requirement files)
# are stored
# * This is used when job_storage_mode=sharedfs or
Expand All @@ -184,7 +204,7 @@ collection_folder=/home/ec2-user/collections

****** Output information

summary_formats=parquet,csv.gz
summary_formats=csv.gz
# Format for summary files that are generated with the score data.
# Supported values:
# * csv.gz (comma delimited files)
Expand Down Expand Up @@ -230,7 +250,7 @@ collection_list_file=templates/todo.all
# Path to the file that contains the collection data on what should be
# processed as part of the workflow

dockings_per_subjob=1000
dockings_per_subjob=100
# Used as how many dockings should be processed per subjob. In general,
# a subjob should take about 20 min to an hour for efficiency.
# A reasonable number for this is generally 1000. The length of time
Expand All @@ -245,7 +265,7 @@ ligand_library_format=pdbqt
# For the selected docking program, the user is required to set
# this to a compatible value. Eg: for plants, set this to mol2

dynamic_tranche_filtering=1
dynamic_tranche_filtering=0
# Supported values:
# * 0: Disabled
# * 1: Enabled
Expand All @@ -262,7 +282,7 @@ tempdir_default=/dev/shm

****** Virtual Screening Options

docking_scenario_names=
docking_scenario_names=tyk2_qvina
# Names for the docking scenarios, separated by colons
# Each docking scenario has one value. Multiple docking scenarios/names have
# to be separated by colons ":" and without spaces
Expand All @@ -273,7 +293,7 @@ docking_scenario_names=
# Each of these folders must contain the file config.txt, which is used by the docking method to specify its options
# If other input files are required by the docking type, usually specified in the config.txt file, they have to be in the same folder.

docking_scenario_methods=
docking_scenario_methods=qvina02
# For each docking scenario name, a docking method has to be specified
# Possible values: qvina02, qvina_w, vina, smina_rigid, smina_flexible, gwovina, adfr, AutodockVina_1.2, AutodockZN
# gnina, rDock, M-Dock, MCDock, LigandFit, ledock, gold, iGemDock, idock, GalaxyDock3, autodock_gpu
Expand All @@ -286,7 +306,7 @@ docking_scenario_methods=
# smina_rigid has to be used for rigid docking with smina, while smina_flexible has to be used for flexible receptor docking
# scoring function can also be combined using the same strategy e.g. docking_scenario_methods=vina:nnscore2.0

docking_scenario_replicas=
docking_scenario_replicas=1
# Series of integers separated by colons ":"
# The number of values has to equal the number of docking methods
# specified in the variable "docking_scenario_methods"
Expand All @@ -296,7 +316,7 @@ docking_scenario_replicas=
# possible range: 1-99999 per field/docking method
# The docking scenario is comprised of all the docking types and their replicas

docking_scenario_batchsizes=
docking_scenario_batchsizes=1
# Purpose: How many ligands should be processed in a single invocation
# of the docking method (most methods do not allow more than 1)
# Series of integers separated by colons ":"
Expand All @@ -314,7 +334,7 @@ docking_scenario_basefolder=../input-files
# Base directory for where the docking scenarios are held. Nothing other
# than the required files for the docking scenario should be placed here

prescreen_mode=1
prescreen_mode=0
# Run a subset of the ligands included in the library. Only functional
# if library supports the prescreen functionality with .listing
# file inside the collection
Expand Down
68 changes: 68 additions & 0 deletions tools/templates/template1.lsf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env bash

# Copyright (C) 2019 Christoph Gorgulla
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# This file is part of AdaptiveFlow.
#
# AdaptiveFlow is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# AdaptiveFlow is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with AdaptiveFlow. If not, see <https://www.gnu.org/licenses/>.

# ---------------------------------------------------------------------------
#
# Description: LSF job file.
#
# ---------------------------------------------------------------------------

# Update the BSUB section if needed for your particular LSF
# installation. If a line starts with "##" (two #s) it will be
# ignored


#BSUB -J {{job_letter}}-{{workunit_id}}-{{subjob_id}}
#BSUB -n {{lsf_cpus}}
##BSUB -W 12:00
##BSUB -M 800
#BSUB -o {{batch_workunit_base}}/{{subjob_id}}.out
#BSUB -e {{batch_workunit_base}}/{{subjob_id}}.err
#BSUB -q {{lsf_queue}}
{% if lsf_account %}#BSUB -P {{lsf_account}}{% endif %}


# If you are using a virtualenv, make sure the correct one
# is being activated

# Quoted so a conda base path containing spaces is not word-split
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate afvs_env

# Ensure we run from the AFVS tools directory regardless of LSF working dir default.
# Stop here if the directory is missing: the relative paths used below
# (./templates/afvs_run.py, ${PWD}/bin) only make sense from that directory.
cd "{{batch_workunit_base}}/../../../tools" || exit 1


# Job Information -- generally nothing in this
# section should be changed
##################################################################################

export AFVS_WORKUNIT={{workunit_id}}
export AFVS_JOB_STORAGE_MODE={{job_storage_mode}}
export AFVS_WORKUNIT_SUBJOB={{subjob_id}}
export AFVS_TMP_PATH=/dev/shm
export AFVS_CONFIG_JOB_TGZ="{{job_tgz}}"
export AFVS_TOOLS_PATH="${PWD}/bin"
export AFVS_VCPUS={{threads_to_use}}

##################################################################################

# Record start/end times (seconds since the epoch) next to the job output
date +%s > "{{batch_workunit_base}}/{{subjob_id}}.start"
./templates/afvs_run.py
date +%s > "{{batch_workunit_base}}/{{subjob_id}}.end"
Loading