Skip to content

Commit a573e17

Browse files
Merge pull request #156 from luk27official/main
V4 database
2 parents b20419a + 8ecf5c8 commit a573e17

16 files changed

Lines changed: 731 additions & 46 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,6 @@ data/
1212
dist/
1313
node_modules/
1414
venv/
15+
16+
# Administration stuff (symlinked files)
17+
executor_p2rank
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
#!/usr/bin/env python3
2+
#
3+
# Create a new database including computation for all PDB entries.
4+
#
5+
import argparse
6+
import logging
7+
import os
8+
import typing
9+
import sys
10+
import requests
11+
import re
12+
import json
13+
import datetime
14+
import dataclasses
15+
16+
# Make sibling project directories importable, and expose "executor-p2rank"
# under an underscore alias so it can be imported as a Python package.
script_dir = os.path.dirname(os.path.abspath(__file__))
for relative_path in ("..",
                      "../executor-p2rank",
                      "../conservation/hmm_based",
                      "../conservation/alignment_based"):
    sys.path.append(os.path.join(script_dir, relative_path))

try:
    # Only the symlink creation can raise FileExistsError.
    os.symlink(
        os.path.join(script_dir, "..", "executor-p2rank"),
        os.path.join(script_dir, "..", "executor_p2rank"),
        target_is_directory=True)
except FileExistsError:
    print("Symlinks already exist")
27+
28+
logger = logging.getLogger(__name__)
29+
logger.setLevel(logging.DEBUG)
30+
31+
# Define a schema for the prediction.
32+
# We use a schema that corresponds to DatabaseV3 from web_server.src.database_v3
33+
from executor_p2rank.run_p2rank_task import execute_directory_task
34+
35+
@dataclasses.dataclass
class Prediction:
    """Description of a single prediction task as stored on disk.

    Mirrors the schema expected by DatabaseV3 (web_server.src.database_v3);
    the values are written into the task's info.json and configuration.json.
    """
    # Directory with given prediction task.
    directory: str
    # User identifier of given task.
    identifier: str
    # Name of a database (e.g. "v4" or "v4-conservation-hmm").
    database: str
    # Name of a conservation to compute ("hmm" or "none" in this script).
    conservation: str
    # If true the structure is not modified before predictions
    # (this script sets it when no chain restriction is given).
    structure_sealed: bool
    # Configuration name for p2rank ("default" or "conservation_hmm" here).
    p2rank_configuration: str
    # Additional metadata to save to the info file.
    metadata: typing.Dict
    # Identification of an experimental structure (PDB code).
    structure_code: typing.Optional[str] = None
    # File with user provided structure.
    structure_file: typing.Optional[str] = None
    # Identification of a predicted structure.
    uniprot_code: typing.Optional[str] = None
    # Restriction to given chains.
    chains: typing.Optional[list[str]] = None
59+
60+
61+
def _init_logging():
    """Configure root logging to append DEBUG-level records to a log file."""
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(
        level=logging.DEBUG,
        format=log_format,
        filename="create_pdb_database.log",
        filemode="a",
    )
67+
68+
def _end_logging():
69+
logging.shutdown()
70+
71+
72+
def _read_arguments() -> typing.Dict[str, str]:
73+
parser = argparse.ArgumentParser()
74+
parser.add_argument(
75+
"--output_directory", required=True,
76+
help="Output directory for the database.")
77+
parser.add_argument(
78+
"--database_name", required=True,
79+
help="Name of the database (e.g. 'v3')")
80+
parser.add_argument(
81+
"--input_pdbs",
82+
help="Path to the JSON file with PDB IDs that will be merged with all current PDB entries available online.")
83+
parser.add_argument(
84+
"--compute_conservation", action="store_true",
85+
help="If set, conservation will be computed for all PDB entries.")
86+
87+
88+
return vars(parser.parse_args())
89+
90+
def _create_folders(args: typing.Dict[str, str]):
    """Create the per-variant database folders under the output directory.

    For database name "v4" this creates "v4", "v4-alphafold",
    "v4-alphafold-conservation-hmm", "v4-conservation-hmm" and
    "v4-user-upload".
    """
    folder_names = ["", "-alphafold", "-alphafold-conservation-hmm", "-conservation-hmm", "-user-upload"]
    for folder_name in folder_names:
        folder_path = os.path.join(args["output_directory"], args["database_name"] + folder_name)
        if not os.path.exists(folder_path):
            # Fix: exist_ok avoids a crash if the folder appears between
            # the exists() check and the makedirs() call.
            os.makedirs(folder_path, exist_ok=True)
            logger.info(f"Created folder {folder_path}")

    logger.info("All folders created")
99+
100+
def _get_pdb_entries(args: typing.Dict[str, str]):
    """Return the list of PDB IDs to process.

    Downloads the current entry list from the RCSB holdings endpoint and,
    when --input_pdbs is given, merges it (deduplicated) with the IDs from
    that JSON file. Exits the process on a failed download.
    """
    # First, get a list of all PDB entries from the following endpoint.
    url_entries = "https://data.rcsb.org/rest/v1/holdings/current/entry_ids"

    # Fix: bounded timeout so a stuck endpoint cannot hang the run forever.
    response = requests.get(url_entries, timeout=60)
    if response.status_code != 200:
        logger.error(f"Failed to get PDB entries: {response.text}")
        sys.exit(1)

    entries = response.json()
    logger.info(f"Number of PDB entries: {len(entries)}")

    # If input_pdbs is provided, merge those IDs with the downloaded entries.
    if args["input_pdbs"]:
        # Fix: replaced a stray debug print() with a log record.
        logger.debug(f"Merging extra entries from {args['input_pdbs']}")
        with open(args["input_pdbs"], "r") as f:
            input_pdbs = json.load(f)

        # A set deduplicates IDs present in both sources.
        entries = set(entries)
        entries.update(input_pdbs)
        logger.info(f"Merged PDB entries with the input_pdbs: {len(entries)}")
        logger.info(f"Final number of PDB entries: {len(entries)}")

    entries_list = list(entries)

    logger.info("Returning list of PDB entries")

    return entries_list
128+
129+
def _get_directory(identifier: str, args: typing.Dict[str, str]) -> typing.Optional[str]:
    """Return directory for task with given identifier, or None if invalid.

    NOTE(review): the pattern is unanchored, so it only requires the
    identifier to START with word characters / "_" / "," — confirm whether
    stricter validation was intended.
    """
    if re.match(r"[_,\w]+", identifier) is None:
        return None
    # Two-level fan-out: bucket tasks by the 2nd and 3rd identifier characters.
    bucket = identifier[1:3]
    database_name = _get_database_name(args)
    return os.path.join(args["output_directory"], database_name, bucket, identifier)
136+
137+
138+
def _get_database_name(args: typing.Dict[str, str]) -> str:
139+
return args["database_name"] + "-conservation-hmm" if args["compute_conservation"] else args["database_name"]
140+
141+
def _parser_identifier(identifier: str):
142+
"""2SRC_A,B into 2SRC, [A,B]"""
143+
if "_" not in identifier:
144+
return identifier, []
145+
code, chains = identifier.split("_")
146+
return code.upper(), [chain.upper() for chain in chains.split(",")]
147+
148+
def _prepare_prediction_directory(prediction: Prediction):
    """Initialize the content of the task directory.

    Writes info.json at the task root and input/configuration.json under
    a freshly created "input" subdirectory; returns the info payload.
    """
    info = _create_info_file(prediction)
    _save_json(_info_file(prediction), info)
    input_directory = os.path.join(prediction.directory, "input")
    os.makedirs(input_directory, exist_ok=True)
    configuration = {
        "p2rank_configuration": prediction.p2rank_configuration,
        "structure_file": prediction.structure_file,
        "structure_code": prediction.structure_code,
        "structure_sealed": prediction.structure_sealed,
        "structure_uniprot": prediction.uniprot_code,
        "conservation": prediction.conservation,
        "chains": prediction.chains,
    }
    _save_json(os.path.join(input_directory, "configuration.json"), configuration)
    return info
166+
167+
def _create_info_file(prediction: Prediction):
    """Build the info.json payload for a freshly queued task.

    "created" and "lastChange" share the same local-time timestamp.
    """
    timestamp = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%S")
    info = {
        "id": prediction.identifier,
        "database": prediction.database,
        "created": timestamp,
        "lastChange": timestamp,
        "status": "queued",
        "metadata": prediction.metadata,
    }
    return info
177+
178+
def _info_file(prediction: Prediction) -> str:
    """Return the path of the info.json file inside the task directory."""
    return os.path.join(prediction.directory, "info.json")
180+
181+
def _save_json(path: str, content):
182+
with open(path, "w", encoding="utf-8") as stream:
183+
json.dump(content, stream, ensure_ascii=True)
184+
185+
186+
def _run_predictions(args: typing.Dict[str, str], entries_list: typing.List[str]):
    """Run a p2rank prediction task for every entry in entries_list.

    Entries with an invalid identifier, a directory that cannot be created,
    or a failing prediction are logged and skipped, so one broken entry
    cannot abort the batch. A summary of successful and unsuccessful
    entries is logged at the end.
    """
    successful_entries = []
    for entry in entries_list:
        logger.info(f"Running prediction for entry {entry}")

        directory = _get_directory(entry, args)
        if directory is None:
            logger.error(f"Invalid entry directory: {entry}")
            continue

        logger.info(f"Preparing prediction for entry {entry}")

        pdb_code, chains = _parser_identifier(entry)

        prediction = Prediction(
            directory=directory,
            identifier=entry,
            database=_get_database_name(args),
            # With no chain restriction the structure is used as-is.
            structure_sealed=len(chains) == 0,
            conservation="hmm" if args["compute_conservation"] else "none",
            p2rank_configuration="conservation_hmm" if args["compute_conservation"] else "default",
            structure_code=pdb_code,
            chains=chains,
            metadata={},
        )

        try:
            os.makedirs(prediction.directory, exist_ok=True)
        except OSError:
            logger.error(f"Failed to create directory {prediction.directory}")
            continue

        _prepare_prediction_directory(prediction)

        logger.info(f"Running prediction for entry {entry}")

        # Fix: a crash inside one prediction must not terminate the whole
        # run; log it and move on to the next entry.
        try:
            execute_directory_task(prediction.directory, keep_working=False, stdout=False)
        except Exception:
            logger.exception(f"Prediction failed for entry {entry}")
            continue

        successful_entries.append(entry)

    logger.info(f"Number of successful entries: {len(successful_entries)}")
    logger.info(f"Number of unsuccessful entries: {len(entries_list) - len(successful_entries)}")

    logger.info("Successful entries:")
    logger.info(successful_entries)

    logger.info("Unsuccessful entries:")
    # Fix: set membership keeps this report O(n); the original list scan
    # was quadratic, which matters for large entry lists.
    successful_set = set(successful_entries)
    logger.info([entry for entry in entries_list if entry not in successful_set])

    logger.info("All predictions done")
236+
237+
238+
def _main():
    """Entry point: build the folder layout, collect entries, run predictions."""
    args = _read_arguments()
    _init_logging()
    logger.info("Creating a new database ...")

    _create_folders(args)
    pdb_entries = _get_pdb_entries(args)

    _run_predictions(args, pdb_entries)

    _end_logging()


if __name__ == "__main__":
    _main()

documentation/predication-info.schema.json

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@
2020
"v3-alphafold",
2121
"v3-alphafold-conservation-hmm",
2222
"v3-conservation-hmm",
23-
"v3-user-upload"
23+
"v3-user-upload",
24+
"v4",
25+
"v4-alphafold",
26+
"v4-alphafold-conservation-hmm",
27+
"v4-conservation-hmm",
28+
"v4-user-upload"
2429
]
2530
},
2631
"created": {

executor-p2rank/Dockerfile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,15 @@ RUN chmod a+x ./p2rank.sh \
192192
&& chmod a+x ./run_p2rank.py \
193193
&& chmod a+x ./run_p2rank_task.py
194194

195+
#
196+
# administration tools
197+
WORKDIR /opt/administration
198+
COPY --chown=user:user ./administration/requirements.txt ./
199+
RUN pip3 install -r requirements.txt
200+
COPY --chown=user:user ./administration ./
201+
202+
RUN chmod a+x ./create_pdb_database.py
203+
195204
#
196205
# environment
197206
WORKDIR /opt/executor-p2rank

executor-p2rank/p2rank.sh

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@ export JAVA_OPTS="$JAVA_OPTS -Xmx4196m"
55

66
# Set paths.
77
export INSTALL_DIR="/opt/p2rank/"
8-
CLASSPATH="${INSTALL_DIR}/bin/p2rank.jar:${INSTALL_DIR}/bin/lib/*"
8+
9+
if [ -n "$CLASSPATH" ]; then
10+
CLASSPATH="${INSTALL_DIR}/bin/p2rank.jar:${INSTALL_DIR}/bin/lib/*:$CLASSPATH"
11+
else
12+
CLASSPATH="${INSTALL_DIR}/bin/p2rank.jar:${INSTALL_DIR}/bin/lib/*"
13+
fi
914

1015
# Determine the Java command to use to start the JVM.
1116
if [ -n "$JAVA_HOME" ] ; then
@@ -14,12 +19,5 @@ else
1419
JAVACMD="java"
1520
fi
1621

17-
# Disable Groovy warnings in Java 11.
18-
JAVA_VERSION=`"$JAVACMD" -version 2>&1 | head -1 | cut -d '"' -f 2`
19-
JAVA_VERSION_MAJOR=`echo $JAVA_VERSION | cut -d '.' -f 1`
20-
if [[ "$JAVA_VERSION_MAJOR" -ge 11 ]]; then
21-
export JAVA_OPTS="$JAVA_OPTS --add-opens=java.base/sun.nio.fs=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.annotation=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.module=ALL-UNNAMED --add-opens=java.base/java.lang.ref=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.math=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.net.spi=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.nio.channels=ALL-UNNAMED --add-opens=java.base/java.nio.channels.spi=ALL-UNNAMED --add-opens=java.base/java.nio.charset=ALL-UNNAMED --add-opens=java.base/java.nio.charset.spi=ALL-UNNAMED --add-opens=java.base/java.nio.file=ALL-UNNAMED --add-opens=java.base/java.nio.file.attribute=ALL-UNNAMED --add-opens=java.base/java.nio.file.spi=ALL-UNNAMED --add-opens=java.base/java.security=ALL-UNNAMED --add-opens=java.base/java.security.acl=ALL-UNNAMED --add-opens=java.base/java.security.cert=ALL-UNNAMED --add-opens=java.base/java.security.interfaces=ALL-UNNAMED --add-opens=java.base/java.security.spec=ALL-UNNAMED --add-opens=java.base/java.text=ALL-UNNAMED --add-opens=java.base/java.text.spi=ALL-UNNAMED --add-opens=java.base/java.time=ALL-UNNAMED --add-opens=java.base/java.time.chrono=ALL-UNNAMED --add-opens=java.base/java.time.format=ALL-UNNAMED --add-opens=java.base/java.time.temporal=ALL-UNNAMED --add-opens=java.base/java.time.zone=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED --add-opens=java.base/java.util.function=ALL-UNNAMED --add-opens=java.base/java.util.jar=ALL-UNNAMED --add-opens=java.base/java.util.regex=ALL-UNNAMED --add-opens=java.base/java.util.spi=ALL-UNNAMED 
--add-opens=java.base/java.util.stream=ALL-UNNAMED --add-opens=java.base/java.util.zip=ALL-UNNAMED --add-opens=java.datatransfer/java.awt.datatransfer=ALL-UNNAMED --add-opens=java.desktop/java.applet=ALL-UNNAMED --add-opens=java.desktop/java.awt=ALL-UNNAMED --add-opens=java.desktop/java.awt.color=ALL-UNNAMED --add-opens=java.desktop/java.awt.desktop=ALL-UNNAMED --add-opens=java.desktop/java.awt.dnd=ALL-UNNAMED --add-opens=java.desktop/java.awt.dnd.peer=ALL-UNNAMED --add-opens=java.desktop/java.awt.event=ALL-UNNAMED --add-opens=java.desktop/java.awt.font=ALL-UNNAMED --add-opens=java.desktop/java.awt.geom=ALL-UNNAMED --add-opens=java.desktop/java.awt.im=ALL-UNNAMED --add-opens=java.desktop/java.awt.im.spi=ALL-UNNAMED --add-opens=java.desktop/java.awt.image=ALL-UNNAMED --add-opens=java.desktop/java.awt.image.renderable=ALL-UNNAMED --add-opens=java.desktop/java.awt.peer=ALL-UNNAMED --add-opens=java.desktop/java.awt.print=ALL-UNNAMED --add-opens=java.desktop/java.beans=ALL-UNNAMED --add-opens=java.desktop/java.beans.beancontext=ALL-UNNAMED --add-opens=java.instrument/java.lang.instrument=ALL-UNNAMED --add-opens=java.logging/java.util.logging=ALL-UNNAMED --add-opens=java.management/java.lang.management=ALL-UNNAMED --add-opens=java.prefs/java.util.prefs=ALL-UNNAMED --add-opens=java.rmi/java.rmi=ALL-UNNAMED --add-opens=java.rmi/java.rmi.activation=ALL-UNNAMED --add-opens=java.rmi/java.rmi.dgc=ALL-UNNAMED --add-opens=java.rmi/java.rmi.registry=ALL-UNNAMED --add-opens=java.rmi/java.rmi.server=ALL-UNNAMED --add-opens=java.sql/java.sql=ALL-UNNAMED --add-opens=java.desktop/javax.swing=ALL-UNNAMED --add-opens=java.desktop/javax.swing.border=ALL-UNNAMED --add-opens=java.desktop/javax.swing.text=ALL-UNNAMED --add-opens=java.desktop/javax.swing.text.html=ALL-UNNAMED --add-opens=java.desktop/sun.awt=ALL-UNNAMED --add-opens=java.desktop/sun.java2d=ALL-UNNAMED --add-opens=java.desktop/sun.font=ALL-UNNAMED"
22-
fi
23-
2422
# We can ignore all stdout as it is also in the stderr as info level logs.
2523
"$JAVACMD" $JAVA_OPTS -cp "${CLASSPATH}" cz.siret.prank.program.Main -stdout_timestamp "yyyy.MM.dd HH:mm" 1>/dev/null "$@"

frontend/client/analyze/analyze.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@
4141
</div>
4242
<div id="progress-stdout" class="row justify-content-center">
4343
<code class="col-12 col-lg-10 col-xl-8">
44-
<pre id="progress-stdout-text"></pre>
45-
</code>
44+
<pre id="progress-stdout-text"></pre>
45+
</code>
4646
</div>
4747
@require("../partials/footer.html")
4848
</div>

frontend/client/analyze/analyze.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ async function checkTaskStatus() {
4343
}
4444
switch (response.content.status) {
4545
case TaskStatus.queued:
46-
renderQueued()
46+
renderQueued();
4747
queuedTimeout = Math.min(queuedTimeout + 1000, 10000);
4848
setTimeout(checkTaskStatus, queuedTimeout);
4949
return;
@@ -56,7 +56,7 @@ async function checkTaskStatus() {
5656
default:
5757
renderRunningTask(params.database, params.id);
5858
setTimeout(checkTaskStatus, 3000);
59-
return
59+
return;
6060
}
6161
}
6262

@@ -65,7 +65,7 @@ function getUrlQueryParams() {
6565
return {
6666
"database": params.get("database"),
6767
"id": params.get("code"),
68-
}
68+
};
6969
}
7070

7171
function renderInvalidTask() {
@@ -144,7 +144,8 @@ function renderFailedTask(database: string, id: string) {
144144
UserInterface.progressQuestions,
145145
UserInterface.progressStdout
146146
]);
147-
setProgressMessage("Task failed, see the log below for more details.");
147+
setProgressMessage("Task failed, see the log below for more details.<br/>");
148+
if (!database.includes("user")) setProgressMessage("Task failed, see the log below for more details.<br/><strong>To re-run the prediction, please refresh the page.</strong>");
148149
setStdout(database, id);
149150
}
150151

0 commit comments

Comments
 (0)