Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Dev container image: Debian bullseye base plus NCBI BLAST+ (apt package) and
# ViennaRNA (built from the upstream source tarball).
# Note: You can use any Debian/Ubuntu based image you want.
FROM mcr.microsoft.com/devcontainers/base:bullseye

# ViennaRNA release to build. The download URL also contains the release-series
# directory, so change both values together when upgrading.
ARG VIENNARNA_SERIES=2_6_x
ARG VIENNARNA_VERSION=2.6.4

# OS packages required by the project and by the source build below.
# update + install + list cleanup happen in ONE layer so the apt index is never
# stale and never shipped in the image.
# NOTE(review): build-essential is added on the assumption that the base image
# does not ship a C/C++ toolchain for ./configure && make -- confirm, and drop
# it if the base already provides one.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends \
        build-essential \
        ncbi-blast+ \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /tmp

# Download, build, install and clean up in a single layer so neither the
# tarball nor the build tree remains in the final image. Build steps already
# run as root, so no sudo is needed for `make install`.
# NOTE(review): the tarball is not checksum-verified; consider pinning a
# sha256 published by upstream.
RUN wget --progress=dot:giga \
        "https://www.tbi.univie.ac.at/RNA/download/sourcecode/${VIENNARNA_SERIES}/ViennaRNA-${VIENNARNA_VERSION}.tar.gz" \
    && tar -zxf "ViennaRNA-${VIENNARNA_VERSION}.tar.gz" \
    && cd "ViennaRNA-${VIENNARNA_VERSION}" \
    && ./configure \
    && make -j"$(nproc)" \
    && make install \
    && cd /tmp \
    && rm -rf "ViennaRNA-${VIENNARNA_VERSION}" "ViennaRNA-${VIENNARNA_VERSION}.tar.gz"
31 changes: 31 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Dev container definition for this repository.
// Format reference: https://aka.ms/devcontainer.json
// Template this file is based on:
// https://github.com/devcontainers/templates/tree/main/src/docker-outside-of-docker-compose
{
	"name": "Docker from Docker Compose",

	// The container is described by the Compose file next to this one; the
	// editor attaches to the "app" service and opens the folder below.
	"dockerComposeFile": "docker-compose.yml",
	"service": "app",
	"workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",

	// Exposes the host-side path of the workspace inside the container, for
	// cases where local source code must be bind mounted into a new container.
	"remoteEnv": {
		"LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}"
	},

	// Dev container features layered on top of the image built from the Dockerfile.
	"features": {
		"ghcr.io/devcontainers/features/docker-outside-of-docker:1": {
			"version": "latest",
			"enableNonRootDocker": "true",
			"moby": "true"
		},
		"ghcr.io/devcontainers/features/python:1": {},
		"ghcr.io/akhildevelops/devcontainer-features/pip:0": {}
	}

	// 'forwardPorts' lists ports inside the container to make available locally.
	// "forwardPorts": [],

	// 'postCreateCommand' runs commands after the container is created.
	// "postCreateCommand": "docker --version",

	// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
	// "remoteUser": "root"
}
26 changes: 26 additions & 0 deletions .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Compose definition for the dev container; devcontainer.json attaches to the
# "app" service declared here.
version: '3'

services:
  app:
    # Image is built from the Dockerfile in this directory.
    build:
      context: .
      dockerfile: Dockerfile

    volumes:
      # Forwards the local Docker socket to the container.
      - /var/run/docker.sock:/var/run/docker-host.sock
      # Update this to wherever you want VS Code to mount the folder of your project
      - ../..:/workspaces:cached

    # Overrides default command so things don't shut down after the process ends.
    entrypoint: /usr/local/share/docker-init.sh
    command: sleep infinity

    # Uncomment the next four lines if you will use a ptrace-based debugger for languages like C++, Go, and Rust.
    # cap_add:
    #  - SYS_PTRACE
    # security_opt:
    #   - seccomp:unconfined

    # Use "forwardPorts" in **devcontainer.json** to forward an app port locally.
    # (Adding the "ports" property to this file will not forward from a Codespace.)
12 changes: 12 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for more information:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
# https://containers.dev/guide/dependabot

version: 2
updates:
  # Weekly update checks for the "devcontainers" ecosystem, rooted at the
  # repository root.
  - package-ecosystem: "devcontainers"
    directory: "/"
    schedule:
      interval: weekly
167 changes: 167 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

tmp
Genomes/GCF_000005845.2/**
Genomes/GCF_000006765.1/**
DataFiles
12 changes: 6 additions & 6 deletions AddGenome.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def determine_closest_relatives(GENOME_DIR):
seq_16S = get_16S_sequence(GENOME_DIR)
with open(TEMP_FILENAME1, 'w') as out_file:
out_file.write('>16S' + '\n' + seq_16S + '\n')
p = subprocess.run(['./blastn', '-db', DB_16S, '-query', TEMP_FILENAME1, '-evalue', '0.01', '-max_target_seqs', str(BLAST_RESULTS+1), '-num_threads', str(NUM_THREADS), '-outfmt', '6 qseqid sseqid evalue bitscore', '-out', TEMP_FILENAME2], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.run(['blastn', '-db', DB_16S, '-query', TEMP_FILENAME1, '-evalue', '0.01', '-max_target_seqs', str(BLAST_RESULTS+1), '-num_threads', str(NUM_THREADS), '-outfmt', '6 qseqid sseqid evalue bitscore', '-out', TEMP_FILENAME2], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# Read in BLAST results
replicates = {}
Expand Down Expand Up @@ -310,14 +310,14 @@ def create_blast_filter_for_relatives(GENOME_DIR, target_ID_to_genome):
for t in targets: out_file.write(t + '\n')

# Format Blast filter file
p = subprocess.run(['./blastdb_aliastool', '-seqid_file_in', FILTER_FILE], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.run(['blastdb_aliastool', '-seqid_file_in', FILTER_FILE], stdout=subprocess.PIPE, stderr=subprocess.PIPE)


# Helper function for computing target homologs
# PERFORM BLAST WITHOUT USING MULTIPLE PROCESSORS
def blast_targets_against_database_SINGLE_PROCESS(protein_file):
BLAST_OUTPUT_FILE = str(time.time()) + '.blast'
p = subprocess.run(['./blastp', '-db', DB_FAA, '-query', protein_file, '-outfmt', '6 qseqid sseqid evalue bitscore', '-out', BLAST_OUTPUT_FILE, '-evalue', '0.01', '-max_target_seqs', '100', '-num_threads', str(NUM_THREADS), '-seqidlist', FILTER_FILE + '.bsl'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.run(['blastp', '-db', DB_FAA, '-query', protein_file, '-outfmt', '6 qseqid sseqid evalue bitscore', '-out', BLAST_OUTPUT_FILE, '-evalue', '0.01', '-max_target_seqs', '100', '-num_threads', str(NUM_THREADS), '-seqidlist', FILTER_FILE + '.bsl'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if (os.path.exists(protein_file)): os.remove(protein_file)
if (os.path.exists(FILTER_FILE)): os.remove(FILTER_FILE)
if (os.path.exists(FILTER_FILE + '.bsl')): os.remove(FILTER_FILE + '.bsl')
Expand All @@ -331,7 +331,7 @@ def run_blast(target_subset):
for target in target_subset:
GENOME_DIR, accession, protein_seq = target
out_file.write('>' + accession + '\n' + protein_seq + '\n')
p = subprocess.run(['./blastp', '-db', DB_FAA, '-query', accession_file, '-outfmt', '6 qseqid sseqid evalue bitscore', '-out', GENOME_DIR + accession + '.blast.xyz', '-evalue', '0.01', '-max_target_seqs', '100', '-num_threads', str(NUM_THREADS), '-seqidlist', FILTER_FILE + '.bsl'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.run(['blastp', '-db', DB_FAA, '-query', accession_file, '-outfmt', '6 qseqid sseqid evalue bitscore', '-out', GENOME_DIR + accession + '.blast.xyz', '-evalue', '0.01', '-max_target_seqs', '100', '-num_threads', str(NUM_THREADS), '-seqidlist', FILTER_FILE + '.bsl'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if (os.path.exists(accession_file)): os.remove(accession_file)


Expand Down Expand Up @@ -482,7 +482,7 @@ def run_RNAplfold(target):
accession, mRNA_name, mRNA_sequence, count = target
with open(GENOME_DIR + RNAPLFOLD_DIR + accession + '____' + mRNA_name + '.fa', 'w') as out_file:
out_file.write('>' + accession + '____' + mRNA_name + '\n' + mRNA_sequence + '\n')
p = subprocess.run(['./RNAplfold', '-u', '40', '-O', '--plex_output', '--auto-id', '--id-prefix', 'TR' + count], input=mRNA_sequence.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.run(['RNAplfold', '-u', '40', '-O', '--plex_output', '--auto-id', '--id-prefix', 'TR' + count], input=mRNA_sequence.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if (p.returncode != 0) or (len(p.stderr.decode()) > 0):
error('Problem executing RNAplfold:\t' + str(p.stderr.decode()) + '\n')
renameAndRemoveOutputFiles('TR' + count + '_0001', accession + '____' + mRNA_name)
Expand All @@ -501,7 +501,7 @@ def RNAplfold_MULTI_PROCESS(targets):
# Helper function for computing target accessibility
# CREATE BINARY VERSIONS OF ACCESSIBILITY FILES USING RNAplex
def create_binary_files():
p = subprocess.run(['./RNAplex', '-a', GENOME_DIR + RNAPLFOLD_DIR, '-k'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.run(['RNAplex', '-a', GENOME_DIR + RNAPLFOLD_DIR, '-k'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
filelist = os.listdir(GENOME_DIR + RNAPLFOLD_DIR)
for f in filelist:
if (f.endswith('_openen')): os.remove(GENOME_DIR + RNAPLFOLD_DIR + f)
Expand Down
10 changes: 5 additions & 5 deletions TargetRNA3.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,10 @@ def get_sRNA_homologs(SRNA_FILENAME, genome):
RESTRICT_FILENAME = FILENAME + '.restrict'
with open(RESTRICT_FILENAME, 'w') as out_file:
for accession in genome: out_file.write(accession + '\n')
p = subprocess.run(['./blastdb_aliastool', '-seqid_file_in', RESTRICT_FILENAME], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.run(['blastdb_aliastool', '-seqid_file_in', RESTRICT_FILENAME], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# BLAST sRNA sequence
p = subprocess.run(['./blastn', '-db', DB, '-query', SRNA_FILENAME, '-outfmt', '6 qseqid sseqid evalue bitscore qstart qend', '-out', SRNA_HOMOLOGS_FILENAME, '-evalue', '0.01', '-max_target_seqs', '100', '-num_threads', str(NUM_THREADS), '-negative_seqidlist', RESTRICT_FILENAME + '.bsl'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.run(['blastn', '-db', DB, '-query', SRNA_FILENAME, '-outfmt', '6 qseqid sseqid evalue bitscore qstart qend', '-out', SRNA_HOMOLOGS_FILENAME, '-evalue', '0.01', '-max_target_seqs', '100', '-num_threads', str(NUM_THREADS), '-negative_seqidlist', RESTRICT_FILENAME + '.bsl'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# Determine homologs of sRNA
sRNA_homologs = {}
Expand Down Expand Up @@ -299,10 +299,10 @@ def get_homologs(GENOME_DIR, SRNA_FILENAME, genome, genes):
def determine_sRNA_accessibility(GENOME_DIR, sRNA_name, sRNA_sequence):
TIME_STR = str(time.time())
WINDOW_SIZE = min(70, len(sRNA_sequence))
p = subprocess.run(['nice', './RNAplfold', '-u', '40', '-O', '--plex_output', '-W', str(WINDOW_SIZE), '--auto-id', '--id-prefix', TIME_STR], input=sRNA_sequence.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.run(['nice', 'RNAplfold', '-u', '40', '-O', '--plex_output', '-W', str(WINDOW_SIZE), '--auto-id', '--id-prefix', TIME_STR], input=sRNA_sequence.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if (p.returncode != 0) or (len(p.stderr.decode()) > 0):
sys.stderr.write('ERROR executing RNAplfold:\t' + str(p.stderr.decode()) + '\n')
p = subprocess.run(['./RNAplex', '-a', '.', '-k'], input=sRNA_sequence.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.run(['RNAplex', '-a', '.', '-k'], input=sRNA_sequence.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# Clean up
shutil.move(TIME_STR + '_0001_openen_bin', GENOME_DIR + RNAPLFOLD_DIR + sRNA_name + '_openen_bin')
Expand All @@ -313,7 +313,7 @@ def determine_sRNA_accessibility(GENOME_DIR, sRNA_name, sRNA_sequence):

# HELPER FUNCTION FOR COMPUTING INTERACTION ENERGIES. COMPUTES ENERGIES USING RNAPLEX.
def run_RNAplex(GENOME_DIR, SRNA_FILENAME, f):
p = subprocess.run(['nice', './RNAplex', '-f', '0', '-q', SRNA_FILENAME, '-t', GENOME_DIR + RNAPLFOLD_DIR + f, '-a', GENOME_DIR + RNAPLFOLD_DIR, '-b'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.run(['nice', 'RNAplex', '-f', '0', '-q', SRNA_FILENAME, '-t', GENOME_DIR + RNAPLFOLD_DIR + f, '-a', GENOME_DIR + RNAPLFOLD_DIR, '-b'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
structure_info = p.stdout.decode().strip().split('\n')[2]
return (f, structure_info)

Expand Down
Loading