Skip to content

Commit 3aabaf1

Browse files
Merge pull request #3 from PyEED/blastToolsLocally
Blast tools locally
2 parents fe3d564 + 9a3cb05 commit 3aabaf1

File tree

11 files changed

+263
-1
lines changed

11 files changed

+263
-1
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,11 @@
22
[![Publish Docker image](https://github.com/EnzymeML/EnzymeML_JupyterLab/actions/workflows/release_image.yaml/badge.svg)](https://github.com/EnzymeML/EnzymeML_JupyterLab/actions/workflows/release_image.yaml)
33

44
🐳 Docker image with JupyterLab and `pyeed`
5+
6+
#### Ports used by the different containers
7+
8+
- `cytoscape` uses `6080` and `8787`
9+
- jupyterlab uses `8888`
10+
- clustalo uses `5001`
11+
- blast uses `6001`
12+
- mmseqs2 uses `8000`

blast/Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
FROM ncbi/blast

# Install Python plus the FastAPI/uvicorn stack that serves app.py.
# The apt package lists and the pip cache are dropped in the same layer
# so they do not end up in the final image.
RUN apt-get update \
    && apt-get install -y python3 python3-pip \
    && rm -rf /var/lib/apt/lists/*
RUN pip3 install --no-cache-dir fastapi uvicorn

# Add the Python script to the container
COPY app.py /usr/local/bin/app.py

# Set the working directory
WORKDIR /usr/local/bin

# Expose the port the server will run on
EXPOSE 6001

# Run the Python server script
CMD ["python3", "app.py"]

blast/app.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
from fastapi import FastAPI, HTTPException, Request
2+
from pydantic import BaseModel
3+
import subprocess
4+
import os
5+
import uuid
6+
from typing import Optional
7+
import logging
8+
9+
app = FastAPI()
10+
logger = logging.getLogger(__name__)
11+
12+
class BlastRequest(BaseModel):
    """Fields that ``run_blast`` reads from the posted JSON body.

    Every value is a string because each one is forwarded as a command-line
    argument to the selected BLAST executable.
    """

    tool: str  # executable name, placed first in the command
    query: str  # query sequence, written to a FASTA file before the run
    db: str  # value passed to ``-db``
    evalue: str  # value passed to ``-evalue``
    outfmt: str  # value passed to ``-outfmt``
    # Value passed to ``-num_threads``; it was read by ``run_blast`` but
    # missing from this model.
    num_threads: str = "1"
18+
19+
20+
## ENDPOINTS --------------------------
21+
22+
def create_fastas_file_from_seq(seq, filename):
    """Write ``seq`` to ``filename`` as a FASTA file.

    A bare sequence is stored as a single record named ``seq``. Input that
    already begins with a ``>`` header line is written as given (minus
    surrounding whitespace) so the file does not end up with two headers.

    Args:
        seq: Raw sequence, or text that is already FASTA formatted.
        filename: Path of the file to create or overwrite.
    """
    text = str(seq).strip()
    if not text.startswith(">"):
        text = f">seq\n{text}"
    with open(filename, 'w') as file:
        file.write(f"{text}\n")
25+
26+
@app.get("/")
async def read_root():
    """Return a static greeting for the service root."""
    greeting = {"message": "Welcome to the BLAST API!"}
    return greeting
29+
30+
# Executables a client may select through the ``tool`` field. The value comes
# straight from the request body, so it must never be executed unchecked.
_ALLOWED_BLAST_TOOLS = frozenset({
    "blastn", "blastp", "blastx", "tblastn", "tblastx",
    "psiblast", "deltablast", "rpsblast", "rpstblastn",
})


# The BLAST parameters are taken from the JSON body of the request.
@app.post("/run_blast")
async def run_blast(request: Request):
    """Run a BLAST tool on the posted query and return its raw output.

    The JSON body must contain ``tool``, ``query``, ``db``, ``evalue`` and
    ``outfmt``; ``num_threads`` is optional and defaults to ``1``. Each
    request works in its own temporary directory so concurrent requests
    cannot overwrite each other's query or result files.

    Returns:
        The text the tool wrote to its ``-out`` file.

    Raises:
        HTTPException: 400 when the body is not a JSON object, a field is
            missing or the tool is not allowed; 500 when the tool cannot be
            started or exits with a non-zero status.
    """
    import tempfile  # local import keeps this block self-contained

    payload = await request.json()
    if not isinstance(payload, dict):
        raise HTTPException(status_code=400, detail="Request body must be a JSON object")

    required_fields = ("tool", "query", "db", "evalue", "outfmt")
    missing = [field for field in required_fields if field not in payload]
    if missing:
        raise HTTPException(
            status_code=400,
            detail=f"Missing field(s): {', '.join(missing)}",
        )

    tool = str(payload["tool"])
    if tool not in _ALLOWED_BLAST_TOOLS:
        raise HTTPException(status_code=400, detail=f"Unsupported tool: {tool}")

    with tempfile.TemporaryDirectory() as workdir:
        query_filename = os.path.join(workdir, "in.fasta")
        result_filename = os.path.join(workdir, "out.out")
        # Pre-create the result file so the read below cannot hit a missing file.
        open(result_filename, 'w').close()

        # Create the FASTA file
        create_fastas_file_from_seq(payload["query"], query_filename)

        # JSON numbers are accepted too: subprocess only takes strings.
        command = [
            tool,
            '-query', query_filename,
            '-db', str(payload["db"]),
            '-evalue', str(payload["evalue"]),
            '-outfmt', str(payload["outfmt"]),
            '-num_threads', str(payload.get("num_threads", "1")),
            '-out', result_filename,
            '-max_target_seqs', '10000',
        ]

        try:
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers an executable that is missing from the image.
            raise HTTPException(status_code=500, detail=str(e)) from e

        with open(result_filename, 'r') as file:
            return file.read()
65+
66+
67+
68+
if __name__ == '__main__':
    # Started directly (``python3 app.py``): serve on all interfaces, port 6001.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 6001, "reload": True}
    uvicorn.run("app:app", **server_options)

blast/reload_development.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/usr/bin/env bash
# Rebuild the BLAST image from scratch and restart its container.
sudo docker stop blast_docker
# `docker rm` is the portable command for removing a container.
sudo docker rm blast_docker
sudo docker build --no-cache -t blast_docker .
# Host /mnt/databases is mounted at /blast/blastdb; the API is published on port 6001.
sudo docker run --name blast_docker --volume /mnt/databases:/blast/blastdb -p 6001:6001 blast_docker

clustalo/Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,7 @@ RUN wget http://www.clustal.org/omega/clustalo-1.2.4-Ubuntu-x86_64 \
1111
COPY requirements.txt .
1212
RUN pip install -r requirements.txt
1313

14+
COPY app.py .
15+
16+
1417
CMD ["python", "app.py"]
0 Bytes
Binary file not shown.

clustalo/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
fastapi
22
python-multipart
3-
uvicorn
3+
uvicorn

mmseqs2/Dockerfile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Base image for mmseqs2
FROM soedinglab/mmseqs2:latest

# Add the standard Debian repositories to ensure we can install all packages
RUN echo "deb http://deb.debian.org/debian bullseye main contrib non-free" > /etc/apt/sources.list
RUN echo "deb http://security.debian.org/debian-security bullseye-security main contrib non-free" >> /etc/apt/sources.list

# Install Python plus the FastAPI/uvicorn stack that serves app.py.
# The apt package lists and the pip cache are dropped in the same layer
# so they do not end up in the final image.
RUN apt-get update \
    && apt-get install -y python3 python3-pip \
    && rm -rf /var/lib/apt/lists/*
RUN pip3 install --no-cache-dir fastapi uvicorn

# Copy the FastAPI app to the container
COPY app.py app.py

# Expose the port on which FastAPI will run
EXPOSE 8000

# Start the FastAPI server when the container starts
CMD ["python3", "app.py"]

mmseqs2/app.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
from fastapi import FastAPI, HTTPException
2+
from pydantic import BaseModel
3+
import subprocess
4+
import os
5+
from uuid import uuid4
6+
import shutil
7+
8+
app = FastAPI()
9+
10+
# Define a model for the input parameters
11+
class MMSeqsParams(BaseModel):
    """Request body of the ``/run_mmseqs`` endpoint."""

    # Query sequence text
    query: str
    # Target database argument handed to mmseqs
    database: str
    # Output directory
    output: str
    # Sensitivity parameter for mmseqs2
    sensitivity: float = 7.5
    # Number of threads to use
    threads: int = 4
    # Whether to convert the result to BLAST+ format
    blast_format: bool = True
18+
19+
# Dictionary to keep track of running jobs and results
20+
job_results = {}
21+
22+
def create_fastas_file_from_seq(seq, filename):
    """Write ``seq`` to ``filename`` as a FASTA file.

    A bare sequence is stored as a single record named ``seq``. Input that
    already begins with a ``>`` header line is written as given (minus
    surrounding whitespace) so the file does not end up with two headers.

    Args:
        seq: Raw sequence, or text that is already FASTA formatted.
        filename: Path of the file to create or overwrite.
    """
    text = str(seq).strip()
    if not text.startswith(">"):
        text = f">seq\n{text}"
    with open(filename, 'w') as file:
        file.write(f"{text}\n")
25+
26+
def create_queryDB_from_seq(filename):
    """Build an MMseqs2 database from a FASTA file.

    Runs ``mmseqs createdb <filename> <db_path>``.

    Args:
        filename: Path of the FASTA file to convert.

    Returns:
        The path of the database that was created.

    Raises:
        HTTPException: 500 when ``mmseqs createdb`` exits with an error.
    """
    # NOTE: run_mmseqs derives the database path with this exact expression,
    # so the naming must stay as is ("query.fasta" -> "query..db").
    db_path = filename.replace('fasta', '') + ".db"

    try:
        subprocess.run(["mmseqs", "createdb", filename, db_path], check=True)
    except subprocess.CalledProcessError as e:
        # 600 is outside the valid HTTP status range (100-599); report a
        # regular server error instead.
        raise HTTPException(status_code=500, detail=str(e)) from e

    return db_path
42+
43+
44+
@app.get("/")
async def read_root():
    """Return a static greeting for the service root."""
    greeting = {"message": "Welcome to the MMSeqs2 API!"}
    return greeting
47+
48+
@app.post("/run_mmseqs")
async def run_mmseqs(params: MMSeqsParams):
    """Run an MMseqs2 search for the posted query and register the result.

    The query is written to ``/tmp/<job_id>/query.fasta``, turned into a
    query database, searched against ``params.database`` and exported to a
    text file: BLAST tabular (``result.m8``) when ``params.blast_format`` is
    set, otherwise ``result.tsv``.

    Returns:
        ``{"job_id": <id>}``; the id can be passed to ``/results/{job_id}``.

    Raises:
        HTTPException: 500 when an mmseqs command exits with an error.
    """
    # Create a unique job id and its working directory
    job_id = str(uuid4())
    output_dir = f"/tmp/{job_id}"
    os.makedirs(output_dir, exist_ok=True)

    # Prepare paths
    result_db_path = os.path.join(output_dir, "result")
    result_m8_path = os.path.join(output_dir, "result.m8")
    result_tsv_path = os.path.join(output_dir, "result.tsv")

    # Create the FASTA file and the query database built from it
    path_query = os.path.join(output_dir, "query.fasta")
    # Must match the database name create_queryDB_from_seq derives from path_query.
    path_queryDB = path_query.replace('fasta', '') + ".db"
    create_fastas_file_from_seq(params.query, path_query)
    create_queryDB_from_seq(path_query)

    search_command = [
        "mmseqs", "search",
        path_queryDB,
        params.database,
        result_db_path,
        output_dir,  # fourth positional argument of `mmseqs search`: its tmp dir
        "--threads", str(params.threads),
        # mmseqs spells the sensitivity option `-s`
        "-s", str(params.sensitivity),
    ]

    # Both exporters take: queryDB targetDB resultDB outputFile. They need the
    # query *database*, not the raw sequence text in params.query.
    if params.blast_format:
        export_tool, result_path = "convertalis", result_m8_path
    else:
        export_tool, result_path = "createtsv", result_tsv_path
    export_command = [
        "mmseqs", export_tool,
        path_queryDB,
        params.database,
        result_db_path,
        result_path,
    ]

    try:
        subprocess.run(search_command, check=True)
        subprocess.run(export_command, check=True)
    except subprocess.CalledProcessError as e:
        raise HTTPException(status_code=500, detail=f"mmseqs2 failed: {str(e)}") from e

    # Remember where the exported file lives so /results/{job_id} can read it
    job_results[job_id] = {
        "status": "completed",
        "result_path": result_path,
    }
    return {"job_id": job_id}
110+
111+
@app.get("/results/{job_id}")
async def get_results(job_id: str):
    """Return the stored status and result text of a finished job.

    Raises:
        HTTPException: 404 when the job id is unknown or its result file
            does not exist.
    """
    # Unknown job ids are rejected first
    if job_id not in job_results:
        raise HTTPException(status_code=404, detail="Job not found")

    job = job_results[job_id]
    output_file = job["result_path"]

    # Guard clause: nothing to read if the file is absent
    if not os.path.exists(output_file):
        raise HTTPException(status_code=404, detail="Result file not found")

    with open(output_file, "r") as handle:
        contents = handle.read()
    return {"status": job["status"], "results": contents}
128+
129+
130+
if __name__ == '__main__':
    # Started directly (``python3 app.py``): serve on all interfaces, port 8000.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000, "reload": True}
    uvicorn.run("app:app", **server_options)

mmseqs2/reload_development.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/usr/bin/env bash
# Rebuild the MMseqs2 image from scratch and restart its container.
sudo docker stop mmseq_docker
# `docker rm` is the portable command for removing a container.
sudo docker rm mmseq_docker
sudo docker build --no-cache -t mmseq_docker .
# Host /mnt/databases is mounted at /app; the API is published on port 8000.
sudo docker run --name mmseq_docker --volume /mnt/databases:/app -p 8000:8000 mmseq_docker

0 commit comments

Comments
 (0)