Skip to content

Commit f718a27

Browse files
authored
Merge pull request #111 from PyEED/graph-db
Graph db
2 parents af3e981 + d8603d5 commit f718a27

File tree

146 files changed

+44156
-7649
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

146 files changed

+44156
-7649
lines changed

.github/workflows/lint.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
name: Lint
2+
3+
on: pull_request
4+
jobs:
5+
lint:
6+
runs-on: ubuntu-latest
7+
steps:
8+
- uses: actions/checkout@v4
9+
10+
- name: Set up Python
11+
uses: actions/setup-python@v4
12+
with:
13+
python-version: "3.x"
14+
15+
- name: Install dependencies
16+
run: |
17+
python -m pip install --upgrade pip
18+
python -m pip install poetry
19+
poetry install --with dev
20+
21+
- name: Run Ruff
22+
run: |
23+
poetry run ruff check .
24+
poetry run ruff format --check .
25+
26+
- name: Run mypy
27+
run: |
28+
poetry run mypy src/

.gitignore

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ __pycache__/
55

66
# C extensions
77
*.so
8+
.vscode/
9+
.huggingface/
810

911
# Distribution / packaging
1012
.Python
@@ -158,4 +160,9 @@ pyrightconfig.json
158160

159161
poetry.lock
160162

161-
.ruff_cache
163+
.ruff_cache
164+
165+
# Test python files
166+
test.py
167+
168+
docker-compose.yml

README.md

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77
[![Documentation](https://github.com/PyEED/pyeed/actions/workflows/make_docs.yaml/badge.svg)](https://github.com/PyEED/pyeed/actions/workflows/make_docs.yaml)
88

99
## About 📖
10-
pyEED is a toolkit enabling object-oriented analysis of protein sequences, instead of working with sequences in a file-oriented fashion. This will enable the user to easily access and manipulate sequence information and to perform analyses on the sequence data.
10+
pyeed is a toolkit enabling object-oriented analysis of protein sequences, instead of working with sequences in a file-oriented fashion. This will enable the user to easily access and manipulate sequence information and to perform analyses on the sequence data.
1111
This library is currently under development and thus the API is subject to change.
1212

13+
![PyEED](./docs/figs/pyeed-model.png)
14+
1315

1416
## Installation ⚙️
1517

@@ -20,9 +22,4 @@ pip install git+https://github.com/PyEED/pyeed.git
2022

2123
## Quick start 🚀
2224

23-
Library is currently refactored, quick start will be updated soon!
24-
25-
## Documentation 📘
26-
27-
Check out the [documentation](https://pyeed.github.io/pyeed/) for in-depth information on how to setup `pyeed`,
28-
use the build-in tools, and store sequence data in databases.
25+
### Launch Neo4j database via Docker and mount to a local directory

docker/blast/Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
FROM ncbi/blast
2+
3+
# Install Python, FastAPI and uvicorn
4+
RUN apt-get update && apt-get install -y python3 python3-pip
5+
RUN pip3 install fastapi uvicorn
6+
7+
# Add the Python script to the container
8+
COPY app.py /usr/local/bin/app.py
9+
10+
# Set the working directory
11+
WORKDIR /usr/local/bin
12+
13+
# Disable Python output buffering
14+
ENV PYTHONUNBUFFERED=1
15+
16+
# Run the Python server script
17+
CMD ["python3", "app.py"]

docker/blast/app.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
import logging
2+
import os
3+
import subprocess
4+
import sys
5+
6+
from fastapi import FastAPI, HTTPException, Request
7+
from fastapi.responses import RedirectResponse
8+
9+
app = FastAPI()
10+
11+
# Configure logging to write DEBUG-level messages to stdout; unbuffered output depends on PYTHONUNBUFFERED being set in the environment
12+
logging.basicConfig(
13+
level=logging.DEBUG,
14+
format="%(levelname)s - %(message)s",
15+
stream=sys.stdout,
16+
)
17+
logger = logging.getLogger(__name__)
18+
19+
20+
def to_fasta(seq: str) -> str:
    """Wrap a raw sequence in a single-record FASTA string.

    The record always uses the fixed header ``>query_sequence`` followed by
    the sequence on the next line (no trailing newline is appended).
    """
    record = ">query_sequence\n{}".format(seq)
    return record
22+
23+
24+
def _check_db_path_correct(db_path: str, db_name: str) -> None:
25+
# check if db_path exists
26+
if not os.path.exists(db_path):
27+
raise HTTPException(
28+
status_code=400, detail=f"Database path does not exist: {db_path}"
29+
)
30+
# check if db_path is a directory
31+
if not os.path.isdir(db_path):
32+
raise HTTPException(
33+
status_code=400, detail=f"Database path is not a directory: {db_path}"
34+
)
35+
36+
37+
@app.get("/")
async def read_root() -> RedirectResponse:
    """Redirect the service root to the interactive API docs at ``/docs``."""
    logger.debug("Entering root endpoint")
    # The annotation now matches what is actually returned, so no
    # ``type: ignore`` is needed to silence the type checker.
    return RedirectResponse(url="/docs")
41+
42+
43+
@app.get("/blastp_help")
def blastp_help() -> str:
    """Return the help text printed by ``blastp -help``.

    Returns:
        The captured standard output of the command.

    Raises:
        HTTPException: With status 400 when the command exits with a
            non-zero status.
    """
    logger.debug("Entering /blastp_help endpoint")

    command = ["blastp", "-help"]
    logger.debug(f"Running command: {command}")

    try:
        # check=True is what turns a non-zero exit into CalledProcessError;
        # without it the handler below could never run.
        result = subprocess.run(command, capture_output=True, check=True, text=True)
    except subprocess.CalledProcessError as e:
        # Log and raise an HTTP exception if the subprocess fails
        logger.error(f"blastp help command failed: {e.stderr}")
        raise HTTPException(
            status_code=400, detail=f"Command failed: {e.stderr}"
        ) from e

    # Return the help text
    return result.stdout
59+
60+
61+
@app.get("/blastn_help")
def blastn_help() -> str:
    """Return the help text printed by ``blastn -help``.

    Returns:
        The captured standard output of the command.

    Raises:
        HTTPException: With status 400 when the command exits with a
            non-zero status.
    """
    logger.debug("Entering /blastn_help endpoint")

    command = ["blastn", "-help"]
    logger.debug(f"Running command: {command}")

    try:
        # check=True is what turns a non-zero exit into CalledProcessError;
        # without it the handler below could never run.
        result = subprocess.run(command, capture_output=True, check=True, text=True)
    except subprocess.CalledProcessError as e:
        logger.error(f"blastn help command failed: {e.stderr}")
        raise HTTPException(
            status_code=400, detail=f"Command failed: {e.stderr}"
        ) from e

    return result.stdout
74+
75+
76+
@app.post("/blast")
async def run_blast(request: Request) -> dict[str, str]:
    """Run a BLAST search described by the JSON request body.

    The body must provide ``mode``, ``sequence``, ``db_path``, ``db_name``,
    ``evalue``, ``max_target_seqs`` and ``num_threads``.

    Returns:
        ``{"result": <text>}`` where the text is the content of the
        ``-outfmt 6`` result file written by BLAST.

    Raises:
        HTTPException: With status 400 when the database path check fails or
            ``mode`` is not an allowed BLAST program; with status 500 for any
            other failure (missing keys, bad numbers, BLAST errors, I/O).
    """
    # ``mode`` is used as the executable name, so it must never be taken
    # verbatim from the request: restrict it to known BLAST programs.
    allowed_modes = {"blastp", "blastn", "blastx", "tblastn", "tblastx"}

    query_path = "/usr/local/bin/data/query.fasta"
    result_path = "/usr/local/bin/data/result.out"

    try:
        data = await request.json()
        logger.debug(f"Received request data: {data}")

        _check_db_path_correct(data["db_path"], data["db_name"])

        mode = data["mode"]
        if mode not in allowed_modes:
            raise HTTPException(
                status_code=400, detail=f"Unsupported BLAST mode: {mode}"
            )

        sequence = data["sequence"]
        logger.debug(f"Sequence received: {sequence}")
        db_path = data["db_path"]
        db_name = data["db_name"]
        evalue = float(data["evalue"])
        max_target_seqs = int(data["max_target_seqs"])
        num_threads = int(data["num_threads"])

        # Create FASTA file
        fasta = to_fasta(sequence)
        with open(query_path, "w") as file:
            file.write(fasta)
        logger.debug(f" file content: {fasta}")

        # debug db path exists
        logger.debug(f"db path exists: {os.path.exists(db_path)}")
        # debug list all files in db path
        logger.debug(f"files in db path: {os.listdir(db_path)}")

        # Run BLAST
        command = [
            mode,
            "-query",
            query_path,
            "-db",
            f"{db_path}/{db_name}",
            "-evalue",
            str(evalue),
            "-outfmt",
            "6",
            "-num_threads",
            str(num_threads),
            "-out",
            result_path,
            "-max_target_seqs",
            str(max_target_seqs),
        ]

        logger.debug(f"Running command: {command}")
        subprocess.run(command, capture_output=True, check=True, text=True)

        # Read results
        with open(result_path, "r") as file:
            result_data = file.read()

        return {"result": result_data}

    except HTTPException:
        # Keep deliberate 4xx responses instead of rewrapping them as 500.
        raise
    except subprocess.CalledProcessError as e:
        # str(e) only carries the exit status; BLAST's own message is in stderr.
        logger.error(f"Error running BLAST: {str(e)} - {e.stderr}")
        raise HTTPException(status_code=500, detail=f"{e}: {e.stderr}") from e
    except Exception as e:
        logger.error(f"Error running BLAST: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Cleanup runs on success and failure so stale query/result files
        # are never left behind for the next request.
        for path in (query_path, result_path):
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
142+
143+
144+
if __name__ == "__main__":
    # Imported inside the guard so uvicorn is only loaded when this file is
    # executed as a script.
    import uvicorn

    # Serve the app on all interfaces at port 6001 with auto-reload enabled.
    uvicorn.run("app:app", host="0.0.0.0", port=6001, reload=True)

docker/blast/reload_development.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
sudo docker stop blast
2+
sudo docker remove blast
3+
sudo docker build --no-cache -t blast_image .
4+
sudo docker run --name blast --volume /home/ala/BA/mydb:/blast/db --volume /mnt/databases_shared/:/blast/db/custom -p 6001:6001 blast_image
32 KB
Binary file not shown.
431 Bytes
Binary file not shown.
184 Bytes
Binary file not shown.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"version": "1.2",
3+
"dbname": "protein_db",
4+
"dbtype": "Protein",
5+
"db-version": 5,
6+
"description": "Protein Database",
7+
"number-of-letters": 893,
8+
"number-of-sequences": 10,
9+
"last-updated": "2025-01-20T15:57:00",
10+
"number-of-volumes": 1,
11+
"bytes-total": 51054,
12+
"bytes-to-cache": 1088,
13+
"files": [
14+
"protein_db.pdb",
15+
"protein_db.phr",
16+
"protein_db.pin",
17+
"protein_db.pog",
18+
"protein_db.pos",
19+
"protein_db.pot",
20+
"protein_db.psq",
21+
"protein_db.ptf",
22+
"protein_db.pto"
23+
]
24+
}

0 commit comments

Comments
 (0)