1- from fastapi import FastAPI , HTTPException
2- from pydantic import BaseModel
1+ from fastapi import FastAPI , HTTPException , Request
2+ from starlette .responses import FileResponse
3+ import logging
4+
35import subprocess
46import os
5- from uuid import uuid4
67import shutil
78
89app = FastAPI ()
9-
class MMSeqsParams(BaseModel):
    """Input parameters accepted by the mmseqs2 search endpoint."""

    # Required inputs.
    query: str  # the query sequence
    database: str
    output: str  # the output directory

    # Optional settings with defaults.
    sensitivity: float = 7.5  # sensitivity parameter for mmseqs2
    threads: int = 4  # number of threads to use
    blast_format: bool = True  # option to convert to BLAST+ format
18-
# In-memory registry that keeps track of running jobs and their results.
job_results = dict()
21-
def create_fastas_file_from_seq(seq, filename):
    """Write *seq* to *filename* as a one-record FASTA file with the header ``>seq``."""
    with open(filename, 'w') as handle:
        # Header line first, then the sequence followed by a newline.
        handle.writelines((">seq\n", f"{seq}\n"))
25-
def create_queryDB_from_seq(filename):
    """
    Build an mmseqs database from a FASTA file via ``mmseqs createdb <input> <output>``.

    The output path is ``filename`` with every occurrence of ``'fasta'`` removed
    and ``'.db'`` appended (``query.fasta`` -> ``query..db``). The rule is kept
    as-is because ``run_mmseqs`` derives the database path with the same
    expression.

    Args:
        filename (str): Path to the input FASTA file.

    Raises:
        HTTPException: With status 500 if the mmseqs executable cannot be
            started or the command exits with a non-zero status.
    """
    db_path = filename.replace('fasta', '') + ".db"
    command = ["mmseqs", "createdb", filename, db_path]

    try:
        subprocess.run(command, check=True)
    except FileNotFoundError as e:
        # The mmseqs binary is not installed / not on PATH.
        raise HTTPException(status_code=500, detail=f"mmseqs executable not found: {e}") from e
    except subprocess.CalledProcessError as e:
        # 600 is not a valid HTTP status code; report a server error instead.
        raise HTTPException(status_code=500, detail=str(e)) from e
# Set up INFO-level logging, create this module's logger and emit the start-up message.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.log(logging.INFO, "FastAPI server is running...")
13+
14+
def create_fastas_file_from_seq(query_string, filename):
    """
    Create a FASTA file from a single string containing FASTA-formatted sequences.

    Each record is written as its header line followed by the sequence joined
    onto one line. Blank lines and surrounding whitespace (including ``\\r``)
    are ignored. All records are validated before the file is opened, so no
    partial file is written when the input is rejected.

    Args:
        query_string (str): String containing FASTA-formatted sequences.
        filename (str): Path to the output FASTA file.

    Raises:
        ValueError: If the input contains no records, has sequence data before
            the first header, has a header without any sequence, or a sequence
            contains characters other than amino acids, ``*`` or ``X``.
    """
    # Amino acids + stop codon (*) + unknown (X); comparison is case-insensitive.
    valid_chars = frozenset("ACDEFGHIKLMNPQRSTVWY*X")

    # Parse into (header, [sequence chunks]) pairs.
    records = []
    for raw_line in query_string.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">"):
            records.append((line, []))
        elif not records:
            # Previously such lines were dropped silently.
            raise ValueError("Sequence data found before the first FASTA header")
        else:
            records[-1][1].append(line)

    if not records:
        raise ValueError("No FASTA records found in query")

    # Validate every record the same way, regardless of its position.
    entries = []
    for header, chunks in records:
        sequence = "".join(chunks)
        if not sequence:
            raise ValueError(f"No sequence found under {header}")
        if not valid_chars.issuperset(sequence.upper()):
            raise ValueError(f"Invalid characters in sequence under {header}")
        entries.append(f"{header}\n{sequence}")

    with open(filename, 'w', encoding='utf-8') as f:
        f.write("\n".join(entries) + "\n")  # Ensure newline at end of file

    logging.getLogger(__name__).info("FASTA file created: %s", filename)
4264
43-
@app.get("/")
async def read_root():
    """Serve the welcome message at the API root."""
    greeting = {"message": "Welcome to the MMSeqs2 API!"}
    return greeting
4768
48- @app .post ("/run_mmseqs" )
49- async def run_mmseqs (params : MMSeqsParams ):
50- # Create a unique job id
51- job_id = str (uuid4 ())
52- output_dir = f"/tmp/{ job_id } "
@app.get("/help")
def help():
    """
    Return the output of ``mmseqs -h``.

    Returns:
        dict: ``{"help": <stdout of the command>}``.

    Raises:
        HTTPException: 500 if the mmseqs executable cannot be started; 400 if
            the command exits non-zero without printing anything to stdout.
    """
    try:
        results = subprocess.run(
            ["mmseqs", "-h"],
            capture_output=True,
            text=True,
        )
    except FileNotFoundError as e:
        # The binary is missing; without this the request failed with an
        # unhandled exception.
        raise HTTPException(status_code=500, detail=f"mmseqs executable not found: {e}") from e

    # subprocess.run() is called without check=True, so CalledProcessError is
    # never raised; inspect the exit status explicitly instead. Help text that
    # was printed is still returned even if the exit status is non-zero.
    if results.returncode != 0 and not results.stdout:
        raise HTTPException(status_code=400, detail=f"Command failed {results.stderr}")

    return {"help": results.stdout}
5380
54- # Prepare the output directory
55- os .makedirs (output_dir , exist_ok = True )
@app.post("/easycluster")
async def easycluster(request: Request):
    """
    Run ``mmseqs easy-cluster`` on the sequences supplied in the request body.

    The JSON body must contain ``query`` (FASTA-formatted string),
    ``min_seq_id``, ``coverage`` and ``cov_mode``. The query is written to
    ``/app/in.fasta``, the command is run with ``/app/output`` as the result
    prefix and ``/app/tmp`` as the temporary directory, and the content of
    ``/app/output_all_seqs.fasta`` is returned.

    NOTE(review): all requests share the same fixed paths and the subprocess
    call blocks inside an ``async`` handler, so concurrent requests may
    interfere with each other -- confirm whether concurrency is expected.

    Raises:
        HTTPException: 400 for a malformed body, missing/invalid parameters or
            an invalid FASTA query; 500 if mmseqs fails or its result file is
            missing.
    """
    try:
        data = await request.json()
    except ValueError as exc:  # json.JSONDecodeError is a ValueError subclass
        raise HTTPException(status_code=400, detail="Request body must be valid JSON") from exc
    logger.info("Received request data: %s", data)

    if not isinstance(data, dict):
        raise HTTPException(status_code=400, detail="Request body must be a JSON object")
    required = ("query", "min_seq_id", "coverage", "cov_mode")
    missing = [key for key in required if key not in data]
    if missing:
        raise HTTPException(
            status_code=400,
            detail=f"Missing required field(s): {', '.join(missing)}",
        )
    if not isinstance(data['query'], str):
        raise HTTPException(status_code=400, detail="'query' must be a string")
    try:
        # Only validate here; the values are passed to mmseqs as received.
        float(data['min_seq_id'])
        float(data['coverage'])
        int(data['cov_mode'])
    except (TypeError, ValueError) as exc:
        raise HTTPException(
            status_code=400,
            detail="'min_seq_id' and 'coverage' must be numbers and 'cov_mode' an integer",
        ) from exc

    BASE_DIR = "/app"
    query_filename = os.path.join(BASE_DIR, "in.fasta")
    result_filename = os.path.join(BASE_DIR, "output")
    tmp_dir = os.path.join(BASE_DIR, "tmp")
    # Derived from the result prefix instead of being hard-coded a second time.
    all_seqs_filename = result_filename + "_all_seqs.fasta"

    os.makedirs(tmp_dir, exist_ok=True)
    with open(result_filename, 'w'):
        pass  # Clear or create result file

    # Create the FASTA file from the query string
    try:
        create_fastas_file_from_seq(data['query'], query_filename)
    except ValueError as exc:
        # An invalid query is a client error, not a server failure.
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    # Run the mmseqs2 command
    command = [
        "mmseqs",
        "easy-cluster",
        query_filename,
        result_filename,
        '--min-seq-id', str(data['min_seq_id']),
        '-c', str(data['coverage']),
        '--cov-mode', str(data['cov_mode']),
        tmp_dir,
    ]
    logger.info("Running command: %s", ' '.join(command))

    try:
        completed = subprocess.run(command, capture_output=True, text=True, check=True)
    except FileNotFoundError as exc:
        logger.error("mmseqs executable not found: %s", exc)
        raise HTTPException(status_code=500, detail=f"Command failed: {exc}") from exc
    except subprocess.CalledProcessError as e:
        logger.error("Command failed with return code %s", e.returncode)
        logger.error("STDOUT: %s", e.stdout)
        logger.error("STDERR: %s", e.stderr)
        raise HTTPException(status_code=500, detail=f"Command failed: {e.stderr}") from e
    logger.info("Command output: %s", completed.stdout)

    logger.info("Reading result file: %s", all_seqs_filename)
    try:
        with open(all_seqs_filename, 'r') as file:
            result = file.read()
    except FileNotFoundError as exc:
        raise HTTPException(
            status_code=500,
            detail=f"Result file not found: {all_seqs_filename}",
        ) from exc

    return result
129124
if __name__ == '__main__':
    # Start the uvicorn server only when this module is executed directly.
    from uvicorn import run as serve

    serve("app:app", host="0.0.0.0", port=8001, reload=True)
0 commit comments