Skip to content

Commit 4769e62

Browse files
PYMOL-5418: Add USAlign implementation for structural alignment
1 parent 841c094 commit 4769e62

File tree

10 files changed

+2145
-2
lines changed

10 files changed

+2145
-2
lines changed

layer2/USalign.cpp

Lines changed: 1638 additions & 0 deletions
Large diffs are not rendered by default.

layer2/USalign.h

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#pragma once
2+
3+
#include <cstddef>
#include <string>
#include <vector>

#include <glm/glm.hpp>
7+
8+
namespace pymol::usalign {
9+
10+
struct Superposition {
11+
glm::dvec3 translation{0.0};
12+
glm::dmat3 rotation{1.0};
13+
};
14+
15+
struct TMAlignResult {
16+
double tm_score_mobile = 0.0; // normalized by mobile length
17+
double tm_score_target = 0.0; // normalized by target length
18+
double d0_target = 0.0; // d0 used for target normalization
19+
double d0_mobile = 0.0; // d0 used for mobile normalization
20+
double rmsd = 0.0;
21+
int aligned_length = 0; // aligned pairs within distance cutoff
22+
double seq_identity = 0.0;
23+
Superposition transform;
24+
std::vector<int> mobile_indices; // paired residue indices into mobile CA array
25+
std::vector<int> target_indices; // paired residue indices into target CA array
26+
std::string seq_mobile; // alignment string for mobile
27+
std::string seq_target; // alignment string for target
28+
std::string seq_match; // ':' close, '.' far, ' ' gap
29+
};
30+
31+
// DP workspace — single allocation reused across all seeds
32+
struct DPWorkspace {
33+
std::vector<double> score_flat;
34+
std::vector<double> val_flat;
35+
std::vector<char> path_flat;
36+
int rows = 0;
37+
int cols = 0;
38+
39+
// Scratch buffers for TMscore8_search and scoring.
40+
// xtm, ytm, r1, r2 are sized to min(xlen, ylen); xt is sized to xlen.
41+
// All score_fun8 n_cut values are bounded by min(xlen, ylen).
42+
std::vector<glm::dvec3> xtm, ytm, xt, r1, r2;
43+
44+
void resize(int xlen, int ylen);
45+
46+
double& score(int i, int j) { return score_flat[i * cols + j]; }
47+
double& val(int i, int j) { return val_flat[i * cols + j]; }
48+
bool path(int i, int j) const { return path_flat[i * cols + j] != 0; }
49+
void set_path(int i, int j, bool v) { path_flat[i * cols + j] = v ? 1 : 0; }
50+
};
51+
52+
/**
 * Perform TM-score structural alignment between two protein structures.
 *
 * Argument order is target first, then mobile, for both the coordinate and
 * the sequence parameters.
 *
 * @param target_ca Target structure CA coordinates (remains fixed)
 * @param mobile_ca Mobile structure CA coordinates (will be aligned to target)
 * @param target_seq Target sequence (single-letter amino acid codes)
 * @param mobile_seq Mobile sequence (single-letter amino acid codes)
 * @param fast Use fast mode with fewer iterations (default: false)
 * @return TMAlignResult containing TM-scores, RMSD, alignment, and transform
 *
 * @note TM-score ranges from 0 to 1; score > 0.5 indicates same fold
 * @note Complexity: O(n²) where n = min(target_len, mobile_len)
 * @note May use a lot of memory for structures above 10K residues.
 * @note NOTE(review): the caller added in this change passes one sequence
 *       character per CA coordinate; confirm whether equal lengths are
 *       required by the implementation.
 */
66+
TMAlignResult TMalign(
67+
const std::vector<glm::dvec3>& target_ca,
68+
const std::vector<glm::dvec3>& mobile_ca,
69+
const std::string& target_seq,
70+
const std::string& mobile_seq,
71+
bool fast = false);
72+
73+
} // namespace pymol::usalign

layer3/Executive.cpp

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
#include "SceneRay.h"
7878
#include "ScrollBar.h"
7979
#include "SculptCache.h"
80+
#include "Seeker.h"
8081
#include "Selector.h"
8182
#include "Seq.h"
8283
#include "Setting.h"
@@ -114,6 +115,7 @@
114115
#include "ce_types.h"
115116
#endif
116117

118+
#include <glm/gtc/quaternion.hpp>
117119
#include <glm/gtc/type_ptr.hpp>
118120
#include <glm/vec3.hpp>
119121

@@ -17560,3 +17562,197 @@ pymol::Result<std::unordered_set<const pymol::CObject*>> ExecutiveGetObjectDeps(
1756017562
obj_set.erase(&obj);
1756117563
return obj_set;
1756217564
}
17565+
17566+
/**
17567+
* Run TM-align on two selections and return results.
17568+
*
17569+
* @param mobile_sele mobile selection (will be transformed)
17570+
* @param target_sele target selection (stays fixed)
17571+
* @param mobile_state state of mobile selection (0-based)
17572+
* @param target_state state of target selection (0-based)
17573+
* @param quiet suppress output
17574+
* @param transform apply superposition transform
17575+
* @param oname name for alignment object (empty = don't create)
17576+
* @param fast use fast mode (fewer iterations)
17577+
*/
17578+
pymol::Result<pymol::usalign::TMAlignResult> ExecutiveUSalign(PyMOLGlobals* G,
17579+
const char* mobile_sele, const char* target_sele, int mobile_state,
17580+
int target_state, int quiet, int transform, const char* oname, int fast)
17581+
{
17582+
// Resolve selections
17583+
auto sele_mobile = SelectorIndexByName(G, mobile_sele);
17584+
if (sele_mobile < 0)
17585+
return pymol::make_error("Invalid mobile selection: ", mobile_sele);
17586+
17587+
auto sele_target = SelectorIndexByName(G, target_sele);
17588+
if (sele_target < 0)
17589+
return pymol::make_error("Invalid target selection: ", target_sele);
17590+
17591+
// Extract CA coordinates and sequences
17592+
struct ResidueInfo {
17593+
glm::dvec3 coord;
17594+
char seq_char;
17595+
AtomInfoType* ai;
17596+
};
17597+
17598+
auto extract_ca = [&](SelectorID_t sele,
17599+
int state) -> std::vector<ResidueInfo> {
17600+
std::vector<ResidueInfo> residues;
17601+
SeleCoordIterator iter(G, sele, state);
17602+
while (iter.next()) {
17603+
auto* ai = iter.getAtomInfo();
17604+
if (ai->flags & cAtomFlag_guide) {
17605+
float* c = iter.getCoord();
17606+
ResidueInfo ri;
17607+
ri.coord = glm::dvec3(c[0], c[1], c[2]);
17608+
ri.seq_char = SeekerGetAbbr(G, LexStr(G, ai->resn), 'O', 'X');
17609+
ri.ai = ai;
17610+
residues.push_back(ri);
17611+
}
17612+
}
17613+
return residues;
17614+
};
17615+
17616+
auto mobile_res = extract_ca(sele_mobile, mobile_state);
17617+
auto target_res = extract_ca(sele_target, target_state);
17618+
17619+
if (mobile_res.size() < 3) {
17620+
return pymol::make_error("Mobile selection has fewer than 3 guide atoms (",
17621+
mobile_res.size(), ")");
17622+
}
17623+
if (target_res.size() < 3) {
17624+
return pymol::make_error("Target selection has fewer than 3 guide atoms (",
17625+
target_res.size(), ")");
17626+
}
17627+
17628+
// Build coordinate vectors and sequences
17629+
std::vector<glm::dvec3> mobile_ca, target_ca;
17630+
std::string mobile_seq, target_seq;
17631+
mobile_ca.reserve(mobile_res.size());
17632+
target_ca.reserve(target_res.size());
17633+
17634+
for (const auto& r : mobile_res) {
17635+
mobile_ca.push_back(r.coord);
17636+
mobile_seq.push_back(r.seq_char);
17637+
}
17638+
for (const auto& r : target_res) {
17639+
target_ca.push_back(r.coord);
17640+
target_seq.push_back(r.seq_char);
17641+
}
17642+
17643+
// Run TM-align
17644+
auto result = pymol::usalign::TMalign(
17645+
target_ca, mobile_ca, target_seq, mobile_seq, fast != 0);
17646+
17647+
if (result.aligned_length < 1) {
17648+
return pymol::make_error("TM-align failed to find any alignment");
17649+
}
17650+
17651+
// Print results
17652+
if (!quiet) {
17653+
PRINTFB(G, FB_Executive, FB_Results)
17654+
" USalign: TM-score= %6.4f (normalized by target, N=%d, d0=%.2f)\n",
17655+
result.tm_score_target, static_cast<int>(target_ca.size()),
17656+
result.d0_target ENDFB(G);
17657+
PRINTFB(G, FB_Executive, FB_Results)
17658+
" USalign: TM-score= %6.4f (normalized by mobile, N=%d, d0=%.2f)\n",
17659+
result.tm_score_mobile, static_cast<int>(mobile_ca.size()),
17660+
result.d0_mobile ENDFB(G);
17661+
PRINTFB(G, FB_Executive, FB_Results)
17662+
" USalign: Aligned length= %d, RMSD= %5.2f, Seq_ID=n_identical/n_aligned= "
17663+
"%4.3f\n",
17664+
result.aligned_length, result.rmsd, result.seq_identity ENDFB(G);
17665+
}
17666+
17667+
// Apply transform to mobile object
17668+
if (transform) {
17669+
// Convert double-precision Superposition to float TTT
17670+
// USalign convention: y_aligned = R * x + t
17671+
// where x = mobile coords, y = target coords
17672+
// The rotation R and translation t transform mobile -> target space
17673+
17674+
const auto& sup = result.transform;
17675+
17676+
// Build a legacy-style 16-float TTT matrix
17677+
// TTT format: [R00 R01 R02 pre_x] [R10 R11 R12 pre_y]
17678+
// [R20 R21 R22 pre_z] [tx ty tz 1]
17679+
// where pre is the pre-translation (origin), and t is post-translation
17680+
// For a simple rotation+translation (no origin): pre=0, R=rotation,
17681+
// t=translation
17682+
17683+
glm::mat3 rot_f(sup.rotation);
17684+
glm::quat q = glm::quat_cast(rot_f);
17685+
glm::vec3 t(sup.translation);
17686+
17687+
// Create TTT: pretranslate=0, rotate=q, posttranslate=t
17688+
pymol::TTT ttt(glm::vec3(0.0f), q, t);
17689+
17690+
// Convert to legacy float[16] format for ExecuteCombineObjectTTT
17691+
auto legacy = pymol::TTT::as_pymol_2_legacy(ttt);
17692+
float tttf[16];
17693+
std::memcpy(tttf, glm::value_ptr(legacy), 16 * sizeof(float));
17694+
17695+
// Follow the same pattern as ExecutiveAlign:
17696+
// 1. Copy target's TTT and state matrix to mobile (reset to same frame)
17697+
// 2. Combine the alignment transform (reverse_order=true)
17698+
// Note: Only the first object in the mobile selection is transformed,
17699+
// matching ExecutiveAlign behavior for multi-object selections.
17700+
ObjectMolecule* mobile_obj = SelectorGetFirstObjectMolecule(G, sele_mobile);
17701+
ObjectMolecule* target_obj =
17702+
SelectorGetSingleObjectMolecule(G, sele_target);
17703+
if (mobile_obj && target_obj) {
17704+
ExecutiveMatrixCopy(G, target_obj->Name, mobile_obj->Name, 1, 1,
17705+
target_state, mobile_state, false, 0, quiet);
17706+
ExecutiveMatrixCopy(G, target_obj->Name, mobile_obj->Name, 2, 2,
17707+
target_state, mobile_state, false, 0, quiet);
17708+
ExecutiveCombineObjectTTT(G, mobile_obj->Name, tttf, true, -1);
17709+
}
17710+
}
17711+
17712+
// Create alignment object
17713+
if (oname && oname[0]) {
17714+
int align_state = target_state;
17715+
if (align_state < 0) {
17716+
align_state = SceneGetState(G);
17717+
}
17718+
17719+
ObjectMolecule* trg_obj = SelectorGetSingleObjectMolecule(G, sele_target);
17720+
ObjectMolecule* mob_obj = SelectorGetFirstObjectMolecule(G, sele_mobile);
17721+
17722+
if (trg_obj && mob_obj) {
17723+
int n_pair = result.aligned_length;
17724+
pymol::vla<int> align_vla(n_pair * 3);
17725+
int* id_p = align_vla.data();
17726+
17727+
for (int k = 0; k < n_pair; k++) {
17728+
int mi = result.mobile_indices[k];
17729+
int ti = result.target_indices[k];
17730+
if (mi < static_cast<int>(mobile_res.size()) &&
17731+
ti < static_cast<int>(target_res.size())) {
17732+
id_p[0] = AtomInfoCheckUniqueID(G, target_res[ti].ai);
17733+
id_p[1] = AtomInfoCheckUniqueID(G, mobile_res[mi].ai);
17734+
id_p[2] = 0;
17735+
id_p += 3;
17736+
}
17737+
}
17738+
17739+
ObjectAlignment* obj = nullptr;
17740+
{
17741+
pymol::CObject* execObj = ExecutiveFindObjectByName(G, oname);
17742+
if (execObj && execObj->type != cObjectAlignment) {
17743+
ExecutiveDelete(G, oname);
17744+
} else {
17745+
obj = dynamic_cast<ObjectAlignment*>(execObj);
17746+
}
17747+
}
17748+
obj = ObjectAlignmentDefine(
17749+
G, obj, align_vla, align_state, true, trg_obj, mob_obj);
17750+
obj->Color = ColorGetIndex(G, "yellow");
17751+
ObjectSetName(obj, oname);
17752+
ExecutiveManageObject(G, obj, 0, quiet);
17753+
SceneInvalidate(G);
17754+
}
17755+
}
17756+
17757+
return result;
17758+
}

layer3/Executive.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Z* -------------------------------------------------------------------
3838
#include "SpecRecSpecial.h"
3939
#include "Tracker.h"
4040
#include "TrackerList.h"
41+
#include "USalign.h"
4142
#include "Word.h"
4243
#include "vla.h"
4344

@@ -255,6 +256,11 @@ int ExecutiveAlign(PyMOLGlobals* G, const char* s1, const char* s2,
255256
float seq_wt, float radius, float scale, float base, float coord_wt,
256257
float expect, int window, float ante);
257258

259+
/**
 * Run TM-align on two selections and return the alignment result.
 * The mobile selection is the one that gets transformed; the target stays
 * fixed. States are 0-based. `transform` controls whether the superposition
 * is applied, `oname` names an alignment object to create (empty = none),
 * and `fast` selects the mode with fewer iterations.
 */
pymol::Result<pymol::usalign::TMAlignResult> ExecutiveUSalign(
260+
PyMOLGlobals* G, const char* mobile_sele, const char* target_sele,
261+
int mobile_state, int target_state, int quiet, int transform,
262+
const char* oname, int fast);
263+
258264
void ExecutiveUpdateColorDepends(PyMOLGlobals* G, ObjectMolecule* mol);
259265
void ExecutiveUpdateCoordDepends(PyMOLGlobals* G, ObjectMolecule* mol);
260266
pymol::Result<float> ExecutiveDistance(PyMOLGlobals* G, const char* nam,

layer4/Cmd.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1951,6 +1951,61 @@ static PyObject *CmdAlign(PyObject * self, PyObject * args)
19511951
}
19521952
}
19531953

1954+
/**
 * Python entry point for ExecutiveUSalign.
 *
 * Expected arguments (format "Ossiiiisi"): self, mobile selection, target
 * selection, mobile state, target state, quiet, transform, alignment object
 * name, fast.
 *
 * @return on success a dict with the keys "tm_score_target",
 *         "tm_score_mobile", "RMSD", "alignment_length" and "seq_identity";
 *         on an ExecutiveUSalign error, nullptr with a RuntimeError set to
 *         the error message; otherwise the result of APIFailure().
 */
static PyObject *CmdUSalign(PyObject * self, PyObject * args)
{
  PyMOLGlobals *G = nullptr;
  const char *mobile, *target, *oname;
  int mobile_state, target_state, quiet, transform, fast;
  API_SETUP_ARGS(G, self, args, "Ossiiiisi", &self,
      &mobile, &target, &mobile_state, &target_state,
      &quiet, &transform, &oname, &fast);
  API_ASSERT(APIEnterNotModal(G));

  // Both buffers start out empty. The && below short-circuits, so when the
  // first lookup fails SelectorGetTmp never runs on s2, yet SelectorFreeTmp
  // is called on both buffers unconditionally further down. Without the
  // initializer that call would read an uninitialized buffer.
  OrthoLineType s1 = "", s2 = "";
  int ok = (SelectorGetTmp(G, mobile, s1) >= 0) &&
           (SelectorGetTmp(G, target, s2) >= 0);

  // Store results in locals — can't call Py_BuildValue until after APIExit
  double tm_target = 0, tm_mobile = 0, rmsd = 0, seq_id = 0;
  int ali_len = 0;
  bool have_result = false;
  std::string err_msg;

  if (ok) {
    auto res = ExecutiveUSalign(G, s1, s2,
        mobile_state, target_state, quiet, transform, oname, fast);
    if (res) {
      auto& r = res.result();
      tm_target = r.tm_score_target;
      tm_mobile = r.tm_score_mobile;
      rmsd = r.rmsd;
      ali_len = r.aligned_length;
      seq_id = r.seq_identity;
      have_result = true;
    } else {
      err_msg = res.error().what();
    }
  }

  SelectorFreeTmp(G, s1);
  SelectorFreeTmp(G, s2);
  APIExit(G);

  if (have_result) {
    return Py_BuildValue("{s:d,s:d,s:d,s:i,s:d}",
        "tm_score_target", tm_target,
        "tm_score_mobile", tm_mobile,
        "RMSD", rmsd,
        "alignment_length", ali_len,
        "seq_identity", seq_id);
  }
  if (!err_msg.empty()) {
    PyErr_SetString(PyExc_RuntimeError, err_msg.c_str());
    return nullptr;
  }
  // Temporary selection lookup failed and no error message was produced.
  return APIFailure();
}
2008+
19542009
static PyObject *CmdGetCoordsAsNumPy(PyObject * self, PyObject * args)
19552010
{
19562011
PyMOLGlobals *G = nullptr;
@@ -6613,6 +6668,7 @@ static PyMethodDef Cmd_methods[] = {
66136668
{"unset", CmdUnset, METH_VARARGS},
66146669
{"unset_bond", CmdUnsetBond, METH_VARARGS},
66156670
{"update", CmdUpdate, METH_VARARGS},
6671+
{"usalign", CmdUSalign, METH_VARARGS},
66166672
{"window", CmdWindow, METH_VARARGS},
66176673
{"zoom", CmdZoom, METH_VARARGS},
66186674
{NULL, nullptr} /* sentinel */

modules/pymol/api.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,8 @@
302302
intra_rms, \
303303
intra_rms_cur, \
304304
cealign, \
305-
pair_fit
305+
pair_fit, \
306+
usalign
306307

307308
#--------------------------------------------------------------------
308309
# ARE ALL OF THESE UNUSED AND/OR DEPRECATED (?)

0 commit comments

Comments
 (0)