Skip to content

Commit 5456a67

Browse files
PYMOL-5436: Add new command 'protonate' for chemically-aware protonation
1 parent 173f174 commit 5456a67

File tree

4 files changed

+413
-0
lines changed

4 files changed

+413
-0
lines changed

modules/pymol/api.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@
219219
pbc_unwrap, \
220220
pbc_wrap, \
221221
protect, \
222+
protonate, \
222223
push_undo, \
223224
rebond, \
224225
reference, \

modules/pymol/editing.py

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,6 +1255,288 @@ def h_add(selection="(all)", quiet=1, state=0, legacy=0, _self=cmd):
12551255
return r
12561256

12571257

1258+
# Textbook pKa values for standard titratable residues.
1259+
# Used as fallback when pdb2pqr is not available.
1260+
# At a given pH, if pH > pKa the group is deprotonated (fewer H).
1261+
# Format: (resn, atom_names) → pKa
1262+
# H count when protonated vs deprotonated is specified per entry.
1263+
_TITRATABLE_PKA = {
1264+
# Carboxylates: protonated = 1H on OD2/OE2, deprotonated = 0H
1265+
('ASP', ('OD1', 'OD2')): 3.65,
1266+
('GLU', ('OE1', 'OE2')): 4.25,
1267+
# Histidine: protonated = H on both ND1+NE2, deprotonated = H on NE2 only
1268+
# Note: assumes HIE tautomer. pdb2pqr path handles HID/HIE/HIP properly.
1269+
('HIS', ('ND1',)): 6.00,
1270+
# Cysteine: protonated = 1H on SG, deprotonated = 0H
1271+
('CYS', ('SG',)): 8.18,
1272+
# Tyrosine: protonated = 1H on OH, deprotonated = 0H
1273+
('TYR', ('OH',)): 10.07,
1274+
# Lysine: protonated = 3H on NZ, deprotonated = 2H
1275+
('LYS', ('NZ',)): 10.53,
1276+
# Arginine: practically always protonated
1277+
('ARG', ('NH1', 'NH2')): 12.48,
1278+
}
1279+
1280+
def _force_add_h(obj_name, atom_sele, deficit, state, _self):
1281+
"""Temporarily bump valence to force h_add on a saturated atom."""
1282+
_self.alter(atom_sele, f"valence = valence + {deficit}")
1283+
try:
1284+
_self.h_add(atom_sele, state=state)
1285+
finally:
1286+
_self.alter(atom_sele, f"valence = valence - {deficit}")
1287+
1288+
def _remove_excess_h(obj_name, h_sele, excess, _self):
1289+
"""Remove a specific number of H atoms from a selection."""
1290+
from pymol import stored
1291+
stored._prot_ids = []
1292+
_self.iterate(h_sele, "stored._prot_ids.append(ID)")
1293+
for aid in stored._prot_ids[-excess:]:
1294+
_self.remove(f"{obj_name} and ID {aid}")
1295+
1296+
def _protonate_fallback(selection, obj_name, pH, state, quiet, _self):
1297+
"""pH-aware protonation using textbook pKa values.
1298+
1299+
Adds all H via h_add, then adjusts titratable groups based on
1300+
whether pH > pKa (deprotonated) or pH < pKa (protonated).
1301+
"""
1302+
from pymol import stored
1303+
1304+
obj_sele = f"({selection}) and {obj_name}"
1305+
1306+
# Strip existing H and add all H geometrically
1307+
_self.remove(f"hydro and (bymol ({obj_sele}))")
1308+
_self.h_add(obj_sele, state=state)
1309+
1310+
# Adjust titratable groups
1311+
for (resn, atom_names), pKa in _TITRATABLE_PKA.items():
1312+
deprotonated = pH > pKa
1313+
1314+
if not deprotonated:
1315+
# Group should be protonated. h_add may not have added
1316+
# H if the valence is already satisfied (e.g. carboxylate
1317+
# double bond, His imidazole). Bump valence to force it.
1318+
for atom_name in atom_names:
1319+
sele = f"{obj_name} and resn {resn} and name {atom_name}"
1320+
stored._prot_ids = []
1321+
_self.iterate(sele, "stored._prot_ids.append(ID)")
1322+
for aid in stored._prot_ids:
1323+
atom_sele = f"{obj_name} and ID {aid}"
1324+
h_count = _self.count_atoms(
1325+
f"hydro and neighbor ({atom_sele})")
1326+
if h_count == 0:
1327+
_force_add_h(obj_name, atom_sele, 1, state,
1328+
_self)
1329+
continue
1330+
1331+
# pH > pKa: group should be deprotonated — remove H
1332+
for atom_name in atom_names:
1333+
h_sele = (f"hydro and neighbor "
1334+
f"({obj_name} and resn {resn} and name {atom_name})")
1335+
h_count = _self.count_atoms(h_sele)
1336+
if h_count > 0:
1337+
if resn == 'LYS' and atom_name == 'NZ':
1338+
# Deprotonated Lys: NH2 (2H), not NH3+ (3H)
1339+
_remove_excess_h(obj_name, h_sele, 1, _self)
1340+
else:
1341+
_self.remove(h_sele)
1342+
1343+
if not quiet:
1344+
total_h = _self.count_atoms(f"hydro and (bymol ({obj_sele}))")
1345+
print(f" protonate: added {total_h} hydrogens at pH {pH:.1f}"
1346+
" (using textbook pKa values)")
1347+
1348+
def _protonate_pdb2pqr(selection, obj_name, pH, ff, state, quiet,
1349+
exe, is_v3, _self):
1350+
"""pH-aware protonation using pdb2pqr/PROPKA."""
1351+
import os
1352+
import shutil
1353+
import subprocess
1354+
import tempfile
1355+
from pymol import stored
1356+
1357+
obj_sele = f"({selection}) and {obj_name}"
1358+
1359+
tmpdir = tempfile.mkdtemp()
1360+
try:
1361+
infile = os.path.join(tmpdir, 'in.pdb')
1362+
outfile = os.path.join(tmpdir, 'out.pqr')
1363+
tmp_obj = _self.get_unused_name('_prot_tmp')
1364+
1365+
_self.save(infile, obj_name, state)
1366+
1367+
chain_flag = '--keep-chain' if is_v3 else '--chain'
1368+
args = [exe,
1369+
f'--ff={ff.upper()}',
1370+
chain_flag,
1371+
'--titration-state-method=propka',
1372+
f'--with-ph={pH:f}',
1373+
infile, outfile]
1374+
1375+
p = subprocess.Popen(args, cwd=tmpdir,
1376+
stdout=subprocess.PIPE,
1377+
stderr=subprocess.PIPE)
1378+
stdout, stderr = p.communicate()
1379+
1380+
if p.returncode != 0:
1381+
if not quiet:
1382+
print(stderr.decode('utf-8', errors='replace'))
1383+
raise pymol.CmdException(
1384+
f'pdb2pqr failed (exit {p.returncode})')
1385+
1386+
if not quiet and stdout:
1387+
print(stdout.decode('utf-8', errors='replace'))
1388+
1389+
_self.load(outfile, tmp_obj, quiet=1)
1390+
1391+
# Build H-count map from pdb2pqr output using selections
1392+
# Key: (chain, resi, resn, name) → target H count
1393+
stored._prot_hcount = {}
1394+
stored._prot_keys = []
1395+
_self.iterate(
1396+
f"{tmp_obj} and not hydro",
1397+
"k = (chain, resi, resn, name);"
1398+
"stored._prot_keys.append(k);"
1399+
"stored._prot_hcount[k] = 0")
1400+
1401+
for chain, resi, resn, name in stored._prot_keys:
1402+
heavy_sele = (
1403+
f'({tmp_obj} and chain "{chain}" and '
1404+
f'resi {resi} and name {name})')
1405+
h_count = _self.count_atoms(
1406+
f"hydro and neighbor ({heavy_sele})")
1407+
if h_count > 0:
1408+
stored._prot_hcount[
1409+
(chain, resi, resn, name)] = h_count
1410+
1411+
# Strip existing H and add all H geometrically
1412+
_self.remove(f"hydro and (bymol ({obj_sele}))")
1413+
_self.h_add(obj_sele, state=state)
1414+
1415+
# Compare and adjust H counts per heavy atom
1416+
for key, target_h in stored._prot_hcount.items():
1417+
chain, resi, resn, name = key
1418+
heavy_sele = (
1419+
f'({obj_name} and chain "{chain}" and '
1420+
f'resi {resi} and name {name})')
1421+
if _self.count_atoms(heavy_sele) == 0:
1422+
continue
1423+
h_sele = f"hydro and neighbor ({heavy_sele})"
1424+
current_h = _self.count_atoms(h_sele)
1425+
1426+
if current_h > target_h:
1427+
excess = current_h - target_h
1428+
_remove_excess_h(obj_name, h_sele, excess, _self)
1429+
elif current_h < target_h:
1430+
deficit = target_h - current_h
1431+
_force_add_h(obj_name, heavy_sele, deficit, state,
1432+
_self)
1433+
1434+
_self.delete(tmp_obj)
1435+
1436+
if not quiet:
1437+
total_h = _self.count_atoms(
1438+
f"hydro and (bymol ({obj_sele}))")
1439+
print(f" protonate: added {total_h} hydrogens at pH {pH:.1f}")
1440+
1441+
finally:
1442+
shutil.rmtree(tmpdir, ignore_errors=True)
1443+
1444+
def protonate(selection="all", pH=7.4, ff="amber", state=0,
1445+
quiet=1, _self=cmd):
1446+
'''
1447+
DESCRIPTION
1448+
1449+
"protonate" adds hydrogens with pH-dependent protonation states.
1450+
When pdb2pqr is available, uses PROPKA for per-residue pKa prediction.
1451+
Otherwise, falls back to textbook pKa values for standard titratable
1452+
residues.
1453+
1454+
Unlike "h_add" which fills all open valences, "protonate" considers
1455+
pKa values to determine which atoms should be protonated at the
1456+
given pH.
1457+
1458+
Heavy atoms and their visual settings (colors, representations) are
1459+
preserved. Only hydrogens are modified.
1460+
1461+
USAGE
1462+
1463+
protonate [ selection [, pH [, ff [, state [, quiet ]]]]]
1464+
1465+
ARGUMENTS
1466+
1467+
selection = string {default: all}
1468+
1469+
pH = float: target pH for protonation {default: 7.4}
1470+
1471+
ff = string: pdb2pqr forcefield (amber, charmm, parse, etc.) {default: amber}
1472+
1473+
state = int {default: 0 (all states)}
1474+
1475+
quiet = int {default: 1}
1476+
1477+
NOTES
1478+
1479+
When pdb2pqr is installed (included with PyMOL bundle), PROPKA is
1480+
used for accurate per-residue pKa prediction that accounts for the
1481+
protein microenvironment.
1482+
1483+
Without pdb2pqr, textbook pKa values are used:
1484+
Asp 3.65, Glu 4.25, His 6.00, Cys 8.18,
1485+
Tyr 10.07, Lys 10.53, Arg 12.48
1486+
1487+
At biological pH (7.4):
1488+
- Asp/Glu carboxylates are deprotonated (COO-)
1489+
- Lys amines are protonated (NH3+)
1490+
- His imidazole is deprotonated
1491+
1492+
SEE ALSO
1493+
1494+
h_add, h_fill
1495+
'''
1496+
import shutil
1497+
import subprocess
1498+
from . import selector
1499+
1500+
pH = float(pH)
1501+
if not (0.0 <= pH <= 14.0):
1502+
raise pymol.CmdException(
1503+
f"pH value {pH} out of range (0-14)")
1504+
state = int(state)
1505+
quiet = int(quiet)
1506+
1507+
selection = selector.process(selection)
1508+
1509+
obj_names = _self.get_object_list(selection)
1510+
if not obj_names:
1511+
raise pymol.CmdException("No objects in selection")
1512+
1513+
# Find pdb2pqr executable
1514+
exe = (shutil.which('pdb2pqr') or
1515+
shutil.which('pdb2pqr30') or
1516+
shutil.which('pdb2pqr_cli'))
1517+
1518+
is_v3 = True
1519+
if exe:
1520+
try:
1521+
p = subprocess.Popen([exe, '--version'],
1522+
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1523+
ver_out, _ = p.communicate()
1524+
ver_str = ver_out.decode('utf-8', errors='replace').strip()
1525+
is_v3 = ver_str.split()[-1].startswith('3')
1526+
except (subprocess.SubprocessError, OSError, ValueError):
1527+
pass
1528+
1529+
for obj_name in obj_names:
1530+
if exe:
1531+
_protonate_pdb2pqr(selection, obj_name, pH, ff, state,
1532+
quiet, exe, is_v3, _self=_self)
1533+
else:
1534+
if not quiet:
1535+
print(" protonate: pdb2pqr not found, using "
1536+
"textbook pKa values")
1537+
_protonate_fallback(selection, obj_name, pH, state,
1538+
quiet, _self=_self)
1539+
12581540

12591541
def sort(object="",_self=cmd):
12601542
'''

modules/pymol/keywords.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ def get_command_keywords(self_cmd=cmd):
204204
'pi_interactions': [ self_cmd.pi_interactions , 0, 0 , '' , parsing.STRICT ],
205205
'pop' : [ self_cmd.pop , 0 , 0 , '' , parsing.STRICT ],
206206
'protect' : [ self_cmd.protect , 0 , 0 , '' , parsing.STRICT ],
207+
'protonate' : [ self_cmd.protonate , 0 , 0 , '' , parsing.STRICT ],
207208
'pseudoatom' : [ self_cmd.pseudoatom , 0 , 0 , '' , parsing.STRICT ],
208209
'pwd' : [ self_cmd.pwd , 0 , 0 , '' , parsing.STRICT ],
209210
'python' : [ self_cmd.helping.python , 0 , 2 , ',' , parsing.PYTHON_BLOCK ],

0 commit comments

Comments
 (0)