Skip to content

Commit 76f04f4

Browse files
authored
fix(rp2erxn.py): skip, without failing, transformations that have dot-containing products (#8)
The "Product SMILES" column of the RP2 output file is supposed to contain only one molecule per row (i.e., a SMILES string without dots).
1 parent a68c28c commit 76f04f4

File tree

1 file changed

+28
-5
lines changed

1 file changed

+28
-5
lines changed

rp2paths/rp2erxn.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ def compute(infile, cmpdfile='compounds.txt', rxnfile='reactions.txt',
201201

202202
# 1) Check consistency and 2) Populate compounds
203203
all_cmpds = dict()
204+
banished_trs = set()
204205
for tid in sorted(content.keys()): # Order determine CMPD IDs
205206
first = True
206207
for row in content[tid]:
@@ -228,20 +229,42 @@ def compute(infile, cmpdfile='compounds.txt', rxnfile='reactions.txt',
228229
sys.exit(0)
229230
try:
230231
assert prods_from_rxn == prods_from_cmpd
231-
except BaseException:
232-
print('Assertion error: differences in products')
233-
print(tid)
234-
print(prods_from_rxn, prods_from_cmpd)
235-
sys.exit(0)
232+
except AssertionError:
233+
if any("." in smi for smi in prods_from_cmpd):
234+
print(
235+
"Unexpected multiple products detected in 'Product "
236+
"SMILES' column from RetroPath2.0 result file. This "
237+
"is likely due to a sanitization issue from "
238+
"RetroPath2.0 result file."
239+
)
240+
print(f" L Transformation skipped: {tid}")
241+
print(f" L Products from reaction: {prods_from_rxn}")
242+
print(f" L Products from 'Product SMILES' column: {prods_from_cmpd}")
243+
banished_trs.add(tid)
244+
continue
245+
else:
246+
print('Assertion error: differences in products')
247+
print(tid)
248+
print(prods_from_rxn, prods_from_cmpd)
249+
sys.exit(0)
236250
# Populate
237251
for smi in sorted(list(subs_from_rxn | prods_from_rxn)):
238252
if smi not in all_cmpds.keys():
239253
cmpd = Compound(smi)
240254
all_cmpds[smi] = cmpd
241255

256+
# Debug info
257+
if len(banished_trs) > 0:
258+
print(f"Total transformations skipped: {len(banished_trs)}")
259+
for tid in sorted(banished_trs):
260+
print(f" - {tid}")
261+
242262
# Populate transformations
243263
all_trs = dict()
244264
for tid in content.keys():
265+
# Skip transformations that were marked as banished
266+
if tid in banished_trs:
267+
continue
245268
first = True
246269
for row in content[tid]:
247270
if first:

0 commit comments

Comments
 (0)