Skip to content

Commit fcff825

Browse files
authored
fix: literature tool improvements (Round 79) (#121)
* fix: literature tool improvements (Round 79) Feature-79A-001: BaseRESTTool now applies JSON schema defaults to query params, fixing Semantic Scholar tools returning only paperId+title instead of full data Feature-79A-002: PubMed_get_article returns structured JSON (title, abstract, authors, MeSH terms, DOI) instead of raw XML Feature-79A-003: PubMed_get_related enriches results with article metadata (title, authors, journal) instead of returning bare PMID+score pairs Feature-79B-004: OpenCitations_get_citations now has client-side limit (default 100) to prevent 14K+ citation responses overflowing output buffers Feature-79B-008: MultiAgentLiteratureSearch _parse_result now unwraps nested JSON from agent results, fixing the broken pipeline * refactor: compress Round 79 code per code-simplifier principles - Flatten nested conditionals in client_side_limit and elink enrichment - Remove redundant variables and intermediate assignments - Consolidate duplicate return paths in elink enrichment - Simplify _parse_result control flow * fix: remove unused variable to pass ruff lint check
1 parent 0ec68f8 commit fcff825

File tree

5 files changed

+618
-22
lines changed

5 files changed

+618
-22
lines changed

src/tooluniverse/base_rest_tool.py

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,34 @@ def _build_params(self, args: Dict[str, Any]) -> Dict[str, Any]:
8888
# Get param mapping for this API
8989
param_mapping = self._get_param_mapping()
9090

91-
# Only add arguments that aren't path parameters
91+
# Params handled client-side only (not sent to API)
92+
client_only = (
93+
{"limit"}
94+
if self.tool_config.get("fields", {}).get("client_side_limit")
95+
else set()
96+
)
97+
9298
for key, value in args.items():
93-
if f"{{{key}}}" not in url_template and value is not None:
94-
# Use mapped parameter name if available
95-
param_name = param_mapping.get(key, key)
96-
params[param_name] = value
99+
if (
100+
key not in client_only
101+
and f"{{{key}}}" not in url_template
102+
and value is not None
103+
):
104+
params[param_mapping.get(key, key)] = value
105+
106+
# Apply schema defaults for optional params not provided by the caller
107+
for key, prop in (
108+
self.tool_config.get("parameter", {}).get("properties", {}).items()
109+
):
110+
if (
111+
key in client_only
112+
or key in params
113+
or key in args
114+
or f"{{{key}}}" in url_template
115+
):
116+
continue
117+
if "default" in prop and prop["default"] is not None:
118+
params[param_mapping.get(key, key)] = prop["default"]
97119

98120
return params
99121

@@ -199,7 +221,23 @@ def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
199221
return special_result
200222

201223
# Use default response processing
202-
return self._process_response(response, url)
224+
result = self._process_response(response, url)
225+
226+
# Client-side limit for APIs that return unbounded lists
227+
if self.tool_config.get("fields", {}).get("client_side_limit"):
228+
props = self.tool_config.get("parameter", {}).get("properties", {})
229+
limit = arguments.get("limit", props.get("limit", {}).get("default"))
230+
data = result.get("data")
231+
if (
232+
limit is not None
233+
and isinstance(data, list)
234+
and len(data) > int(limit)
235+
):
236+
result["total_before_limit"] = len(data)
237+
result["data"] = data[: int(limit)]
238+
result["count"] = int(limit)
239+
240+
return result
203241

204242
except Exception as e:
205243
return {

src/tooluniverse/compose_scripts/enhanced_multi_agent_literature_search.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -266,21 +266,25 @@ def compose(
266266

267267

268268
def _parse_result(result):
269-
"""解析工具结果,确保返回字典格式"""
269+
"""Parse tool result into a dict, unwrapping nested JSON strings."""
270270
if isinstance(result, str):
271271
try:
272272
parsed = json.loads(result)
273-
if isinstance(parsed, dict):
274-
return parsed
275-
else:
276-
return {"result": parsed}
273+
return parsed if isinstance(parsed, dict) else {"result": parsed}
277274
except Exception:
278275
return {"result": result}
279-
elif isinstance(result, dict):
280-
# 如果已经是字典,直接返回
276+
if isinstance(result, dict):
277+
# Unwrap nested JSON in "result" key (e.g. from agent tools)
278+
inner = result.get("result")
279+
if isinstance(inner, str):
280+
try:
281+
parsed = json.loads(inner)
282+
if isinstance(parsed, dict):
283+
return parsed
284+
except Exception:
285+
pass
281286
return result
282-
else:
283-
return {"result": str(result)}
287+
return {"result": str(result)}
284288

285289

286290
def _format_papers_for_summary(papers):

src/tooluniverse/data/opencitations_tools.json

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,20 @@
9191
"doi": {
9292
"type": "string",
9393
"description": "DOI of the paper to find citations for. Do not include 'https://doi.org/' prefix. Examples: '10.1038/nature12373', '10.1002/jcc.21224'"
94+
},
95+
"limit": {
96+
"type": ["integer", "null"],
97+
"description": "Maximum number of citations to return. Highly-cited papers can have 10,000+ citations; use this to avoid oversized responses.",
98+
"default": 100
9499
}
95100
},
96101
"required": [
97102
"doi"
98103
]
99104
},
100105
"fields": {
101-
"endpoint": "https://api.opencitations.net/index/v1/citations/{doi}"
106+
"endpoint": "https://api.opencitations.net/index/v1/citations/{doi}",
107+
"client_side_limit": true
102108
},
103109
"type": "BaseRESTTool",
104110
"test_examples": [

src/tooluniverse/pubmed_tool.py

Lines changed: 122 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,102 @@ def parse_article(pmid: str, article_data: Dict[str, Any]) -> Dict[str, Any]:
287287
"error": f"Failed to fetch article summaries: {str(e)}",
288288
}
289289

290+
def _parse_efetch_xml(self, response) -> Dict[str, Any]:
291+
"""Parse PubMed efetch XML into structured article data."""
292+
try:
293+
root = ET.fromstring(response.text)
294+
except ET.ParseError:
295+
return {"status": "success", "data": response.text, "url": response.url}
296+
297+
def _text(el, path, default=""):
298+
found = el.find(path) if el is not None else None
299+
return found.text if found is not None and found.text else default
300+
301+
def _itertext(el, path):
302+
found = el.find(path) if el is not None else None
303+
return "".join(found.itertext()) if found is not None else ""
304+
305+
def _parse_article(article_el):
306+
cit = article_el.find("MedlineCitation")
307+
art = cit.find("Article") if cit is not None else None
308+
if cit is None or art is None:
309+
return None
310+
311+
pmid = _text(cit, "PMID")
312+
title = _itertext(art, "ArticleTitle")
313+
314+
# Abstract: join labeled sections
315+
abstract_parts = []
316+
for at in art.findall("Abstract/AbstractText") or []:
317+
label, text = at.get("Label", ""), "".join(at.itertext()).strip()
318+
if text:
319+
abstract_parts.append(f"{label}: {text}" if label else text)
320+
abstract = " ".join(abstract_parts)
321+
322+
# Authors (first 10)
323+
authors = []
324+
for au in (art.findall("AuthorList/Author") or [])[:10]:
325+
last, fore = au.findtext("LastName", ""), au.findtext("ForeName", "")
326+
name = f"{last} {fore}".strip() if last else fore
327+
if not name:
328+
continue
329+
entry = {"name": name}
330+
aff = _text(au, ".//Affiliation")
331+
if aff:
332+
entry["affiliation"] = aff
333+
authors.append(entry)
334+
335+
journal_el = art.find("Journal")
336+
journal = _text(journal_el, "Title") or _text(journal_el, "ISOAbbreviation")
337+
338+
doi = next(
339+
(
340+
eid.text
341+
for eid in art.findall("ELocationID")
342+
if eid.get("EIdType") == "doi" and eid.text
343+
),
344+
"",
345+
)
346+
347+
pd = art.find(".//PubDate")
348+
pub_year = _text(pd, "Year")
349+
pub_date = " ".join(
350+
filter(None, [pub_year, _text(pd, "Month"), _text(pd, "Day")])
351+
)
352+
353+
mesh = [
354+
d.text
355+
for d in cit.findall("MeshHeadingList/MeshHeading/DescriptorName")
356+
if d.text
357+
]
358+
pub_types = [pt.text for pt in art.findall(".//PublicationType") if pt.text]
359+
360+
return {
361+
"pmid": pmid,
362+
"title": title,
363+
"abstract": abstract or None,
364+
"authors": authors,
365+
"journal": journal or None,
366+
"pub_date": pub_date,
367+
"pub_year": pub_year,
368+
"doi": doi or None,
369+
"doi_url": f"https://doi.org/{doi}" if doi else None,
370+
"url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
371+
"mesh_terms": mesh or None,
372+
"publication_types": pub_types or None,
373+
}
374+
375+
articles = [
376+
a
377+
for a in (_parse_article(el) for el in root.findall(".//PubmedArticle"))
378+
if a
379+
]
380+
data = articles[0] if len(articles) == 1 else articles
381+
result = {"status": "success", "data": data, "url": response.url}
382+
if len(articles) != 1:
383+
result["count"] = len(articles)
384+
return result
385+
290386
def _fetch_abstracts(self, pmid_list: list[str]) -> Dict[str, str]:
291387
"""Best-effort abstract fetch via efetch XML for a list of PMIDs."""
292388
pmids = [str(p).strip() for p in (pmid_list or []) if str(p).strip()]
@@ -503,6 +599,30 @@ def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
503599
limit = None
504600
if limit is not None:
505601
links = links[:limit]
602+
603+
# Enrich with article metadata
604+
pmids = [
605+
str(lk["id"] if isinstance(lk, dict) else lk)
606+
for lk in links
607+
]
608+
scores = {
609+
str(lk["id"]): lk.get("score")
610+
for lk in links
611+
if isinstance(lk, dict)
612+
}
613+
summary = self._fetch_summaries(pmids)
614+
if summary.get("status") == "success" and summary.get(
615+
"data"
616+
):
617+
for item in summary["data"]:
618+
score = (
619+
scores.get(str(item.get("pmid", "")))
620+
if isinstance(item, dict)
621+
else None
622+
)
623+
if score is not None:
624+
item["relevance_score"] = score
625+
links = summary["data"]
506626
return {
507627
"status": "success",
508628
"data": links,
@@ -531,12 +651,8 @@ def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
531651
"url": response.url,
532652
}
533653
except Exception:
534-
# For XML responses (efetch), return as text
535-
return {
536-
"status": "success",
537-
"data": response.text,
538-
"url": response.url,
539-
}
654+
# For XML responses (efetch), parse into structured data
655+
return self._parse_efetch_xml(response)
540656

541657
except Exception as e:
542658
return {

0 commit comments

Comments (0)