Skip to content

Commit 9e5ffc2

Browse files
tmihalac (Theodor Mihalache)
authored and committed
Fixed prompts to be more optimized to Llama 3.1 and more general
Signed-off-by: Theodor Mihalache <tmihalac@tmihalac-thinkpadp1gen7.rmtusfl.csb>
1 parent c10ab50 commit 9e5ffc2

19 files changed

+416
-111
lines changed

kustomize/base/exploit-iq-config.yml

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,8 @@ functions:
6767
enable_functions_usage_search: true
6868
Function Locator:
6969
_type: package_and_function_locator
70+
Function Library Version Finder:
71+
_type: calling_function_library_version_finder
7072
Code Semantic Search:
7173
_type: local_vdb_retriever
7274
embedder_name: nim_embedder
@@ -98,6 +100,7 @@ functions:
98100
- Call Chain Analyzer
99101
- Function Caller Finder
100102
- Function Locator
103+
- Function Library Version Finder
101104
max_concurrency: null
102105
max_iterations: 10
103106
prompt_examples: false
@@ -135,7 +138,7 @@ functions:
135138
# vex_format: csaf
136139
cve_http_output:
137140
_type: cve_http_output
138-
url: CALLBACK_URL_PLACEHOLDER
141+
url: https://exploit-iq-client.theodor-test.svc:8443
139142
endpoint: /api/v1/reports
140143
auth_type: bearer
141144
token_path: /var/run/secrets/kubernetes.io/serviceaccount/token

kustomize/config-http-openai-local.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ functions:
8383
max_retries: 5
8484
Container Analysis Data:
8585
_type: container_image_analysis_data
86+
Function Library Version Finder:
87+
_type: calling_function_library_version_finder
8688
cve_agent_executor:
8789
_type: cve_agent_executor
8890
llm_name: cve_agent_executor_llm
@@ -94,6 +96,7 @@ functions:
9496
- Call Chain Analyzer
9597
- Function Caller Finder
9698
- Function Locator
99+
- Function Library Version Finder
97100
max_concurrency: null
98101
max_iterations: 10
99102
prompt_examples: false

src/exploit_iq_commons/utils/functions_parsers/java_functions_parsers.py

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1113,6 +1113,7 @@ def _process_call(start_idx: int, open_paren_pos: int) -> bool:
11131113
documents_of_functions=documents_of_functions,
11141114
callee_function_name=callee_function_name,
11151115
type_inheritance=type_inheritance,
1116+
callee_declaring_fqcn=declaring_fqcn,
11161117
):
11171118
logger.debug(
11181119
"__check_identifier_resolved_to_callee_function_package resolved successfully - "
@@ -1160,6 +1161,7 @@ def _process_method_ref(dc_idx: int, ref_len: int, make_ctor: bool) -> bool:
11601161
documents_of_functions=documents_of_functions,
11611162
callee_function_name=callee_function_name,
11621163
type_inheritance=type_inheritance,
1164+
callee_declaring_fqcn=declaring_fqcn,
11631165
)
11641166

11651167
# ---------------------------
@@ -1214,7 +1216,8 @@ def __check_identifier_resolved_to_callee_function_package(
12141216
target_class_names: frozenset[str],
12151217
documents_of_functions: list[Document],
12161218
callee_function_name: str,
1217-
type_inheritance: dict[Tuple[str, str], List[Tuple[str, str]]]
1219+
type_inheritance: dict[Tuple[str, str], List[Tuple[str, str]]],
1220+
callee_declaring_fqcn: str = "",
12181221
) -> bool:
12191222
"""
12201223
Decide if the found call expression (`identifier_function`) in `function` actually targets
@@ -1658,16 +1661,28 @@ def _extract_ctor_type(expr: str) -> str:
16581661
if _type_matches_callee(iface_type):
16591662
return True
16601663

1661-
if recv_trim == 'super':
1662-
try:
1663-
caller_inheritance_list = get_target_class_names(type_inheritance[(caller_fqcn, caller_src)])
1664-
except KeyError:
1665-
caller_inheritance_list = []
1666-
for cand in caller_inheritance_list:
1667-
if cand == caller_fqcn:
1668-
continue
1669-
if _type_token_matches_callee(cand): # cand is already FQCN (CHANGED: fqcn-only)
1670-
return True
1664+
if recv_trim == 'super' and callee_declaring_fqcn:
1665+
# Don't match super calls from root-package functions.
1666+
# super delegates to the parent's own implementation which is
1667+
# self-contained. Allowing it at the app→dependency boundary
1668+
# lets the CCA's backtracking route through the entire
1669+
# dependency hierarchy via polymorphic dispatch, producing
1670+
# false-positive chains (e.g. handler.handle() hops).
1671+
if self.is_root_package(function):
1672+
pass # skip – super from app code is not a valid entry point
1673+
else:
1674+
try:
1675+
caller_inheritance = type_inheritance[(caller_fqcn, caller_src)]
1676+
except KeyError:
1677+
caller_inheritance = []
1678+
# super resolves to the direct parent class only; match only if the
1679+
# direct parent IS the callee's declaring class (exact match).
1680+
for cand_fqcn, _cand_src in caller_inheritance:
1681+
if cand_fqcn == caller_fqcn:
1682+
continue
1683+
if cand_fqcn == callee_declaring_fqcn or cand_fqcn == callee_declaring_fqcn.replace('$', '.'):
1684+
return True
1685+
break
16711686

16721687
if recv_trim == 'this':
16731688
if caller_fqcn and _type_token_matches_callee(caller_fqcn):
@@ -1728,16 +1743,23 @@ def _extract_ctor_type(expr: str) -> str:
17281743
if _type_matches_callee(iface_type):
17291744
return True
17301745

1731-
if recv_raw == 'super':
1732-
try:
1733-
caller_inheritance_list = get_target_class_names(type_inheritance[(caller_fqcn, caller_src)])
1734-
except KeyError:
1735-
caller_inheritance_list = []
1736-
for cand in caller_inheritance_list:
1737-
if cand == caller_fqcn:
1738-
continue
1739-
if _type_token_matches_callee(cand): # fqcn-only
1740-
return True
1746+
if recv_raw == 'super' and callee_declaring_fqcn:
1747+
# Don't match super calls from root-package functions (see early-path comment).
1748+
if self.is_root_package(function):
1749+
pass # skip
1750+
else:
1751+
try:
1752+
caller_inheritance = type_inheritance[(caller_fqcn, caller_src)]
1753+
except KeyError:
1754+
caller_inheritance = []
1755+
# super resolves to the direct parent class only; match only if the
1756+
# direct parent IS the callee's declaring class (exact match).
1757+
for cand_fqcn, _cand_src in caller_inheritance:
1758+
if cand_fqcn == caller_fqcn:
1759+
continue
1760+
if cand_fqcn == callee_declaring_fqcn or cand_fqcn == callee_declaring_fqcn.replace('$', '.'):
1761+
return True
1762+
break
17411763
elif recv_raw == 'this':
17421764
if caller_fqcn and _type_token_matches_callee(caller_fqcn):
17431765
return True

src/exploit_iq_commons/utils/java_chain_of_calls_retriever.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -414,10 +414,10 @@ def get_relevant_documents(self, query: str) -> tuple[List[Document], bool]:
414414
return matching_documents, self.found_path
415415

416416
def extract_from_query(self, query: str) -> tuple[str, str, str]:
417-
(package_name, function) = tuple( query.splitlines()[0].strip('"\'').replace("#", ".").split(","))
417+
(package_name, function) = tuple( query.splitlines()[0].strip('"\'\u2018\u2019\u201c\u201d').replace("#", ".").split(","))
418418

419419
class_name = function.rpartition('.')[0]
420-
method_name = function.rpartition('.')[2]
420+
method_name = re.sub(r'\(.*\)$', '', function.rpartition('.')[2])
421421

422422
if not class_name and self.is_java_fqcn(package_name):
423423
class_name = package_name

src/vuln_analysis/configs/config-http-nim.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ functions:
5656
enable_functions_usage_search: true
5757
Function Locator:
5858
_type: package_and_function_locator
59+
Function Library Version Finder:
60+
_type: calling_function_library_version_finder
5961
Code Semantic Search:
6062
_type: local_vdb_retriever
6163
embedder_name: nim_embedder
@@ -87,6 +89,7 @@ functions:
8789
- Call Chain Analyzer
8890
- Function Caller Finder
8991
- Function Locator
92+
- Function Library Version Finder
9093
max_concurrency: null
9194
max_iterations: 10
9295
prompt_examples: false

src/vuln_analysis/configs/config-http-openai.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ functions:
6363
enable_functions_usage_search: true
6464
Function Locator:
6565
_type: package_and_function_locator
66+
Function Library Version Finder:
67+
_type: calling_function_library_version_finder
6668
Code Semantic Search:
6769
_type: local_vdb_retriever
6870
embedder_name: nim_embedder
@@ -94,6 +96,7 @@ functions:
9496
- Call Chain Analyzer
9597
- Function Caller Finder
9698
- Function Locator
99+
- Function Library Version Finder
97100
max_concurrency: null
98101
max_iterations: 10
99102
prompt_examples: false

src/vuln_analysis/configs/config-tracing.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ functions:
6767
enable_functions_usage_search: true
6868
Function Locator:
6969
_type: package_and_function_locator
70+
Function Library Version Finder:
71+
_type: calling_function_library_version_finder
7072
Code Semantic Search:
7173
_type: local_vdb_retriever
7274
embedder_name: nim_embedder
@@ -98,6 +100,7 @@ functions:
98100
- Call Chain Analyzer
99101
- Function Caller Finder
100102
- Function Locator
103+
- Function Library Version Finder
101104
max_concurrency: null
102105
max_iterations: 10
103106
prompt_examples: false

src/vuln_analysis/configs/config.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ functions:
6060
max_retries: 5
6161
Container Analysis Data:
6262
_type: container_image_analysis_data
63+
Function Library Version Finder:
64+
_type: calling_function_library_version_finder
6365
cve_agent_executor:
6466
_type: cve_agent_executor
6567
llm_name: cve_agent_executor_llm
@@ -68,6 +70,7 @@ functions:
6870
- Docs Semantic Search
6971
# - Code Keyword Search # Uncomment to enable keyword search
7072
- CVE Web Search
73+
- Function Library Version Finder
7174
max_concurrency: null
7275
max_iterations: 10
7376
prompt_examples: false

src/vuln_analysis/functions/cve_agent.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,16 @@ async def _create_agent(config: CVEAgentExecutorToolConfig, builder: Builder,
8888
(tool.name == ToolNames.FUNCTION_CALLER_FINDER and (not config.transitive_search_tool_enabled or
8989
state.code_index_path is None)) or
9090
(tool.name == ToolNames.FUNCTION_LOCATOR and (not config.transitive_search_tool_enabled or
91+
state.code_index_path is None)) or
92+
(tool.name == ToolNames.FUNCTION_LIBRARY_VERSION_FINDER and (not config.transitive_search_tool_enabled or
9193
state.code_index_path is None))
9294
)
9395
]
9496

9597
tools = [
9698
tool for tool in tools
97-
if not (tool.name == ToolNames.FUNCTION_CALLER_FINDER and state.original_input.input.image.ecosystem != Ecosystem.GO)
99+
if not ((tool.name == ToolNames.FUNCTION_CALLER_FINDER and state.original_input.input.image.ecosystem != Ecosystem.GO) or
100+
(tool.name == ToolNames.FUNCTION_LIBRARY_VERSION_FINDER and state.original_input.input.image.ecosystem != Ecosystem.JAVA))
98101
]
99102
# Get tool names after filtering for dynamic guidance
100103
enabled_tool_names = [tool.name for tool in tools]

src/vuln_analysis/functions/cve_checklist.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,14 @@ async def cve_checklist(config: CVEChecklistToolConfig, builder: Builder):
5858
agent_config = builder.get_function_config(config.agent_name)
5959
agent_tool_names = agent_config.tool_names if hasattr(agent_config, 'tool_names') else None
6060

61-
async def generate_checklist_for_cve(cve_intel):
61+
async def generate_checklist_for_cve(cve_intel, ecosystem=None):
6262

6363
checklist = await generate_checklist(prompt=config.prompt,
6464
llm=llm,
6565
input_dict=cve_intel,
6666
tool_names=agent_tool_names,
67-
enable_llm_list_parsing=False)
67+
enable_llm_list_parsing=False,
68+
ecosystem=ecosystem)
6869

6970
checklist = await _parse_list([checklist])
7071

@@ -75,7 +76,15 @@ async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState:
7576
intel_df = data_utils.merge_intel_and_plugin_data_convert_to_dataframe(state.cve_intel)
7677
workflow_cve_intel = intel_df.to_dict(orient='records')
7778

78-
results = await asyncio.gather(*(generate_checklist_for_cve(cve_intel) for cve_intel in workflow_cve_intel))
79+
# Extract ecosystem for ecosystem-aware example selection
80+
ecosystem = None
81+
if (state.original_input and state.original_input.input
82+
and state.original_input.input.image
83+
and state.original_input.input.image.ecosystem):
84+
ecosystem = state.original_input.input.image.ecosystem.value
85+
86+
results = await asyncio.gather(*(generate_checklist_for_cve(cve_intel, ecosystem=ecosystem)
87+
for cve_intel in workflow_cve_intel))
7988

8089
state.checklist_plans = dict(results)
8190
return state

0 commit comments

Comments
 (0)