Skip to content

Commit bc9d0cc

Browse files
tmihalac authored and Theodor Mihalache committed
feat: java transitive search - added support for project pom aggregator
Signed-off-by: Theodor Mihalache <[email protected]>
1 parent 10ada84 commit bc9d0cc

File tree

4 files changed

+188
-58
lines changed

4 files changed

+188
-58
lines changed

kustomize/base/exploit_iq_service.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,13 @@ spec:
123123
fieldPath: metadata.namespace
124124
- name: GOMODCACHE
125125
value: /exploit-iq-package-cache/go/pkg/mod
126+
- name: JAVA_MAVEN_DEFAULT_SETTINGS_FILE_PATH
127+
value: /maven-config/settings.xml
126128
volumeMounts:
127129
- name: config
128130
mountPath: /configs
131+
- name: maven-settings-config
132+
mountPath: /maven-config
129133
- name: cache
130134
mountPath: /exploit-iq-data
131135
- name: package-cache
@@ -137,6 +141,9 @@ spec:
137141
- name: config
138142
configMap:
139143
name: exploit-iq-config
144+
- name: maven-settings-config
145+
configMap:
146+
name: exploit-iq-maven-settings-config
140147
- name: cache
141148
persistentVolumeClaim:
142149
claimName: exploit-iq-data

kustomize/base/kustomization.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ configMapGenerator:
6161
files:
6262
- excludes.json
6363
- includes.json
64+
- name: exploit-iq-maven-settings-config
65+
files:
66+
- settings.xml
6467

6568
patches:
6669
- path: ips-patch.json

kustomize/base/settings.xml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<settings xmlns="http://maven.apache.org/SETTINGS/1.2.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.2.0
5+
https://maven.apache.org/xsd/settings-1.2.0.xsd">
6+
<profiles>
7+
<profile>
8+
<id>red-hat</id>
9+
<repositories>
10+
<repository>
11+
<id>red-hat-ga</id>
12+
<url>https://maven.repository.redhat.com/ga</url>
13+
</repository>
14+
</repositories>
15+
<pluginRepositories>
16+
<pluginRepository>
17+
<id>red-hat-ga</id>
18+
<url>https://maven.repository.redhat.com/ga</url>
19+
<releases>
20+
<enabled>true</enabled>
21+
</releases>
22+
<snapshots>
23+
<enabled>false</enabled>
24+
</snapshots>
25+
</pluginRepository>
26+
</pluginRepositories>
27+
</profile>
28+
</profiles>
29+
<activeProfiles>
30+
<activeProfile>red-hat</activeProfile>
31+
</activeProfiles>
32+
33+
<mirrors>
34+
<!-- Override Maven's built-in "maven-default-http-blocker" by reusing the same id,
35+
but making it not match external:http:* anymore. -->
36+
<mirror>
37+
<id>maven-default-http-blocker</id>
38+
<name>Disabled default HTTP blocker</name>
39+
40+
<!-- Make this mirror effectively never apply -->
41+
<mirrorOf>__no_such_repo_id__</mirrorOf>
42+
43+
<!-- URL is irrelevant if mirrorOf never matches, but must be present -->
44+
<url>https://repo1.maven.org/maven2</url>
45+
</mirror>
46+
</mirrors>
47+
48+
</settings>

src/exploit_iq_commons/utils/dep_tree.py

Lines changed: 130 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -841,9 +841,10 @@ def __init__(self, query: str):
841841
self._query = query
842842

843843
def install_dependencies(self, manifest_path: Path):
844+
settings_path = os.getenv('JAVA_MAVEN_DEFAULT_SETTINGS_FILE_PATH','../../../../kustomize/base/settings.xml')
844845
source_path = "dependencies-sources"
845-
subprocess.run(["mvn", "dependency:copy-dependencies", "-Dclassifier=sources",
846-
f"-DoutputDirectory={source_path}"], cwd=manifest_path)
846+
subprocess.run(["mvn", "-s", settings_path, "dependency:copy-dependencies", "-Dclassifier=sources",
847+
"-DincludeScope=runtime", f"-DoutputDirectory={manifest_path.resolve()}/{source_path}"], cwd=manifest_path)
847848

848849
full_source_path = manifest_path / source_path
849850
for jar in full_source_path.glob("*-sources.jar"):
@@ -855,25 +856,59 @@ def install_dependencies(self, manifest_path: Path):
855856
zf.extractall(dest)
856857

857858
def build_tree(self, manifest_path: Path) -> dict[str, list[str]]:
859+
settings_path = os.getenv("JAVA_MAVEN_DEFAULT_SETTINGS_FILE_PATH", "../../../../kustomize/base/settings.xml")
858860
dependency_file = manifest_path / "dependency_tree.txt"
859-
package_name = self._query.split(',')[0]
861+
package_name = self._query.split(",")[0]
860862

861863
if is_maven_gav(package_name):
862-
with open(dependency_file, "w") as f:
863-
subprocess.run(["mvn", "dependency:tree",
864-
f"-Dincludes={add_missing_jar_string(package_name)}",
865-
"-Dverbose"], cwd=manifest_path, stdout=f, check=True)
864+
subprocess.run(
865+
[
866+
"mvn",
867+
"com.github.ferstl:depgraph-maven-plugin:4.0.3:aggregate",
868+
"-s", settings_path,
869+
"-DgraphFormat=text",
870+
"-DshowGroupIds",
871+
"-DshowVersions",
872+
"-DshowTypes",
873+
"-DoutputDirectory=.",
874+
"-DoutputFileName=dependency_tree.txt",
875+
"-DclasspathScope=runtime",
876+
f"-DtargetIncludes={add_missing_jar_string(package_name)}",
877+
],
878+
cwd=manifest_path,
879+
check=True,
880+
)
866881
else:
867-
with open(dependency_file, "w") as f:
868-
subprocess.run(["mvn", "dependency:tree",
869-
"-Dverbose"], cwd=manifest_path, stdout=f, check=True)
882+
subprocess.run(
883+
[
884+
"mvn",
885+
"com.github.ferstl:depgraph-maven-plugin:4.0.3:aggregate",
886+
"-s", settings_path,
887+
"-DgraphFormat=text",
888+
"-DshowGroupIds",
889+
"-DshowVersions",
890+
"-DshowTypes",
891+
"-DoutputDirectory=.",
892+
"-DclasspathScope=runtime",
893+
"-DoutputFileName=dependency_tree.txt",
894+
],
895+
cwd=manifest_path,
896+
check=True,
897+
)
870898

871899
with dependency_file.open("r", encoding="utf-8") as f:
872900
lines = f.readlines()
873901

874902
parent, graph = self.__build_upside_down_dependency_graph(lines)
875903

876-
# Mark the top level for
904+
# Mark *all* roots, not just the first one.
905+
# A "root" is any node with no parents in the computed parent-chain list.
906+
# This preserves old single-root behavior and fixes multi-root / multi-parent trees.
907+
roots = [node for node, parents in graph.items() if not parents]
908+
for r in roots:
909+
graph[r] = [ROOT_LEVEL_SENTINEL]
910+
911+
# Backward-compatible: keep the old behavior too (harmless if already set above)
877912
graph[parent] = [ROOT_LEVEL_SENTINEL]
878913

879914
return graph
@@ -883,16 +918,16 @@ def __build_upside_down_dependency_graph(
883918
) -> Tuple[str, Dict[str, List[str]]]:
884919
root: str = ""
885920
stack: List[str] = []
886-
# coord -> set of direct parents (possibly multiple)
887-
graph_sets: Dict[str, set] = {}
921+
graph_sets: Dict[str, set[str]] = {} # coord -> set of direct parents
888922

889923
for line in dependency_lines:
890924
depth, coord = self.__parse_dependency_line(line)
891925
if depth is None or coord is None:
892926
continue
893927

894928
if depth == 0:
895-
# start (or restart) a root line
929+
# depgraph aggregate can emit multiple top-level roots. Keep the first as "root"
930+
# for backward compatibility, but still record others as separate roots in graph_sets.
896931
if not root:
897932
root = coord
898933
stack = [coord]
@@ -905,91 +940,128 @@ def __build_upside_down_dependency_graph(
905940

906941
parent = stack[-1] if stack else None
907942
if parent is not None:
908-
graph_sets.setdefault(coord, set()).add(parent)
943+
graph_sets.setdefault(coord, set()).add(parent) # supports multiple direct parents
909944
graph_sets.setdefault(parent, set())
910945
else:
911946
graph_sets.setdefault(coord, set())
912947

913948
stack.append(coord)
914949

915-
# ---------- second phase: all parents (direct + transitive) without duplicates ----------
916-
917950
def build_parent_chain(node: str) -> List[str]:
918951
"""
919-
For a given coord, return a flat list of *all* parents reachable
920-
via any path up to the root, with no duplicates.
921-
922-
Order: breadth-first from nearest parents outward.
952+
Return a flat list of all parents reachable via any path, no duplicates.
953+
Deterministic BFS order: nearest parents outward.
923954
"""
924955
result: List[str] = []
925956
seen: set[str] = set()
926957

927-
q = deque(graph_sets.get(node, ()))
958+
q = deque(sorted(graph_sets.get(node, ())))
928959
while q:
929-
parent = q.popleft()
930-
if parent in seen:
960+
p = q.popleft()
961+
if p in seen:
931962
continue
932-
seen.add(parent)
933-
result.append(parent)
963+
seen.add(p)
964+
result.append(p)
934965

935-
# enqueue this parent's parents
936-
for gp in graph_sets.get(parent, ()):
966+
for gp in sorted(graph_sets.get(p, ())):
937967
if gp not in seen:
938968
q.append(gp)
939969

940970
return result
941971

942-
graph: Dict[str, List[str]] = {
943-
coord: build_parent_chain(coord) for coord in graph_sets.keys()
944-
}
945-
972+
graph: Dict[str, List[str]] = {coord: build_parent_chain(coord) for coord in graph_sets.keys()}
946973
return root, graph
947974

948975
def __parse_dependency_line(self, line: str) -> Tuple[Optional[int], Optional[str]]:
949-
if not line.startswith("[INFO]"):
976+
"""
977+
Parse one dependency line from depgraph's graphFormat=text output.
978+
979+
Expected depgraph token shape (after indentation/branch prefix):
980+
groupId:artifactId:version:type:scope
981+
Example:
982+
org.apache.activemq:artemis-openwire-protocol:2.28.0:bundle:compile
983+
984+
We return (depth, "groupId:artifactId:version") and ignore type/scope/optional marker.
985+
986+
Also tolerates Maven log prefixes like "[INFO] " if they appear.
987+
"""
988+
raw = (line or "").rstrip("\n")
989+
if not raw.strip():
990+
return None, None
991+
992+
# If Maven stdout and depgraph output got mixed, you may see mid-line "[INFO]" injection.
993+
# Those lines are not safely recoverable as dependency tokens.
994+
if "[INFO]" in raw and not raw.lstrip().startswith("[INFO]"):
950995
return None, None
951996

952-
# Keep indentation blocks; Maven prints exactly one space after "[INFO]"
953-
s = line[6:]
954-
if s.startswith(" "):
955-
s = s[1:]
997+
s = raw.lstrip()
956998

957-
# Skip non-tree lines early
958-
if (not s
959-
or s.startswith(("---", "BUILD", "Scanning", "Finished", "Total time"))
960-
or ":" not in s):
999+
# Strip Maven log prefix if present
1000+
if s.startswith("[INFO]"):
1001+
s = s[6:].lstrip()
1002+
1003+
# Skip headers and build noise
1004+
if (
1005+
not s
1006+
or "Dependency graph:" in s
1007+
or s.startswith(("---", "BUILD", "Reactor Summary", "Total time", "Finished at", "Scanning"))
1008+
or s.startswith("[") # other log levels like [WARNING], [ERROR], etc.
1009+
or ":" not in s
1010+
):
9611011
return None, None
9621012

963-
# indent blocks ("| " or " ") + optional "+- " or "\- " + rest
964-
m = re.match(r'^(?P<indent>(?:\| | )*)(?P<branch>[+\\]-\s)?(?P<rest>.+)$', s)
1013+
# depgraph indentation blocks ("| " or " ") + optional "+- " or "\- " + rest
1014+
m = re.match(r"^(?P<indent>(?:\| | )*)(?P<branch>[+\\]-\s)?(?P<rest>.+)$", s)
9651015
if not m:
9661016
return None, None
9671017

968-
# Each indent block is 3 chars; add 1 if a branch token is present
969-
depth = (len(m.group('indent')) // 3) + (1 if m.group('branch') else 0)
970-
rest = m.group('rest').strip()
1018+
depth = (len(m.group("indent")) // 3) + (1 if m.group("branch") else 0)
1019+
rest = m.group("rest").strip()
9711020

9721021
# First token up to whitespace or ')', optionally starting with '('
973-
m2 = re.match(r'^\(?([^\s\)]+)\)?', rest)
1022+
m2 = re.match(r"^\(?([^\s\)]+)\)?", rest)
9741023
if not m2:
9751024
return None, None
9761025

977-
token = m2.group(1) # e.g., io.foo:bar:jar:1.2.3:compile
978-
parts = token.split(':')
1026+
token = m2.group(1) # e.g. com.google.guava:guava:32.0.1-jre:jar:compile
1027+
parts = token.split(":")
9791028

980-
# Drop trailing Maven scope if present
981-
scopes = {'compile', 'runtime', 'test', 'provided', 'system', 'import'}
1029+
scopes = {"compile", "runtime", "test", "provided", "system", "import"}
9821030
if parts and parts[-1] in scopes:
9831031
parts = parts[:-1]
9841032

985-
# Expect group:artifact:type:(classifier:)version — return without the type
986-
if len(parts) >= 4:
987-
group, artifact = parts[0], parts[1]
988-
version = parts[-1]
989-
coord = f"{group}:{artifact}:{version}"
990-
return depth, coord
1033+
if len(parts) < 3:
1034+
return None, None
1035+
1036+
group, artifact = parts[0], parts[1]
1037+
1038+
# depgraph text format puts version in position 2:
1039+
# group:artifact:version:type (scope already removed)
1040+
# We detect that by checking whether the last part is a packaging/type marker.
1041+
packaging = {"jar", "war", "pom", "bundle", "maven-plugin", "ear", "ejb", "rar", "zip", "test-jar"}
1042+
1043+
def looks_like_version(v: str) -> bool:
1044+
return any(ch.isdigit() for ch in v)
1045+
1046+
version: Optional[str] = None
1047+
1048+
# depgraph: group:artifact:version:type
1049+
if len(parts) >= 4 and parts[-1] in packaging and looks_like_version(parts[2]):
1050+
version = parts[2]
1051+
# depgraph (rare): group:artifact:version:type:classifier
1052+
elif len(parts) >= 5 and parts[-2] in packaging and looks_like_version(parts[2]):
1053+
version = parts[2]
1054+
else:
1055+
# Fallback for other Maven-like formats where version is last
1056+
if looks_like_version(parts[-1]):
1057+
version = parts[-1]
1058+
elif looks_like_version(parts[2]):
1059+
version = parts[2]
1060+
else:
1061+
return None, None
9911062

992-
return None, None
1063+
coord = f"{group}:{artifact}:{version}"
1064+
return depth, coord
9931065

9941066
class PythonDependencyTreeBuilder(DependencyTreeBuilder):
9951067

0 commit comments

Comments
 (0)