Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 82 additions & 16 deletions build_ast_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,54 @@ def _find_dependents(conn: ladybug.Connection, changed_node_ids: set[str]) -> se
return dependent_files


def _find_annotation_dependents(conn: ladybug.Connection, changed_node_ids: set[str]) -> set[str]:
"""Find files that USE an annotation whose DEFINITION is among the changed nodes.

Annotation usage is a node property (``annotations`` STRING[]), not a
Symbol->Symbol edge, so `_find_dependents` — which walks edges — never pulls
annotation users into scope. When an annotation definition changes (e.g.
``@interface Foo`` gains a meta-annotation that shifts the Layer-A chain in
`resolve_role_and_capabilities`), every type carrying ``@Foo`` may need its
``role``/``capabilities`` recomputed or it goes stale until the next full
rebuild. Return those users' files so the orchestrator treats them as
dependents (re-parsed, role re-SET); the expansion cap bounds the scope.

Scope is direct usage only: a user of an annotation that transitively
composes the changed one (e.g. ``@A`` where ``@A`` is meta-annotated with the
changed ``@B``) is NOT pulled in — that reverse-chain walk is left to a
future hardening pass. The direct case covers the dominant real-world shape
(a stereotype annotation applied directly to many types).
"""
if not changed_node_ids:
return set()
# Changed annotation definitions → the simple names users reference them by.
# Runs before `_delete_file_scope`, so the def nodes still exist.
name_result = conn.execute(
"MATCH (s:Symbol) WHERE s.id IN $ids AND s.kind = 'annotation' RETURN s.name",
{"ids": list(changed_node_ids)},
)
names: list[str] = []
while name_result.has_next():
nm = name_result.get_next()[0]
if nm:
names.append(nm)
if not names:
return set()
dependent_files: set[str] = set()
for nm in names:
user_result = conn.execute(
"MATCH (s:Symbol) "
"WHERE list_contains(s.annotations, $nm) AND s.filename <> '' "
"RETURN DISTINCT s.filename",
{"nm": nm},
)
while user_result.has_next():
fn = user_result.get_next()[0]
if fn:
dependent_files.add(fn)
return dependent_files


def _delete_file_scope(
conn: ladybug.Connection,
changed_files: set[str],
Expand Down Expand Up @@ -3089,6 +3137,19 @@ def _existing_node_ids(conn: ladybug.Connection) -> set[str]:
"n.signature = $signature, n.parent_id = $parent_id, n.resolved = $resolved"
)

# Refresh every mutable Route field on an existing Route node by id. Mirrors the
# `_write_nodes_impl` Symbol pattern (bulk COPY new rows + per-row SET existing
# ones) so the global pass 5-6 step no longer needs a per-row MERGE upsert.
# Field list mirrors `_ROUTE_COLUMNS` minus `id`.
_SET_ROUTE_BY_ID = (
"MATCH (r:Route {id: $id}) "
"SET r.kind = $kind, r.framework = $framework, r.method = $method, "
"r.path = $path, r.path_template = $path_template, r.path_regex = $path_regex, "
"r.topic = $topic, r.broker = $broker, r.feign_name = $feign_name, r.feign_url = $feign_url, "
"r.microservice = $microservice, r.module = $module, r.filename = $filename, "
"r.start_line = $start_line, r.end_line = $end_line, r.resolved = $resolved"
)

_REL_EXTENDS_COLUMNS = ["FROM", "TO", "source_file", "dst_name", "dst_fqn", "resolved"]
_REL_IMPLEMENTS_COLUMNS = ["FROM", "TO", "source_file", "dst_name", "dst_fqn", "resolved"]
_REL_INJECTS_COLUMNS = ["FROM", "TO", "source_file", "dst_name", "dst_fqn", "resolved", "mechanism", "annotation", "field_or_param"]
Expand Down Expand Up @@ -3776,6 +3837,12 @@ def incremental_rebuild(
# Find dependents
dependent_files = _find_dependents(conn, changed_node_ids)

# Annotation-definition change: also pull in files that USE the changed
# annotation. Annotation usage is a node property, not a Symbol->Symbol
# edge, so `_find_dependents` misses them and their role (derived from
# the project-wide meta-chain) would go stale. See PR-P5b.
dependent_files |= _find_annotation_dependents(conn, changed_node_ids)

# Union changed files with dependents
scope_files = changed_files | dependent_files

Expand Down Expand Up @@ -3940,22 +4007,21 @@ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTa
Route nodes (created by pass5 for cross-service calls) that wouldn't
otherwise exist in LadybugDB.
"""
# Upsert every route via MERGE. `tables.routes_rows` is the full route set
# (pass4 routes + pass5 phantom routes), not just phantoms; MERGE is
# idempotent against routes already written during the scoped step, so it
# neither duplicates nor drops them. This is the one remaining per-row graph
# write — converting it to bulk COPY requires filtering against existing
# route ids to reproduce the dedup, and is left as a future optimization.
for row in tables.routes_rows:
conn.execute(
"MERGE (r:Route {id: $id}) "
"SET r.kind = $kind, r.framework = $framework, r.method = $method, "
"r.path = $path, r.path_template = $path_template, r.path_regex = $path_regex, "
"r.topic = $topic, r.broker = $broker, r.feign_name = $feign_name, r.feign_url = $feign_url, "
"r.microservice = $microservice, r.module = $module, r.filename = $filename, "
"r.start_line = $start_line, r.end_line = $end_line, r.resolved = $resolved",
asdict(row),
)
# Bulk-write routes, mirroring `_write_nodes_impl`: COPY the rows that are new
# to the DB, and SET every mutable field on the routes already present (the
# caller does NOT delete routes, only Clients/Producers — see the global
# pass 5-6 block). `tables.routes_rows` is the full route set (pass4 routes +
# pass5 phantom routes), not just phantoms, so the SET keeps existing routes'
# properties in sync with pass5's cross-service enrichment while the COPY
# materializes phantoms that have no node yet. Replaces the last per-row
# graph write (MERGE upsert).
route_rows = [asdict(row) for row in tables.routes_rows]
existing_route_ids = _existing_node_ids(conn)
new_route_rows = [r for r in route_rows if r["id"] not in existing_route_ids]
_bulk_copy(conn, "Route", _ROUTE_COLUMNS, new_route_rows)
for r in route_rows:
if r["id"] in existing_route_ids:
conn.execute(_SET_ROUTE_BY_ID, r)

# Build node_id lookup for members and types
member_by_id = {m.node_id: m for m in tables.members}
Expand Down
166 changes: 160 additions & 6 deletions tests/test_incremental_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1148,12 +1148,166 @@ def role_of(fqn: str) -> str:
"(PR-P4 regression: skip-if-exists dropped the upsert)"
)

def test_incremental_route_merge_dedup_preserved(self, tmp_path: Path) -> None:
"""Pass5/6 Route MERGE dedup is preserved after bulk conversion.
def test_incremental_pulls_in_annotation_users_on_def_change(
self, tmp_path: Path
) -> None:
"""Editing an annotation definition refreshes its direct users' roles.

Regression guard for the PR-P5b fix. Annotation usage is a node property
(``annotations`` STRING[]), not a Symbol->Symbol edge, so `_find_dependents`
— which walks edges — never pulls annotation users into scope. When an
annotation definition changes (e.g. ``@interface MyService`` gains a
``@Service`` meta-annotation that shifts the Layer-A chain), types
carrying ``@MyService`` need their ``role`` recomputed or they go stale.

Unlike `test_incremental_refreshes_dependent_role_on_meta_chain_change`
(PR-P4), the user here has NO edge to any changed node: it is pulled in
purely by the annotation-usage expansion (`_find_annotation_dependents`),
so this isolates the scope-tracking fix from the preserved-dependent
property refresh.
"""
from build_ast_graph import incremental_rebuild
from graph_enrich import collect_annotation_meta_chain
from _builders import build_ladybug_full_into

source_root = tmp_path / "src"
source_root.mkdir()
index_dir = tmp_path / "index"
index_dir.mkdir()
ladybug_path = index_dir / "code_graph.lbug"
java = source_root / "pkg"
java.mkdir(parents=True)

# Svc carries @MyService but calls/extends nothing — no edge to any other
# node, so `_find_dependents` cannot pull it in. Only annotation usage can.
(java / "MyService.java").write_text(
"package pkg; public @interface MyService {}\n", encoding="utf-8"
)
(java / "Svc.java").write_text(
"package pkg;\n@MyService\npublic class Svc {}\n", encoding="utf-8"
)
build_ladybug_full_into(source_root, ladybug_path)

# Edit ONLY the annotation definition: add a @Service meta-annotation so
# the chain maps MyService → Service and Svc's role flips OTHER → SERVICE.
(java / "MyService.java").write_text(
"package pkg;\nimport org.springframework.stereotype.Service;\n"
"@Service\npublic @interface MyService {}\n",
encoding="utf-8",
)
collect_annotation_meta_chain.cache_clear()
result = incremental_rebuild(source_root, ladybug_path, verbose=False)
assert result.mode == "incremental", f"expected incremental, got {result.mode}"
assert result.dependents_reprocessed >= 1, (
"Svc should be pulled in as an annotation-dependent, not just the def file"
)

def role_of(fqn: str) -> str:
db = ladybug.Database(str(ladybug_path))
conn = ladybug.Connection(db)
r = conn.execute("MATCH (n:Symbol {fqn: $fqn}) RETURN n.role", {"fqn": fqn})
v = r.get_next()[0] if r.has_next() else None
conn.close()
db.close()
return v

# Svc was pulled in by annotation usage (no edge to the changed def); its
# role must refresh to SERVICE to match a full rebuild of this state.
assert role_of("pkg.Svc") == "SERVICE", (
"annotation user's role not refreshed after annotation-def change "
"(PR-P5b regression: annotation users not pulled into incremental scope)"
)

Creates a corpus where pass5/6 re-emits an existing route and verifies
no duplicate Route rows after incremental rebuild (the retained MERGE
dedups against routes written during the scoped step).
def test_incremental_overrides_not_duplicated_for_non_scope_subtype(
self, tmp_path: Path
) -> None:
"""A non-scope subtype's OVERRIDES edge is not duplicated on increment.

Invariant guard for the OVERRIDES path. Unlike DECLARES (derived purely
from a member's own ``parent_id``/``node_id``, which is why a loaded
non-scope member could duplicate it — fixed in PR-P4), an OVERRIDES pair
needs the subtype's supertype via `_direct_supertype_ids`, which reads
`tables.extends_rows`/`implements_rows`. Those edge tables are populated
only by `pass2_edges` over *parsed* files; cross-file resolution stubs
(`loaded_from_db`) load nodes but NOT edges, so a loaded subtype has no
derivable supertype and `_populate_overrides_rows` never re-emits its
OVERRIDES — the edge (``source_file`` = its out-of-scope file) survives
untouched, matching a full rebuild.

This test pins that behavior down. If stub edge-loading is ever added as
an optimization, this guard will catch the resulting duplication (the
same class of bug PR-P4 fixed for DECLARES).

Corpus: `TImpl implements T` overrides `foo()` in non-scope files; an
unrelated `Other.java` change triggers the increment so `TImpl`/`T` are
loaded as stubs. The increment must keep exactly one OVERRIDES edge.
"""
from build_ast_graph import incremental_rebuild
from _builders import build_ladybug_full_into

source_root = tmp_path / "src"
source_root.mkdir()
index_dir = tmp_path / "index"
index_dir.mkdir()
ladybug_path = index_dir / "code_graph.lbug"
java = source_root / "pkg"
java.mkdir(parents=True)

(java / "T.java").write_text(
"package pkg; public interface T { void foo(); }\n", encoding="utf-8"
)
(java / "TImpl.java").write_text(
"package pkg; public class TImpl implements T { public void foo() {} }\n",
encoding="utf-8",
)
(java / "Other.java").write_text(
"package pkg; public class Other { public void go() {} }\n", encoding="utf-8"
)
build_ladybug_full_into(source_root, ladybug_path)

# Change Other.java only. Nothing references Other, so the scope is just
# {Other.java}; TImpl/T are non-scope and loaded as resolution stubs.
(java / "Other.java").write_text(
"package pkg; public class Other { public void go() {} public void more() {} }\n",
encoding="utf-8",
)
result = incremental_rebuild(source_root, ladybug_path, verbose=False)
assert result.mode == "incremental", f"expected incremental, got {result.mode}"

def overrides_count(path: Path) -> int:
db = ladybug.Database(str(path))
conn = ladybug.Connection(db)
r = conn.execute("MATCH ()-[o:OVERRIDES]->() RETURN count(o)")
n = r.get_next()[0] if r.has_next() else 0
conn.close()
db.close()
return n

# Exactly one override relationship exists (TImpl.foo → T.foo); a
# duplicate (2) is the bug. Cross-check against a fresh full rebuild of
# the identical final state — the increment must match it.
full_dir = tmp_path / "full"
full_dir.mkdir()
full_path = full_dir / "code_graph.lbug"
build_ladybug_full_into(source_root, full_path)

assert overrides_count(ladybug_path) == 1, (
"non-scope OVERRIDES edge duplicated on increment "
"(PR-P5a regression: loaded subtype method re-emitted)"
)
assert overrides_count(ladybug_path) == overrides_count(full_path), (
"incremental OVERRIDES count diverged from full rebuild"
)

def test_incremental_route_merge_dedup_preserved(self, tmp_path: Path) -> None:
"""Pass5/6 Route write does not duplicate existing routes.

Creates a corpus where pass5/6 re-emits an existing route and verifies
no duplicate Route rows after incremental rebuild. The global step now
bulk-writes routes (COPY new ids + SET existing ids — see PR-P5c),
replacing the per-row MERGE upsert this test was originally written for;
the name is kept for plan-reference continuity. The SET branch is what
dedups against routes written during the scoped step here.
"""
from build_ast_graph import incremental_rebuild, write_ladybug

Expand Down Expand Up @@ -1216,7 +1370,7 @@ def test_incremental_route_merge_dedup_preserved(self, tmp_path: Path) -> None:
route_count = route_count_result.get_next()[0]

# Should be exactly 1 route (no duplicates)
assert route_count == 1, f"Expected 1 route, found {route_count} (MERGE dedup failed)"
assert route_count == 1, f"Expected 1 route, found {route_count} (route dedup failed)"

conn.close()

Expand Down
Loading