Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ast_java.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ class RouteDecl:
filename: str
start_line: int
end_line: int
# brownfield / B2a composition (graph_enrich.resolve_routes_for_method); not a Kuzu column.
# brownfield / B2a composition (graph_enrich.resolve_routes_for_method); not a graph column.
route_source_layer: str = "builtin"


Expand Down
132 changes: 66 additions & 66 deletions build_ast_graph.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions graph_enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def collect_annotation_meta_chain(
) -> dict[str, frozenset[str]]:
"""Map annotation simple name → built-in simple names reachable via meta-annotations.

Single source of truth for Layer A: both the Kuzu writer and Lance chunk
Single source of truth for Layer A: both the LadybugDB writer and Lance chunk
enrichment must use this; they must not derive `meta_chain` from separate
filesystem walks. See ``PLAN-BROWNFIELD-ROLE-OVERRIDES`` §
*Single source of truth (REQUIRED — read before implementation)*.
Expand All @@ -350,7 +350,7 @@ def annotation_meta_decls_from_graph_tables(
"""From `build_ast_graph.GraphTables.types`, map @interface simple name -> meta anns.

Used for diagnostics; Layer A in production uses `collect_annotation_meta_chain`
(disk) so Kuzu and Lance share one index.
(disk) so LadybugDB and Lance share one index.
"""
decls: dict[str, tuple[str, ...]] = {}
first_fqn: dict[str, str] = {}
Expand Down Expand Up @@ -1702,7 +1702,7 @@ def enrich_chunk(


def symbol_id(kind: str, fqn: str, file_path: str = "", start_byte: int = 0) -> str:
    """Return the deterministic SHA1-based id for a LadybugDB Symbol node.

    The id is the hex SHA1 digest of the UTF-8 bytes of
    ``"{kind}|{fqn}|{file_path}|{start_byte}"``, so identical arguments
    always produce the same 40-character string.

    Args:
        kind: Symbol kind; first component of the hashed key.
        fqn: Fully qualified name; second component of the hashed key.
        file_path: Third component of the hashed key (empty by default).
        start_byte: Fourth component of the hashed key (0 by default).

    Returns:
        The hexadecimal SHA1 digest of the composed key.
    """
    # '|' separates the four components inside the hashed key.
    key = f"{kind}|{fqn}|{file_path}|{start_byte}".encode("utf-8")
    return hashlib.sha1(key).hexdigest()

Expand Down
52 changes: 26 additions & 26 deletions java_codebase_rag/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
from java_codebase_rag.pipeline import clip, run_build_ast_graph, run_cocoindex_drop, run_cocoindex_update, run_incremental_graph
from java_ontology import VALID_UNRESOLVED_CALL_REASONS

KUZU_INCREMENTAL_TRACKING_ISSUE_URL = "https://github.com/HumanBean17/java-codebase-rag/issues/73"
LADYBUG_INCREMENTAL_TRACKING_ISSUE_URL = "https://github.com/HumanBean17/java-codebase-rag/issues/73"

_INCREMENT_WARNING_LINES = (
"WARNING: AST graph (Kuzu) incremental rebuild is not yet implemented.",
"WARNING: AST graph (LadybugDB) incremental rebuild is not yet implemented.",
"The graph reflects the index state from the last `init` or `reprocess`,",
"which means `find`, `neighbors`, and `describe` may return stale results",
"for files changed since then.",
Expand All @@ -37,8 +37,8 @@
"For an up-to-date graph, run:",
" java-codebase-rag reprocess",
"",
"Track progress on Kuzu incremental rebuild:",
f" {KUZU_INCREMENTAL_TRACKING_ISSUE_URL}",
"Track progress on LadybugDB incremental rebuild:",
f" {LADYBUG_INCREMENTAL_TRACKING_ISSUE_URL}",
)

_REFRESH_DEPRECATION = (
Expand All @@ -47,7 +47,7 @@
)

_REPROCESS_DRIFT_VECTORS_ONLY = (
"java-codebase-rag reprocess: rebuilt vectors only; graph (code_graph.kuzu) was NOT rebuilt "
"java-codebase-rag reprocess: rebuilt vectors only; graph (code_graph.lbug) was NOT rebuilt "
"and may now reflect a stale source snapshot."
)

Expand Down Expand Up @@ -178,7 +178,7 @@ def _emit(value: Any) -> None:
print(json.dumps(payload, default=_jsonable, sort_keys=True, indent=None))


def _emit_increment_ladybug_warning() -> None:
    """Print each line of ``_INCREMENT_WARNING_LINES`` to stderr, in order."""
    for line in _INCREMENT_WARNING_LINES:
        print(line, file=sys.stderr)


# Pre-rename spelling kept as an alias so a call site that still uses the old
# name resolves to the same function.
_emit_increment_kuzu_warning = _emit_increment_ladybug_warning

Expand Down Expand Up @@ -289,7 +289,7 @@ def work() -> int:
print(file=sys.stderr, flush=True)
g = run_build_ast_graph(
source_root=cfg.source_root,
kuzu_path=cfg.kuzu_path,
ladybug_path=cfg.ladybug_path,
verbose=verbose,
quiet=bool(args.quiet),
env=env,
Expand Down Expand Up @@ -319,7 +319,7 @@ def _cmd_increment(args: argparse.Namespace) -> int:
# Check for --vectors-only flag
vectors_only = bool(getattr(args, "vectors_only", False))
if vectors_only:
_emit_increment_kuzu_warning()
_emit_increment_ladybug_warning()

def work() -> int:
env = cfg.subprocess_env()
Expand Down Expand Up @@ -350,7 +350,7 @@ def work() -> int:
# Run incremental graph update
g = run_incremental_graph(
source_root=cfg.source_root,
kuzu_path=cfg.kuzu_path,
ladybug_path=cfg.ladybug_path,
verbose=bool(args.verbose),
quiet=bool(args.quiet),
env=env,
Expand Down Expand Up @@ -437,7 +437,7 @@ def work() -> int:
if graph_only:
g = run_build_ast_graph(
source_root=cfg.source_root,
kuzu_path=cfg.kuzu_path,
ladybug_path=cfg.ladybug_path,
verbose=verbose,
quiet=bool(args.quiet),
env=env,
Expand Down Expand Up @@ -509,7 +509,7 @@ def _cmd_erase(args: argparse.Namespace) -> int:
cfg = _resolved_from_ns(args)
_startup_hints(cfg)
cfg.apply_to_os_environ()
to_describe: list[Path] = [cfg.kuzu_path, cfg.cocoindex_db]
to_describe: list[Path] = [cfg.ladybug_path, cfg.cocoindex_db]
if cfg.index_dir.is_dir():
try:
import lancedb
Expand Down Expand Up @@ -546,8 +546,8 @@ def work() -> int:
)
elif drop.returncode != 0:
print(clip(drop.stderr, 4000), file=sys.stderr)
if cfg.kuzu_path.exists():
shutil.rmtree(cfg.kuzu_path, ignore_errors=True)
if cfg.ladybug_path.exists():
shutil.rmtree(cfg.ladybug_path, ignore_errors=True)
if cfg.cocoindex_db.exists():
try:
cfg.cocoindex_db.unlink()
Expand Down Expand Up @@ -577,17 +577,17 @@ def _cmd_meta(args: argparse.Namespace) -> int:
cfg = _resolved_from_ns(args)
_startup_hints(cfg)
cfg.apply_to_os_environ()
from kuzu_queries import KuzuGraph # lazy
from ladybug_queries import LadybugGraph # lazy

KuzuGraph._instance = None
KuzuGraph._instance_path = None
LadybugGraph._instance = None
LadybugGraph._instance_path = None
payload = server._graph_meta_output().model_dump()
payload["embedding_model"] = cfg.embedding_model
payload["embedding_device"] = cfg.embedding_device
payload["embedding_model_source"] = cfg.embedding_model_source
payload["embedding_device_source"] = cfg.embedding_device_source
payload["index_dir"] = str(cfg.index_dir.resolve())
payload["kuzu_path"] = str(cfg.kuzu_path.resolve())
payload["ladybug_path"] = str(cfg.ladybug_path.resolve())
payload["index_dir_source"] = cfg.index_dir_source
payload["hints_enabled"] = cfg.hints_enabled
payload["hints_enabled_source"] = cfg.hints_enabled_source
Expand Down Expand Up @@ -637,12 +637,12 @@ def _cmd_unresolved_calls_list(args: argparse.Namespace) -> int:
cfg = _resolved_from_ns(args)
_startup_hints(cfg)
cfg.apply_to_os_environ()
from kuzu_queries import KuzuGraph # lazy
from ladybug_queries import LadybugGraph # lazy

if not KuzuGraph.exists():
if not LadybugGraph.exists():
_emit({"success": False, "message": "Kuzu graph not found"})
return 1
graph = KuzuGraph.get()
graph = LadybugGraph.get()
rows = graph.list_unresolved_call_sites(
method_id=args.method_id,
reason=args.reason,
Expand All @@ -658,12 +658,12 @@ def _cmd_unresolved_calls_stats(args: argparse.Namespace) -> int:
cfg = _resolved_from_ns(args)
_startup_hints(cfg)
cfg.apply_to_os_environ()
from kuzu_queries import KuzuGraph # lazy
from ladybug_queries import LadybugGraph # lazy

if not KuzuGraph.exists():
if not LadybugGraph.exists():
_emit({"success": False, "message": "Kuzu graph not found"})
return 1
graph = KuzuGraph.get()
graph = LadybugGraph.get()
buckets = graph.stats_unresolved_call_sites(by=args.by)
total = sum(int(r.get("n") or 0) for r in buckets)
_emit({"success": True, "total": total, "by": args.by, "buckets": buckets})
Expand All @@ -683,12 +683,12 @@ def _cmd_analyze_pr(args: argparse.Namespace) -> int:
_emit({"success": False, "message": "Diff is empty"})
return 1
import pr_analysis # lazy
from kuzu_queries import KuzuGraph # lazy
from ladybug_queries import LadybugGraph # lazy

if not KuzuGraph.exists():
if not LadybugGraph.exists():
_emit({"success": False, "message": "Kuzu graph not found"})
return 1
graph = KuzuGraph.get()
graph = LadybugGraph.get()
report = pr_analysis.analyze_pr_pipeline(graph, diff_text)
_emit(pr_analysis.pr_report_to_dict(report))
return 0
Expand Down
15 changes: 7 additions & 8 deletions java_codebase_rag/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,9 @@ def resolved_sbert_model_for_process_env(import_time_default: str) -> str:
# Legacy env keys: never honored; detection-only hints name the replacement (if any).
_LEGACY_ENV_HINTS: tuple[tuple[str, str], ...] = (
("LANCEDB_URI", "JAVA_CODEBASE_RAG_INDEX_DIR"),
("KUZU_DB_PATH", "JAVA_CODEBASE_RAG_INDEX_DIR (Kuzu lives at <index_dir>/code_graph.kuzu)"),
("LANCEDB_MCP_PROJECT_ROOT", "cwd or --source-root (no env replacement)"),
("LANCEDB_MCP_ALLOW_REFRESH", "(removed; use init / increment / reprocess / erase)"),
("LANCEDB_MCP_GRAPH_ENABLED", "(removed; graph is used when code_graph.kuzu exists)"),
("LANCEDB_MCP_GRAPH_ENABLED", "(removed; graph is used when code_graph.lbug exists)"),
("LANCEDB_MCP_MICROSERVICE_ROOTS", "microservice_roots: in .java-codebase-rag.yml"),
("LANCEDB_MCP_DEBUG_CONTEXT", ENV_DEBUG_CONTEXT),
("LANCEDB_MCP_RUN_HEAVY", ENV_RUN_HEAVY),
Expand Down Expand Up @@ -182,7 +181,7 @@ def load_yaml_mapping(source_root: Path) -> dict[str, Any]:
class ResolvedOperatorConfig:
source_root: Path
index_dir: Path
kuzu_path: Path
ladybug_path: Path
cocoindex_db: Path
embedding_model: str
embedding_device: str | None
Expand All @@ -193,7 +192,7 @@ class ResolvedOperatorConfig:
hints_enabled_source: SettingSource

def apply_to_os_environ(self) -> None:
"""Make downstream modules (server, kuzu_queries, flows) see a consistent environment.
"""Make downstream modules (server, ladybug_queries, flows) see a consistent environment.

When ``embedding_device`` is unset, ``SBERT_DEVICE`` is not removed from ``os.environ`` so
a long-lived host process is not mutated for unrelated callers; subprocesses still use
Expand Down Expand Up @@ -369,12 +368,12 @@ def resolve_operator_config(
yaml_path=("hints", "enabled"),
default=True,
)
ku = index_dir / "code_graph.kuzu"
ku = index_dir / "code_graph.lbug"
coco = index_dir / "cocoindex.db"
return ResolvedOperatorConfig(
source_root=root,
index_dir=index_dir,
kuzu_path=ku,
ladybug_path=ku,
cocoindex_db=coco,
embedding_model=model,
embedding_device=device,
Expand All @@ -387,9 +386,9 @@ def resolve_operator_config(


def index_dir_has_existing_artifacts(index_dir: Path) -> tuple[bool, list[str]]:
"""True if Kuzu graph dir or any Lance table already exists under index_dir."""
"""True if graph dir or any Lance table already exists under index_dir."""
paths: list[str] = []
ku = index_dir / "code_graph.kuzu"
ku = index_dir / "code_graph.lbug"
if ku.exists():
paths.append(str(ku.resolve()))
if index_dir.is_dir():
Expand Down
2 changes: 1 addition & 1 deletion java_codebase_rag/installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,7 +791,7 @@ def run_init_if_needed(
# Run AST graph build
g = run_build_ast_graph(
source_root=cfg.source_root,
kuzu_path=cfg.kuzu_path,
ladybug_path=cfg.ladybug_path,
verbose=not quiet,
quiet=quiet,
env=env,
Expand Down
12 changes: 6 additions & 6 deletions java_codebase_rag/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def run_cocoindex_drop(env: dict[str, str], *, quiet: bool) -> subprocess.Comple
def run_build_ast_graph(
*,
source_root: Path,
kuzu_path: Path,
ladybug_path: Path,
verbose: bool,
quiet: bool = False,
env: dict[str, str] | None = None,
Expand All @@ -219,8 +219,8 @@ def run_build_ast_graph(
str(builder),
"--source-root",
str(source_root),
"--kuzu-path",
str(kuzu_path),
"--ladybug-path",
str(ladybug_path),
]
# Three-tier: --quiet (silent) / default (filtered progress) / --verbose (raw).
# Default passes --verbose so the builder emits per-pass progress lines,
Expand Down Expand Up @@ -254,7 +254,7 @@ def run_build_ast_graph(
def run_incremental_graph(
*,
source_root: Path,
kuzu_path: Path,
ladybug_path: Path,
verbose: bool,
quiet: bool = False,
env: dict[str, str] | None = None,
Expand All @@ -273,8 +273,8 @@ def run_incremental_graph(
str(builder),
"--source-root",
str(source_root),
"--kuzu-path",
str(kuzu_path),
"--ladybug-path",
str(ladybug_path),
"--incremental",
]
# Three-tier: --quiet (silent) / default (filtered progress) / --verbose (raw).
Expand Down
2 changes: 1 addition & 1 deletion java_index_flow_lancedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
LanceDB requires a single primary key per table; each chunk gets a UUID `id`.

Environment:
JAVA_CODEBASE_RAG_INDEX_DIR — Lance tables + Kuzu + cocoindex state (default: ./.java-codebase-rag)
JAVA_CODEBASE_RAG_INDEX_DIR — Lance tables + LadybugDB + cocoindex state (default: ./.java-codebase-rag)
JAVA_CODEBASE_RAG_SOURCE_ROOT — Java repo root for indexing (optional; else cocoindex cwd)
SBERT_MODEL / SBERT_DEVICE — embedding (optional; YAML also supported via java-codebase-rag CLI)

Expand Down
2 changes: 1 addition & 1 deletion java_ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
@dataclass(frozen=True)
class EdgeAttr:
    """Immutable description of one edge attribute: name, graph type, purpose."""

    name: str
    graph_type: str
    purpose: str

    @property
    def kuzu_type(self) -> str:
        """Read-only alias for ``graph_type`` (the field's pre-rename name)."""
        return self.graph_type


Expand Down
Loading
Loading