From 55da9ca4123cdc0b0bb921e58f40c5d293d3ca96 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Sun, 7 Jun 2026 17:17:22 +0300 Subject: [PATCH 1/2] fix walk-up discovery to also detect index directory discover_project_root() only looked for .java-codebase-rag.yml config files, missing .java-codebase-rag/ index directories. This caused MCP tools to fail when started from a microservice subdirectory because walk-up returned None and fell back to cwd (no index there). Empty index directories are skipped to avoid stale artifacts from falsely anchoring discovery. Co-Authored-By: Claude Opus 4.7 --- java_codebase_rag/cli.py | 25 ++++++++++------- java_codebase_rag/config.py | 21 +++++++++++--- tests/test_config.py | 56 +++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 14 deletions(-) diff --git a/java_codebase_rag/cli.py b/java_codebase_rag/cli.py index a3281e71..5667820e 100644 --- a/java_codebase_rag/cli.py +++ b/java_codebase_rag/cli.py @@ -229,18 +229,23 @@ def _add_verbosity_flags(p: argparse.ArgumentParser) -> None: def _cmd_init(args: argparse.Namespace) -> int: cfg = _resolved_from_ns(args) - # Check for parent config - from java_codebase_rag.config import discover_project_root, YAML_CONFIG_FILENAMES + # Check for parent config or index + from java_codebase_rag.config import discover_project_root, find_yaml_config_file parent_config_dir = discover_project_root(cfg.source_root.parent) if parent_config_dir is not None: - parent_config = parent_config_dir / YAML_CONFIG_FILENAMES[0] - if not parent_config.is_file(): - parent_config = parent_config_dir / YAML_CONFIG_FILENAMES[1] - print( - f"Warning: found existing config at {parent_config}. " - f"Creating a new project here will create a separate index.", - file=sys.stderr, - ) + parent_config = find_yaml_config_file(parent_config_dir) + if parent_config is not None: + print( + f"Warning: found existing config at {parent_config}. 
" + f"Creating a new project here will create a separate index.", + file=sys.stderr, + ) + else: + print( + f"Warning: found existing index at {parent_config_dir / '.java-codebase-rag'}. " + f"Creating a new project here will create a separate index.", + file=sys.stderr, + ) _startup_hints(cfg) cfg.apply_to_os_environ() occupied, paths = index_dir_has_existing_artifacts(cfg.index_dir) diff --git a/java_codebase_rag/config.py b/java_codebase_rag/config.py index ada9a540..3504fbd2 100644 --- a/java_codebase_rag/config.py +++ b/java_codebase_rag/config.py @@ -123,20 +123,33 @@ def find_yaml_config_file(source_root: Path) -> Path | None: return None +def _has_index_dir(directory: Path) -> bool: + """True if *directory* contains a non-empty ``.java-codebase-rag/`` index directory.""" + idx = directory / ".java-codebase-rag" + return idx.is_dir() and any(idx.iterdir()) + + def discover_project_root(start: Path) -> Path | None: - """Walk up from start to find the directory containing a config file. + """Walk up from start to find the directory containing a config file or index. - First match wins (closest to start). Stops at $HOME inclusive — checks $HOME - itself but does not walk past it. Returns None if no config found. + Looks for ``.java-codebase-rag.yml`` / ``.java-codebase-rag.yaml`` (preferred) + or the ``.java-codebase-rag/`` index directory as a project boundary marker. + + First match wins (closest to start). Config file takes priority over index + directory at the same level. Stops at $HOME inclusive — checks $HOME itself + but does not walk past it. Returns None if no marker found. 
""" start = start.resolve() home = Path.home().resolve() current = start while True: - # Check if current directory contains a config file + # Config file is the primary anchor if find_yaml_config_file(current) is not None: return current + # Index directory is the secondary anchor (supports indexes without config) + if _has_index_dir(current): + return current # Stop if we've reached home (check home itself, but don't walk past it) if current == home: diff --git a/tests/test_config.py b/tests/test_config.py index 75035725..3da68f70 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -80,6 +80,62 @@ def test_discover_project_root_first_match_wins(self, tmp_path): # Should find the closest config (subdir), not the parent (tmp_path) assert result == subdir + def test_discover_project_root_finds_nonempty_index_dir(self, tmp_path): + """Non-empty .java-codebase-rag/ directory acts as project anchor.""" + subdir = tmp_path / "microservice" + subdir.mkdir() + idx = tmp_path / ".java-codebase-rag" + idx.mkdir() + (idx / "code_graph.kuzu").write_bytes(b"\x00" * 16) + + result = discover_project_root(subdir) + assert result == tmp_path + + def test_discover_project_root_skips_empty_index_dir(self, tmp_path): + """Empty .java-codebase-rag/ directory does not anchor the project.""" + subdir = tmp_path / "microservice" + subdir.mkdir() + # Empty index dir at subdir level + empty_idx = subdir / ".java-codebase-rag" + empty_idx.mkdir() + # Real index at parent level + real_idx = tmp_path / ".java-codebase-rag" + real_idx.mkdir() + (real_idx / "code_graph.kuzu").write_bytes(b"\x00" * 16) + + result = discover_project_root(subdir) + assert result == tmp_path + + def test_discover_project_root_config_wins_over_index_dir(self, tmp_path): + """Config file takes priority over index dir at the same level.""" + subdir = tmp_path / "subdir" + subdir.mkdir() + # Index dir at tmp_path level + idx = tmp_path / ".java-codebase-rag" + idx.mkdir() + (idx / 
"code_graph.kuzu").write_bytes(b"\x00" * 16) + # Config at subdir level + config_file = subdir / YAML_CONFIG_FILENAMES[0] + config_file.write_text("# child config") + + deep = subdir / "deep" + deep.mkdir() + result = discover_project_root(deep) + # Config at subdir is closer and wins + assert result == subdir + + def test_discover_project_root_both_markers_same_level(self, tmp_path): + """When both config and index dir exist at same dir, both resolve correctly.""" + # Both markers in the same directory + config_file = tmp_path / YAML_CONFIG_FILENAMES[0] + config_file.write_text("# config") + idx = tmp_path / ".java-codebase-rag" + idx.mkdir() + (idx / "code_graph.kuzu").write_bytes(b"\x00" * 16) + + result = discover_project_root(tmp_path) + assert result == tmp_path + class TestSourceRootFromYaml: """Tests for source_root YAML field parsing and resolution.""" From de552a01952844b91e71528fa0b91b070dca1b9a Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Sun, 7 Jun 2026 17:39:04 +0300 Subject: [PATCH 2/2] fix detect_microservice_from_path for cwd at microservice root microservice_for_path walks _bounded_parents which excludes the path itself. When cwd is exactly the microservice root directory (the most common case), it was never checked for a build marker and auto-scope returned None. Pass a synthetic child path so cwd appears in the parent walk. 
Co-Authored-By: Claude Opus 4.7 --- graph_enrich.py | 7 +++++-- tests/test_microservice_scope.py | 22 ++++++++++++++++------ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/graph_enrich.py b/graph_enrich.py index 6135a97c..c2d7b70d 100644 --- a/graph_enrich.py +++ b/graph_enrich.py @@ -1589,8 +1589,11 @@ def detect_microservice_from_path(cwd: Path, source_root: Path) -> str | None: if overrides and cwd_resolved.name in overrides: return cwd_resolved.name - # Call existing microservice_for_path to detect microservice from build markers - ms = microservice_for_path(str(cwd_resolved), source_resolved) + # microservice_for_path walks _bounded_parents which excludes the path itself. + # For query-time detection we need cwd included in the walk, so pass a synthetic + # child path so that cwd appears as a parent in the build-marker scan. + synthetic = cwd_resolved / "__scope_probe__" + ms = microservice_for_path(str(synthetic), source_resolved) return ms if ms else None diff --git a/tests/test_microservice_scope.py b/tests/test_microservice_scope.py index 0200b59e..84a60807 100644 --- a/tests/test_microservice_scope.py +++ b/tests/test_microservice_scope.py @@ -21,20 +21,30 @@ def test_detect_microservice_deep_inside(self, tmp_path): assert result == "microservice-a" def test_detect_microservice_at_microservice_root(self, tmp_path): - """At microservice root detects that microservice.""" + """At microservice root (cwd = the dir with the build marker) detects that microservice.""" ms_dir = tmp_path / "microservice-b" ms_dir.mkdir() # Add a build marker (ms_dir / "build.gradle").write_text("plugins { id 'java' }") - # Use a subdirectory inside the microservice (not the root itself) - sub_dir = ms_dir / "src" - sub_dir.mkdir() - - result = detect_microservice_from_path(sub_dir, tmp_path) + # cwd IS the microservice root — the most common user scenario + result = detect_microservice_from_path(ms_dir, tmp_path) assert result == "microservice-b" + def 
test_detect_microservice_nested_modules(self, tmp_path): + """Nested build markers scope to outermost microservice, not inner module.""" + ms_dir = tmp_path / "my-service" + ms_dir.mkdir() + (ms_dir / "pom.xml").write_text("") + module_dir = ms_dir / "my-module" + module_dir.mkdir() + (module_dir / "pom.xml").write_text("") + + # From inside the module, should scope to the service, not the module + result = detect_microservice_from_path(module_dir, tmp_path) + assert result == "my-service" + def test_detect_microservice_at_system_root(self, tmp_path): """At system root returns None (no specific scope).""" result = detect_microservice_from_path(tmp_path, tmp_path)