Skip to content

Commit 66bb3a0

Browse files
HumanBean17 and claude authored
fix(cli): erase removes graph/cocoindex.db/.graph_hashes.json by type (#346) (#348)
* fix(cli): erase removes graph/cocoindex.db/.graph_hashes.json by type (#346)

  `erase` reported success but left code_graph.lbug on disk because its deletion was type-blind: shutil.rmtree silently no-ops on a regular file (code_graph.lbug) and Path.unlink raises IsADirectoryError on a directory (cocoindex.db), both swallowed; .graph_hashes.json was never targeted. The next init then refused (exit 2), deadlooping the documented `erase --yes` -> `init` clean-slate workflow.

  Replace the type-blind deletes with a _rm_any helper that dispatches on type (file/dir/symlink — a symlinked dir is unlinked, never recursed into, so the target is not followed), so both the file-backed and dir-backed LadybugDB layouts are handled. erase now also removes .graph_hashes.json and lists it in the "Will delete:" preview. Deletion failures are warned to stderr instead of swallowed, so erase no longer reports success while leaving an artifact behind (the same silent-failure class as #346).

  `reprocess` is unaffected: its full rebuild opens the existing .lbug and _drop_all()s every node + edge table in place, and _init_hash_tracker resets .graph_hashes.json — it never relies on the broken deletion.

  Tests: add an always-on regression that creates a real lbug-file / cocoindex.db-dir / hash-store layout and asserts erase removes all three; convert the false-green test_init_after_erase_succeeds into a real build -> erase -> re-init lifecycle check.

  Co-Authored-By: Claude <noreply@anthropic.com>

* bump version to 0.6.6

  Co-Authored-By: Claude <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
1 parent c92d9fb commit 66bb3a0

4 files changed

Lines changed: 78 additions & 12 deletions

File tree

docs/JAVA-CODEBASE-RAG-CLI.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ java-codebase-rag reprocess --source-root /path/to/java/repo --index-dir /path/t
240240

241241
### `erase`
242242

243-
Deletes cocoindex state, the LadybugDB directory, and Lance tables under the index dir. Requires **`--yes`** or interactive confirmation on a TTY. Non-TTY without `--yes` exits **2**.
243+
Deletes cocoindex state, the LadybugDB graph (`code_graph.lbug`), the graph builder's content-hash store (`.graph_hashes.json`), and Lance tables under the index dir. Requires **`--yes`** or interactive confirmation on a TTY. Non-TTY without `--yes` exits **2**.
244244

245245
```bash
246246
java-codebase-rag erase --source-root /path/to/java/repo --index-dir /path/to/.java-codebase-rag --yes

java_codebase_rag/cli.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -580,11 +580,33 @@ def _cmd_update(args: argparse.Namespace) -> int:
580580
)
581581

582582

583+
def _rm_any(path: Path) -> bool:
    """Remove ``path`` whether it is a regular file, directory, or symlink.

    ``code_graph.lbug`` is a single regular file in this repo, but kuzu may lay
    the graph out as a directory; ``cocoindex.db`` is always a directory.
    ``shutil.rmtree`` is a silent no-op on a regular file and ``Path.unlink``
    raises ``IsADirectoryError`` on a directory, so a type-blind delete left
    index artifacts on disk (issue #346). This helper dispatches on the path's
    type instead.

    A symlink (including a symlink to a directory) is unlinked, never recursed
    into, so the link target is left untouched. A path that does not exist is
    not an error.

    Args:
        path: Filesystem entry to remove.

    Returns:
        ``True`` when no removal error occurred (the entry was removed or was
        already absent); ``False`` when an ``OSError`` was raised, in which
        case a warning has been written to stderr. Callers that want erase to
        fail loudly can act on the ``False``; callers that ignore the result
        keep the previous warn-only behaviour.
    """
    try:
        if path.is_dir() and not path.is_symlink():
            # Real directory: remove the whole tree.
            shutil.rmtree(path)
        elif path.exists() or path.is_symlink():
            # Regular file, or any symlink (is_symlink also covers dangling
            # links, for which exists() is False).
            path.unlink(missing_ok=True)
    except OSError as exc:
        # Surface the failure instead of swallowing it.
        print(f"warning: failed to remove {path}: {exc}", file=sys.stderr)
        return False
    return True
602+
603+
583604
def _cmd_erase(args: argparse.Namespace) -> int:
584605
cfg = _resolved_from_ns(args)
585606
_startup_hints(cfg)
586607
cfg.apply_to_os_environ()
587-
to_describe: list[Path] = [cfg.ladybug_path, cfg.cocoindex_db]
608+
graph_hashes_path = cfg.ladybug_path.parent / ".graph_hashes.json"
609+
to_describe: list[Path] = [cfg.ladybug_path, cfg.cocoindex_db, graph_hashes_path]
588610
if cfg.index_dir.is_dir():
589611
try:
590612
import lancedb
@@ -621,13 +643,15 @@ def work(progress: "PipelineProgress | None") -> int:
621643
)
622644
elif drop.returncode != 0:
623645
print(clip(drop.stderr, 4000), file=sys.stderr)
624-
if cfg.ladybug_path.exists():
625-
shutil.rmtree(cfg.ladybug_path, ignore_errors=True)
626-
if cfg.cocoindex_db.exists():
627-
try:
628-
cfg.cocoindex_db.unlink()
629-
except OSError:
630-
pass
646+
# Remove the LadybugDB graph, the cocoindex state store, and the graph
647+
# builder's content-hash store. Each is removed by type (see _rm_any):
648+
# code_graph.lbug is a file here but may be a dir under kuzu, while
649+
# cocoindex.db is a directory — a type-blind delete silently no-oped on
650+
# one or the other, and .graph_hashes.json was never targeted at all
651+
# (issue #346).
652+
_rm_any(cfg.ladybug_path)
653+
_rm_any(cfg.cocoindex_db)
654+
_rm_any(graph_hashes_path)
631655
if cfg.index_dir.is_dir():
632656
try:
633657
import lancedb

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "java-codebase-rag"
7-
version = "0.6.5"
7+
version = "0.6.6"
88
description = "MCP server for semantic + structural search over Java codebases"
99
readme = "README.md"
1010
requires-python = ">=3.11"

tests/test_java_codebase_rag_cli.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,35 @@ def test_cli_erase_succeeds_with_yes_flag(tmp_path: Path) -> None:
109109
assert proc.returncode == 0, proc.stderr + proc.stdout
110110

111111

112+
def test_erase_removes_graph_file_cocoindex_dir_and_hash_store(tmp_path: Path) -> None:
    """``erase --yes`` must leave none of the three index artifacts behind.

    Regression for issue #346. The index dir is seeded with the layout that
    tripped the old type-blind delete: ``code_graph.lbug`` as a regular file,
    ``cocoindex.db`` as a non-empty directory, and ``.graph_hashes.json`` as a
    regular file. After a successful erase all three must be gone.
    """
    idx = tmp_path / "erase_artifacts"
    graph_file = idx / "code_graph.lbug"
    state_dir = idx / "cocoindex.db"
    hash_store = idx / ".graph_hashes.json"

    # Seed the on-disk layout: graph is a single FILE, cocoindex state is a DIR.
    idx.mkdir()
    graph_file.write_bytes(b"fake-kuzu-db")
    state_dir.mkdir()
    (state_dir / "state.json").write_text("{}", encoding="utf-8")
    hash_store.write_text("{}", encoding="utf-8")

    env = {
        **os.environ,
        "JAVA_CODEBASE_RAG_INDEX_DIR": str(idx),
        "JAVA_CODEBASE_RAG_SOURCE_ROOT": str(tmp_path),
    }
    argv = ["erase", "--source-root", str(tmp_path), "--index-dir", str(idx), "--yes"]
    proc = _run_cli(argv, env=env)

    assert proc.returncode == 0, proc.stderr + proc.stdout
    assert not graph_file.exists(), "erase left code_graph.lbug on disk"
    assert not state_dir.exists(), "erase left cocoindex.db/ on disk"
    assert not hash_store.exists(), "erase left .graph_hashes.json on disk"
139+
140+
112141
def test_embedding_model_precedence_cli_over_env_over_yaml_over_default(
113142
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
114143
) -> None:
@@ -377,21 +406,34 @@ def test_legacy_env_var_set_emits_stderr_hint(monkeypatch: pytest.MonkeyPatch, t
377406

378407
@pytest.mark.skipif(not _cocoindex_available(), reason="cocoindex not installed in venv")
379408
def test_init_after_erase_succeeds(corpus_root: Path, tmp_path: Path) -> None:
409+
"""Build a real index, erase it, then init again from a clean slate.
410+
411+
Regression for issue #346: the previous body erased an *empty* index dir and
412+
then inited, so it never exercised "erase a real graph -> re-init" and stayed
413+
green while erase silently left code_graph.lbug on disk.
414+
"""
380415
idx = tmp_path / "lifecycle_idx"
381416
idx.mkdir(parents=True)
382417
env = os.environ.copy()
383418
env["JAVA_CODEBASE_RAG_INDEX_DIR"] = str(idx)
384419
env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] = str(corpus_root.resolve())
420+
init1 = _run_cli(
421+
["init", "--source-root", str(corpus_root), "--index-dir", str(idx), "--quiet"],
422+
env=env,
423+
)
424+
assert init1.returncode == 0, init1.stdout + init1.stderr
425+
assert (idx / "code_graph.lbug").exists(), "init did not build code_graph.lbug"
385426
e1 = _run_cli(
386427
["erase", "--source-root", str(corpus_root), "--index-dir", str(idx), "--yes"],
387428
env=env,
388429
)
389430
assert e1.returncode == 0, e1.stderr
390-
init = _run_cli(
431+
assert not (idx / "code_graph.lbug").exists(), "erase left code_graph.lbug on disk"
432+
init2 = _run_cli(
391433
["init", "--source-root", str(corpus_root), "--index-dir", str(idx), "--quiet"],
392434
env=env,
393435
)
394-
assert init.returncode == 0, init.stdout + init.stderr
436+
assert init2.returncode == 0, init2.stdout + init2.stderr
395437

396438

397439
@pytest.mark.skipif(not _cocoindex_available(), reason="cocoindex not installed in venv")

0 commit comments

Comments
 (0)