Skip to content

Commit 7e5c10e

Browse files
edvilme and Copilot authored
refactor: extract notebook helpers into lsp_notebook.py (#713)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 105f0fe commit 7e5c10e

4 files changed

Lines changed: 620 additions & 18 deletions

File tree

bundled/tool/lsp_notebook.py

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
# Copyright (c) Microsoft Corporation. All rights reserved.
2+
# Licensed under the MIT License.
3+
"""Notebook-specific helpers for whole-notebook linting with cross-cell context."""
4+
5+
from __future__ import annotations
6+
7+
import dataclasses
8+
import re
9+
from typing import Callable, Optional, Protocol, Sequence
10+
11+
from lsprotocol import types as lsp
12+
13+
14+
class TextDocumentLike(Protocol):
    """Structural type for text documents consumed by notebook linting.

    Only ``source`` and ``language_id`` are required.  Both are declared as
    read-only properties rather than plain attributes: a plain protocol
    attribute is treated as *settable* by static type checkers, which makes
    them reject implementers that expose a member through a getter-only
    ``@property``.  A read-only protocol member is satisfied both by such
    properties and by ordinary instance attributes (for example dataclass
    fields), so this declaration is strictly more permissive.
    """

    @property
    def source(self) -> str:
        """Full text of the document."""
        ...

    @property
    def language_id(self) -> str:
        """Language identifier of the document (e.g. ``"python"``)."""
        ...
19+
20+
21+
@dataclasses.dataclass
class SyntheticDocument:
    """In-memory document object standing in for ``workspace.TextDocument``.

    Used by notebook linting in place of an ad-hoc ``types.SimpleNamespace``
    so the synthetic document has a declared set of fields that can be
    type-checked.
    """

    # Required fields (no defaults).
    uri: str
    path: str
    source: str
    # Optional fields.
    language_id: str = "python"
    version: int = 0
34+
35+
36+
# Recognises IPython magic / shell-escape lines so they can be swapped for
# ``pass`` before linting.  After optional leading whitespace a line matches
# when it starts with ``%%`` or ``%`` followed by a word character, with
# ``!!``, or with ``!`` followed by a word character.
MAGIC_LINE_RE = re.compile(r"^\s*(?:%%\w|%(?!=)\w|!!|!(?!=)\w)")
38+
39+
# Notebook document sync options: one selector per notebook type
# ("jupyter-notebook" and "interactive"), each restricted to cells whose
# language is "python", with ``save`` enabled.
NOTEBOOK_SYNC_OPTIONS = lsp.NotebookDocumentSyncOptions(
    notebook_selector=[
        lsp.NotebookDocumentFilterWithNotebook(
            notebook=notebook_type,
            cells=[lsp.NotebookCellLanguage(language="python")],
        )
        for notebook_type in ("jupyter-notebook", "interactive")
    ],
    save=True,
)
56+
57+
58+
@dataclasses.dataclass
class CellOffset:
    """Location of one notebook cell inside the combined notebook source."""

    # URI of the cell's text document.
    cell_uri: str
    # 0-based line in the combined source where this cell's first line sits.
    start_line: int
    # Number of lines the cell contributes to the combined source.
    line_count: int


# Ordered list of cell offsets for one combined notebook source.
CellMap = list[CellOffset]
68+
69+
70+
def _mask_magic_line(line: str) -> str:
    """Return *line* unchanged, or an equally indented ``pass`` if it is a magic."""
    if not MAGIC_LINE_RE.match(line):
        return line
    # Keep the original indentation: a column-0 ``pass`` in place of a magic
    # that sits inside a ``for``/``if``/``def`` body would break the block
    # structure and make the linter report a bogus indentation error.
    indent = line[: len(line) - len(line.lstrip())]
    return f"{indent}pass\n"


def build_notebook_source(
    cells: list,  # NotebookCell objects (can't import type without pygls dependency)
    get_text_document: Callable[[str], Optional[TextDocumentLike]],
) -> tuple[str, CellMap]:
    """Build a single Python source string from all code cells.

    Cells are skipped when they are not code cells, have no document URI,
    cannot be resolved by *get_text_document*, are not Python, or have empty
    source.  Skipped cells get no entry in the returned cell map.

    Args:
        cells: The notebook's cell list (``nb.cells``).
        get_text_document: A callable that resolves a cell document URI to a
            text document object (with ``.source`` and ``.language_id``
            attributes), e.g. ``workspace.get_text_document``.

    Returns:
        (combined_source, cell_map) where *cell_map* is a list of
        :class:`CellOffset` instances describing where each cell's lines
        begin in the combined source.

    IPython magic lines (``%``, ``%%``, ``!``, etc.) are replaced with
    ``pass`` statements so the linter does not raise syntax errors on them.
    The replacement keeps the magic line's leading whitespace so that magics
    nested inside an indented block remain valid Python.
    """
    source_parts: list[str] = []
    cell_map: CellMap = []
    current_line = 0

    for cell in cells:
        if cell.kind != lsp.NotebookCellKind.Code or cell.document is None:
            continue
        doc = get_text_document(cell.document)
        if doc is None or doc.language_id != "python":
            continue

        source = doc.source
        if not source:
            continue

        lines = source.splitlines(keepends=True)
        # Ensure the last line ends with a newline so the next cell's first
        # line does not get glued onto it in the combined source.
        if lines and not lines[-1].endswith("\n"):
            lines[-1] += "\n"

        # One output line per input line, so line numbers stay aligned.
        sanitized_lines = [_mask_magic_line(line) for line in lines]

        cell_map.append(CellOffset(cell.document, current_line, len(sanitized_lines)))
        source_parts.extend(sanitized_lines)
        current_line += len(sanitized_lines)

    return "".join(source_parts), cell_map
119+
120+
121+
def get_cell_for_line(global_line: int, cell_map: CellMap) -> CellOffset | None:
    """Find the cell whose line span contains *global_line*.

    *global_line* is a 0-based line number in the combined notebook source.
    Entries are checked in list order and the first one whose half-open span
    ``[start_line, start_line + line_count)`` contains the line is returned;
    ``None`` is returned when no entry contains it.
    """
    return next(
        (
            cell
            for cell in cell_map
            if 0 <= global_line - cell.start_line < cell.line_count
        ),
        None,
    )
131+
132+
133+
def remap_diagnostics_to_cells(
    diagnostics: Sequence[lsp.Diagnostic],
    cell_map: CellMap,
) -> dict[str, list[lsp.Diagnostic]]:
    """Map combined-source diagnostics back to individual cell URIs.

    Each diagnostic's line range is adjusted relative to the owning cell,
    which is the cell containing the diagnostic's *start* line.

    * Diagnostics whose start line doesn't fall in any cell are discarded.
    * If a diagnostic's end line crosses the cell boundary, the end is
      clamped to column 0 of the cell's last line.
    * If the resulting end would precede the start (on the same line or on
      an earlier line), the range is collapsed to the start position.

    Args:
        diagnostics: Diagnostics with positions in the combined source.
        cell_map: Cell layout of the combined source.

    Returns:
        A dict with one key per ``cell_uri`` in *cell_map* (so cells without
        diagnostics map to an empty list), holding the remapped diagnostics.
    """
    per_cell: dict[str, list[lsp.Diagnostic]] = {
        entry.cell_uri: [] for entry in cell_map
    }

    for diag in diagnostics:
        entry = get_cell_for_line(diag.range.start.line, cell_map)
        if entry is None:
            continue

        # Work with plain integers first and only build ``lsp.Position``
        # objects once the coordinates are known to be sane, so a malformed
        # (e.g. negative relative) end line never reaches the constructor.
        start_line = diag.range.start.line - entry.start_line
        start_char = diag.range.start.character

        # Clamp end line to the cell boundary (defensive).
        last_line = entry.line_count - 1
        end_line = diag.range.end.line - entry.start_line
        end_char = diag.range.end.character
        if end_line > last_line:
            end_line, end_char = last_line, 0

        # Ensure end is not before start (inverted range violates LSP spec).
        # Comparing (line, character) pairs also catches an end that lies on
        # an earlier line, not just earlier on the same line.
        if (end_line, end_char) < (start_line, start_char):
            end_line, end_char = start_line, start_char

        remapped = lsp.Diagnostic(
            range=lsp.Range(
                start=lsp.Position(line=start_line, character=start_char),
                end=lsp.Position(line=end_line, character=end_char),
            ),
            message=diag.message,
            severity=diag.severity,
            code=diag.code,
            code_description=diag.code_description,
            source=diag.source,
            # TODO: remap related_information locations through cell_map when a tool
            # starts emitting them; forwarding raw combined-source positions produces
            # incorrect navigation targets.
            related_information=diag.related_information,
            tags=diag.tags,
            data=diag.data,
        )
        per_cell[entry.cell_uri].append(remapped)

    return per_cell

bundled/tool/lsp_server.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def update_environ_path() -> None:
6767
import lsp_edit_utils as edit_utils
6868
import lsp_io
6969
import lsp_jsonrpc as jsonrpc
70+
import lsp_notebook as notebook
7071
import lsp_utils as utils
7172
from lsprotocol import types as lsp
7273
from pygls import uris
@@ -78,28 +79,12 @@ def update_environ_path() -> None:
7879
RUNNER = pathlib.Path(__file__).parent / "lsp_runner.py"
7980

8081
MAX_WORKERS = 5
81-
NOTEBOOK_SYNC_OPTIONS = lsp.NotebookDocumentSyncOptions(
82-
notebook_selector=[
83-
lsp.NotebookDocumentFilterWithNotebook(
84-
notebook="jupyter-notebook",
85-
cells=[
86-
lsp.NotebookCellLanguage(language="python"),
87-
],
88-
),
89-
lsp.NotebookDocumentFilterWithNotebook(
90-
notebook="interactive",
91-
cells=[
92-
lsp.NotebookCellLanguage(language="python"),
93-
],
94-
),
95-
],
96-
save=True,
97-
)
82+
9883
LSP_SERVER = LanguageServer(
9984
name="black-server",
10085
version="v0.1.0",
10186
max_workers=MAX_WORKERS,
102-
notebook_document_sync=NOTEBOOK_SYNC_OPTIONS,
87+
notebook_document_sync=notebook.NOTEBOOK_SYNC_OPTIONS,
10388
)
10489

10590

bundled/tool/tests/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

0 commit comments

Comments
 (0)