|
| 1 | +# Copyright (c) Microsoft Corporation. All rights reserved. |
| 2 | +# Licensed under the MIT License. |
| 3 | +"""Notebook-specific helpers for whole-notebook linting with cross-cell context.""" |
| 4 | + |
| 5 | +from __future__ import annotations |
| 6 | + |
| 7 | +import dataclasses |
| 8 | +import re |
| 9 | +from typing import Callable, Optional, Protocol, Sequence |
| 10 | + |
| 11 | +from lsprotocol import types as lsp |
| 12 | + |
| 13 | + |
class TextDocumentLike(Protocol):
    """Structural type for objects that expose text document attributes.

    Any object carrying the attributes declared below satisfies this
    protocol; no inheritance is required.
    """

    # Full text of the document.
    source: str
    # Language identifier of the document (for example ``"python"``).
    language_id: str
| 19 | + |
| 20 | + |
@dataclasses.dataclass
class SyntheticDocument:
    """Lightweight, typed substitute for ``workspace.TextDocument``.

    Notebook linting uses this instead of a ``types.SimpleNamespace`` so the
    stand-in document has an explicit, portable set of fields that a type
    checker can verify.
    """

    # Fields every synthetic document must be given.
    uri: str
    path: str
    source: str
    # Fields that fall back to defaults when omitted.
    language_id: str = "python"
    version: int = 0
| 34 | + |
| 35 | + |
# Matches lines that begin (after optional indentation) with IPython-only
# syntax so they can be replaced with `pass`:
#   * ``%%name`` cell magics and ``%name`` line magics;
#   * ``!!`` and ``!`` shell escapes, including commands that do not start with
#     a word character (``!./run.sh``, ``!/bin/ls``, ``!$CMD``).
# ``%=`` and ``!=`` are deliberately not matched: they can open a continuation
# line of ordinary Python (augmented modulo, inequality comparison).
MAGIC_LINE_RE = re.compile(r"^\s*(?:%%\w|%(?!=)\w|!!|!(?!=))")
| 38 | + |
# Notebook synchronisation options: Python cells of the "jupyter-notebook" and
# "interactive" notebook types, with ``save`` enabled.  One filter is built
# per notebook type, in that order.
NOTEBOOK_SYNC_OPTIONS = lsp.NotebookDocumentSyncOptions(
    notebook_selector=[
        lsp.NotebookDocumentFilterWithNotebook(
            notebook=notebook_type,
            cells=[lsp.NotebookCellLanguage(language="python")],
        )
        for notebook_type in ("jupyter-notebook", "interactive")
    ],
    save=True,
)
| 56 | + |
| 57 | + |
@dataclasses.dataclass
class CellOffset:
    """Placement of one notebook cell's lines inside the combined source."""

    # URI of the cell's text document.
    cell_uri: str
    # 0-based line of the combined source at which the cell's first line sits.
    start_line: int
    # Number of combined-source lines that belong to the cell.
    line_count: int


# Per-cell placements for one combined source, in the order they were appended.
CellMap = list[CellOffset]
| 68 | + |
| 69 | + |
# Line splitter limited to the terminators that Python tooling and LSP clients
# treat as line breaks (``\r\n``, ``\r``, ``\n``); every match keeps its
# terminator.  ``str.splitlines`` additionally breaks on form feed, ``\x85``,
# ``\u2028`` and similar characters, which would make the recorded line counts
# disagree with the line numbers a linter reports for the combined source.
_SOURCE_LINE_RE = re.compile(r"[^\r\n]*(?:\r\n|\r|\n)|[^\r\n]+")


def _neutralize_magic_line(line: str) -> str:
    """Return *line* unchanged, or an equally indented ``pass`` for a magic line."""
    if not MAGIC_LINE_RE.match(line):
        return line
    # Keep the leading whitespace so that a magic nested in an indented block
    # (``if``, ``for``, ``def`` ...) still leaves a syntactically valid body.
    indent = line[: len(line) - len(line.lstrip())]
    return f"{indent}pass\n"


def build_notebook_source(
    cells: list,  # NotebookCell objects (can't import type without pygls dependency)
    get_text_document: Callable[[str], Optional[TextDocumentLike]],
) -> tuple[str, CellMap]:
    """Build a single Python source string from all code cells.

    Cells are skipped when they are not code cells, have no document, resolve
    to no text document, are not Python, or have empty source.

    Args:
        cells: The notebook's cell list (``nb.cells``).
        get_text_document: A callable that resolves a cell document URI to a
            text document object (with ``.source`` and ``.language_id``
            attributes), e.g. ``workspace.get_text_document``.

    Returns:
        (combined_source, cell_map) where *cell_map* is a list of
        :class:`CellOffset` instances describing where each cell's lines
        begin in the combined source.

    IPython magic lines (``%``, ``%%``, ``!``, etc.) are replaced with
    ``pass`` statements that keep the original indentation, so the linter
    does not raise syntax errors on them and line numbers stay aligned.
    """
    source_parts: list[str] = []
    cell_map: CellMap = []
    current_line = 0

    for cell in cells:
        if cell.kind != lsp.NotebookCellKind.Code or cell.document is None:
            continue
        doc = get_text_document(cell.document)
        if doc is None or doc.language_id != "python":
            continue

        source = doc.source
        if not source:
            continue

        # Non-empty source always yields at least one line here.
        lines = _SOURCE_LINE_RE.findall(source)
        # Ensure the last line ends with a newline so the next cell starts on
        # a fresh line of the combined source.
        if not lines[-1].endswith("\n"):
            lines[-1] += "\n"

        sanitized_lines = [_neutralize_magic_line(line) for line in lines]

        cell_map.append(CellOffset(cell.document, current_line, len(sanitized_lines)))
        source_parts.extend(sanitized_lines)
        current_line += len(sanitized_lines)

    return "".join(source_parts), cell_map
| 119 | + |
| 120 | + |
def get_cell_for_line(global_line: int, cell_map: CellMap) -> CellOffset | None:
    """Look up the :class:`CellOffset` whose span contains *global_line*.

    *global_line* is a 0-based line number in the combined notebook source.
    The first matching entry in *cell_map* is returned; ``None`` is returned
    when no entry covers the line.
    """
    owners = (
        candidate
        for candidate in cell_map
        # Equivalent to start_line <= global_line < start_line + line_count.
        if 0 <= global_line - candidate.start_line < candidate.line_count
    )
    return next(owners, None)
| 131 | + |
| 132 | + |
def remap_diagnostics_to_cells(
    diagnostics: Sequence[lsp.Diagnostic],
    cell_map: CellMap,
) -> dict[str, list[lsp.Diagnostic]]:
    """Map combined-source diagnostics back to individual cell URIs.

    Args:
        diagnostics: Diagnostics whose ranges refer to the combined source.
        cell_map: Cell placements describing the combined source.

    Returns:
        A dict keyed by cell URI.  Every cell in *cell_map* has an entry
        (possibly an empty list) holding its diagnostics with cell-local
        ranges.

    Each diagnostic's line range is adjusted relative to the owning cell.
    Diagnostics whose start line doesn't fall in any cell are discarded.
    If a diagnostic's end line crosses a cell boundary it is clamped to
    column 0 of the cell's last line.  A range whose end precedes its start
    (on the same or an earlier line) is collapsed to an empty range at the
    start position, so no inverted or negative position is emitted.
    """
    per_cell: dict[str, list[lsp.Diagnostic]] = {
        entry.cell_uri: [] for entry in cell_map
    }

    for diag in diagnostics:
        start = diag.range.start
        end = diag.range.end

        entry = get_cell_for_line(start.line, cell_map)
        if entry is None:
            continue

        local_start_line = start.line - entry.start_line
        local_start_char = start.character

        # Clamp end line to the cell boundary (defensive).
        last_line = entry.line_count - 1
        local_end_line = end.line - entry.start_line
        if local_end_line > last_line:
            local_end_line, local_end_char = last_line, 0
        else:
            local_end_char = end.character

        # Ensure end is not before start (inverted range violates LSP spec).
        # Comparing (line, character) pairs also covers an end on an earlier
        # line, which would otherwise yield an inverted or negative position.
        if (local_end_line, local_end_char) < (local_start_line, local_start_char):
            local_end_line, local_end_char = local_start_line, local_start_char

        remapped = lsp.Diagnostic(
            range=lsp.Range(
                start=lsp.Position(line=local_start_line, character=local_start_char),
                end=lsp.Position(line=local_end_line, character=local_end_char),
            ),
            message=diag.message,
            severity=diag.severity,
            code=diag.code,
            code_description=diag.code_description,
            source=diag.source,
            # TODO: remap related_information locations through cell_map when a tool
            # starts emitting them; forwarding raw combined-source positions produces
            # incorrect navigation targets.
            related_information=diag.related_information,
            tags=diag.tags,
            data=diag.data,
        )
        per_cell[entry.cell_uri].append(remapped)

    return per_cell
0 commit comments