Skip to content

Commit 09db1c9

Browse files
authored
Merge pull request #498 from neuromatch/jupyter-book-2
Update generate_book_v2 and error parsing v2
2 parents a5e08dc + ac6dfd9 commit 09db1c9

2 files changed

Lines changed: 157 additions & 58 deletions

File tree

generate_book_v2.py

Lines changed: 127 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
Run as: python ci/generate_book_v2.py student
1111
"""
1212

13+
import ast
1314
import os
15+
import re
1416
import sys
1517
import json
1618
import yaml
@@ -42,6 +44,7 @@ def main():
4244
"children": [
4345
{
4446
"file": "tutorials/Schedule/schedule_intro.md",
47+
"short_title": "Overview",
4548
"children": [
4649
{"file": "tutorials/Schedule/daily_schedules.md"},
4750
{"file": "tutorials/Schedule/shared_calendars.md"},
@@ -59,9 +62,11 @@ def main():
5962
"children": [
6063
{
6164
"file": "tutorials/TechnicalHelp/tech_intro.md",
65+
"short_title": "Overview",
6266
"children": [
6367
{
6468
"file": "tutorials/TechnicalHelp/Jupyterbook.md",
69+
"short_title": "Using Jupyterbook",
6570
"children": [
6671
{"file": "tutorials/TechnicalHelp/Tutorial_colab.md"},
6772
{"file": "tutorials/TechnicalHelp/Tutorial_kaggle.md"},
@@ -188,6 +193,9 @@ def main():
188193
"site": {
189194
"template": "book-theme",
190195
"domains": ["neuroai.neuromatch.io"],
196+
"logo": "tutorials/static/ai-logo.png",
197+
"favicon": "tutorials/static/ai-logo.png",
198+
"options": {"logo_text": "Neuromatch Academy: NeuroAI"},
191199
"nav": [],
192200
"actions": [{"title": "GitHub", "url": f"https://github.com/{ORG}/{REPO}"}],
193201
},
@@ -233,14 +241,132 @@ def pre_process_notebook(file_path):
233241
with open(file_path, encoding="utf-8") as fh:
234242
content = json.load(fh)
235243
content = open_in_colab_new_tab(content)
236-
content = change_video_widths(content)
244+
content = replace_widgets(content)
237245
content = link_hidden_cells(content)
238246
if ARG == "student":
239247
content = tag_cells_allow_errors(content)
240248
with open(file_path, "w", encoding="utf-8") as fh:
241249
json.dump(content, fh, indent=1, ensure_ascii=False)
242250

243251

252+
def replace_widgets(content):
    """Replace or remove ipywidget-based cells that don't render in static HTML.

    JB2/MyST does not embed widget state, so widget cells render as "Loading..."
    placeholders. Only code cells are inspected; every other cell is passed
    through untouched. Three patterns are handled, checked in this order:

    Feedback cells — detected by ``# @title Submit your feedback``:
      Removed entirely (pure UI widget, no static equivalent).

    Video cells — detected by ``display_videos(`` + ``video_ids = [``:
      Replaced with a markdown cell containing YouTube and Bilibili iframes
      stacked with bold platform labels. The cell is left unchanged when the
      ``video_ids`` list cannot be parsed or yields no embeddable video, so
      the original content is never silently dropped.

    Slide cells — detected by ``link_id`` and ``osf.io`` both appearing in the
    cell source (independent of the ``# @title`` text):
      Replaced with a markdown cell containing a download link and the OSF
      render iframe. Left unchanged when no quoted ``link_id`` value is found.

    Args:
        content: Parsed notebook dict; must contain a ``"cells"`` list.

    Returns:
        The same ``content`` dict, with ``content["cells"]`` replaced by the
        filtered/rewritten list.
    """
    import html

    WIDGET_W = 730
    WIDGET_H = 410

    def make_iframe(src):
        """Return a fixed-size iframe tag pointing at *src*."""
        return (
            f'<iframe width="{WIDGET_W}" height="{WIDGET_H}" '
            f'src="{src}" '
            f'allowfullscreen="" frameborder="0" '
            f'allow="autoplay; encrypted-media"></iframe>'
        )

    def parse_video_ids(cell_src):
        """Return the literal assigned to ``video_ids`` as a list, or []."""
        # DOTALL lets the list literal span several source lines.
        ids_match = re.search(r"video_ids\s*=\s*(\[.*?\])", cell_src, re.DOTALL)
        if not ids_match:
            return []
        try:
            parsed = ast.literal_eval(ids_match.group(1))
        except (ValueError, SyntaxError, TypeError):
            return []
        return list(parsed) if isinstance(parsed, (list, tuple)) else []

    def video_iframes(video_ids):
        """Return label + iframe HTML parts for every recognised entry."""
        parts = []
        for entry in video_ids:
            # Each entry must be a (platform, id) pair; anything else would
            # break the unpacking below, so skip it with a warning instead.
            if not isinstance(entry, (list, tuple)) or len(entry) != 2:
                print(f" Warning: malformed video_ids entry {entry!r}, skipping")
                continue
            platform, vid_id = entry
            if platform == "Youtube":
                iframe_src = f"https://www.youtube.com/embed/{vid_id}?fs=1&rel=0"
            elif platform == "Bilibili":
                iframe_src = (
                    f"https://player.bilibili.com/player.html?bvid={vid_id}&page=1"
                )
            else:
                print(
                    f" Warning: unknown video platform '{platform}' (id={vid_id}), skipping"
                )
                continue
            parts.append(f"<p><b>{platform}</b></p>")
            parts.append(make_iframe(iframe_src))
        return parts

    new_cells = []
    for cell in content["cells"]:
        if cell["cell_type"] != "code":
            new_cells.append(cell)
            continue

        src = "".join(cell.get("source", []))

        # --- Feedback cells: remove ---
        if "# @title Submit your feedback" in src:
            continue

        # --- Video cells: replace with iframes ---
        if "display_videos(" in src and "video_ids = [" in src:
            iframes = video_iframes(parse_video_ids(src))
            if not iframes:
                # Nothing embeddable was found: keep the original cell rather
                # than replacing it with a bare heading.
                new_cells.append(cell)
                continue

            title_match = re.search(r"#\s*@title\s+(.*)", src)
            title = title_match.group(1).strip() if title_match else "Video"
            # The title is free text from the notebook; escape it so that
            # characters such as "&" or "<" cannot break the generated HTML.
            html_parts = [f"<h3>{html.escape(title, quote=False)}</h3>"]
            html_parts.extend(iframes)
            new_cells.append(
                {
                    "cell_type": "markdown",
                    "metadata": {},
                    "source": ["\n".join(html_parts)],
                }
            )
            continue

        # --- Slide cells: replace with OSF iframe + download link ---
        if "link_id" in src and "osf.io" in src:
            link_id_match = re.search(r'link_id\s*=\s*["\']([^"\']+)["\']', src)
            if not link_id_match:
                new_cells.append(cell)
                continue

            link_id = link_id_match.group(1)
            download_url = f"https://osf.io/download/{link_id}/"
            render_url = (
                f"https://mfr.ca-1.osf.io/render?url=https://osf.io/{link_id}/"
                "?direct%26mode=render%26action=download%26mode=render"
            )
            new_cells.append(
                {
                    "cell_type": "markdown",
                    "metadata": {},
                    "source": [
                        "\n".join(
                            [
                                f'<p>If you want to download the slides: <a href="{download_url}">{download_url}</a></p>',
                                make_iframe(render_url),
                            ]
                        )
                    ],
                }
            )
            continue

        new_cells.append(cell)

    content["cells"] = new_cells
    return content
368+
369+
244370
def tag_cells_allow_errors(content):
245371
"""Add raises-exception tag to every code cell.
246372
@@ -334,38 +460,5 @@ def link_hidden_cells(content):
334460
return content
335461

336462

337-
def change_video_widths(content):
    """Resize embedded videos and rewrite slide cells to fit the book layout.

    Two rewrites are applied to every cell in ``content["cells"]``:

    * Cells whose source mentions ``YouTubeVideo`` have every occurrence of
      ``854`` replaced by ``730`` and ``480`` by ``410``.
      NOTE(review): the replacement is a plain substring swap over the whole
      cell, so it would also alter any other text containing those digits.
    * Cells containing the line ``# @title Tutorial slides`` or
      ``# @title Slides`` are replaced by a small snippet that prints the OSF
      download link and displays the rendered slides in a 730x410 IFrame.
      The cell is left untouched when no quoted ``link_id`` value is found,
      so no broken URL is generated.

    Args:
        content: Parsed notebook dict; must contain a ``"cells"`` list whose
            items each have a ``"source"`` (list of lines, or a single string).

    Returns:
        The same ``content`` dict, modified in place.
    """
    slide_titles = ("# @title Tutorial slides\n", "# @title Slides\n")

    for cell in content["cells"]:
        source = cell["source"]

        if "YouTubeVideo" in "".join(source):
            if isinstance(source, str):
                # Strings are immutable, so rebuild instead of assigning by index.
                source = source.replace("854", "730").replace("480", "410")
                cell["source"] = source
            else:
                # Slice-assign to keep mutating the caller's list in place.
                source[:] = [
                    line.replace("854", "730").replace("480", "410")
                    for line in source
                ]

        if not any(title in source for title in slide_titles):
            continue

        lines = source.splitlines(keepends=True) if isinstance(source, str) else source
        slide_link = ""
        for line in lines:
            if line.startswith("link_id"):
                # Accept either quoting style; only the first link_id line counts.
                for quote in ('"', "'"):
                    pieces = line.split(quote)
                    if len(pieces) >= 3:
                        slide_link = pieces[1]
                        break
                break

        if not slide_link:
            # Without an id the generated links would be invalid; keep the cell.
            continue

        download_link = f"https://osf.io/download/{slide_link}/"
        render_link = (
            f"https://mfr.ca-1.osf.io/render?url=https://osf.io/{slide_link}/"
            "?direct%26mode=render%26action=download%26mode=render"
        )
        cell["source"] = [
            "# @markdown\n",
            "from IPython.display import IFrame\n",
            "from ipywidgets import widgets\n",
            "out = widgets.Output()\n",
            "with out:\n",
            f' print(f"If you want to download the slides: {download_link}")\n',
            f' display(IFrame(src=f"{render_link}", width=730, height=410))\n',
            "display(out)",
        ]
    return content
368-
369-
370463
# Script entry point: call main() only when this file is executed directly.
if __name__ == "__main__":
    main()

parse_html_for_errors_v2.py

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,16 @@
33
Post-process JB2-built HTML to strip error output divs.
44
55
JB1 equivalent: nmaci/scripts/parse_html_for_errors.py
6-
JB2 difference: MyST uses different CSS classes for cell output containers.
6+
JB2 difference: MyST wraps error output in:
7+
<div data-name="outputs-container">
8+
<div data-name="safe-output-error">
9+
<pre class="myst-jp-error-output">...</pre>
10+
</div>
11+
</div>
712
8-
JB1 class: "cell_output docutils container"
9-
JB2 classes tried (in order):
10-
- "cell_output" (MyST book-theme)
11-
- "output" (fallback)
12-
- any <div> containing the error text (last resort)
13+
We target the inner div[data-name="safe-output-error"] to detect which
14+
outputs-container holds an error, then decompose the parent
15+
div[data-name="outputs-container"] so nothing is left behind.
1316
1417
Run as: python parse_html_for_errors_v2.py student
1518
"""
@@ -80,27 +83,30 @@ def main():
8083
def strip_error_divs(parsed_html):
    """Remove output divs that contain NotImplementedError or NameError text.

    JB2/MyST error output structure:
        <div data-name="outputs-container">
          <div data-name="safe-output-error">
            <pre class="myst-jp-error-output">...</pre>
          </div>
        </div>

    We find each inner error div, check that it contains one of the strings in
    ERROR_STRINGS, then decompose the enclosing outputs-container (so no empty
    wrapper is left). If an error div has no such ancestor, the error div
    itself is decomposed.

    Matching and removal are done in two passes: decomposing a container also
    destroys every error div inside it, so removing while scanning could touch
    an already-destroyed element and count the same container twice.

    Args:
        parsed_html: Parsed document exposing ``find_all`` (presumably a
            BeautifulSoup object — confirm against the caller).

    Returns:
        The number of distinct elements (containers or bare error divs)
        removed.
    """
    # Pass 1: collect the unique elements to delete while the tree is intact.
    targets = []
    seen = set()
    for error_div in parsed_html.find_all(
        "div", attrs={"data-name": "safe-output-error"}
    ):
        if not any(err in str(error_div) for err in ERROR_STRINGS):
            continue
        # Walk up to the outputs-container wrapper and remove the whole thing
        container = error_div.find_parent(
            "div", attrs={"data-name": "outputs-container"}
        )
        target = container if container is not None else error_div
        if id(target) not in seen:
            seen.add(id(target))
            targets.append(target)

    # Pass 2: delete. A target nested inside an earlier target is already
    # gone, so skip it instead of decomposing (and counting) it again.
    removed = 0
    for target in targets:
        if getattr(target, "decomposed", False):
            continue
        target.decompose()
        removed += 1

    return removed

0 commit comments

Comments
 (0)