NVIDIA
diff --git a/‎README.rst‎
Lines changed: 10 additions & 3 deletions b/‎README.rst‎
Lines changed: 10 additions & 3 deletions
diff --git a/‎build_tools/utils.py‎
Lines changed: 6 additions & 2 deletions b/‎build_tools/utils.py‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎docs/_static/css/diagram-colors.css‎
Lines changed: 134 additions & 0 deletions b/‎docs/_static/css/diagram-colors.css‎
Lines changed: 134 additions & 0 deletions
diff --git a/‎docs/_static/css/sphinx_tabs.css‎
Lines changed: 45 additions & 0 deletions b/‎docs/_static/css/sphinx_tabs.css‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎docs/_static/css/svg-responsive.css‎
Lines changed: 72 additions & 0 deletions b/‎docs/_static/css/svg-responsive.css‎
Lines changed: 72 additions & 0 deletions
diff --git a/‎docs/_templates/layout.html‎
Lines changed: 4 additions & 0 deletions b/‎docs/_templates/layout.html‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/conf.py‎
Lines changed: 4 additions & 1 deletion b/‎docs/conf.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎docs/debug/1_getting_started.rst‎
Lines changed: 1 addition & 1 deletion b/‎docs/debug/1_getting_started.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/debug/3_api_features.rst‎
Lines changed: 5 additions & 2 deletions b/‎docs/debug/3_api_features.rst‎
Lines changed: 5 additions & 2 deletions
@@ -175,15 +175,22 @@ For example to use the NGC PyTorch container interactively,
 
 .. code-block:: bash
 
-    docker run --gpus all -it --rm nvcr.io/nvidia/pytorch:25.08-py3
+    docker run --gpus all -it --rm nvcr.io/nvidia/pytorch:26.01-py3
 
 For example to use the NGC JAX container interactively,
 
 .. code-block:: bash
 
-    docker run --gpus all -it --rm nvcr.io/nvidia/jax:25.08-py3
+    docker run --gpus all -it --rm nvcr.io/nvidia/jax:26.01-py3
 
-Where 25.08 (corresponding to August 2025 release) is the container version.
+Where 26.01 (corresponding to the January 2026 release) is the container version.
+
+We recommend updating to the latest NGC container available here:
+
+* https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch
+* https://catalog.ngc.nvidia.com/orgs/nvidia/containers/jax
+
+If you run any examples, please ensure you are using a matching version of Transformer Engine. Transformer Engine is pre-built and packaged inside the containers, with examples available at ``/opt/transformerengine`` or ``/opt/transformer-engine``. If you would like to use examples from the Transformer Engine main branch and run into import errors, please try the latest pip package or build from source. For most users, however, NGC containers are recommended for ease of use.
 
 **Benefits of using NGC containers:**
 
 
@@ -228,9 +228,10 @@ def nvcc_path() -> Tuple[str, str]:
 def get_cuda_include_dirs() -> Tuple[str, str]:
     """Returns the CUDA header directory."""
 
+    force_wheels = bool(int(os.getenv("NVTE_BUILD_USE_NVIDIA_WHEELS", "0")))
     # If cuda is installed via toolkit, all necessary headers
     # are bundled inside the top level cuda directory.
-    if cuda_toolkit_include_path() is not None:
+    if not force_wheels and cuda_toolkit_include_path() is not None:
         return [cuda_toolkit_include_path()]
 
     # Use pip wheels to include all headers.
@@ -239,7 +240,10 @@ def get_cuda_include_dirs() -> Tuple[str, str]:
     except ModuleNotFoundError as e:
         raise RuntimeError("CUDA not found.")
 
-    cuda_root = Path(nvidia.__file__).parent
+    if nvidia.__file__ is not None:
+        cuda_root = Path(nvidia.__file__).parent
+    else:
+        cuda_root = Path(nvidia.__path__[0])  # namespace
     return [
         subdir / "include"
         for subdir in cuda_root.iterdir()
 
@@ -0,0 +1,134 @@
+/* Diagram color definitions for Transformer Engine documentation */
+
+/* High precision (BF16/FP16) elements */
+.hp {
+  fill: #ede7f6;
+  stroke: #673ab7;
+  stroke-width: 2;
+}
+
+/* FP8 precision elements */
+.fp8 {
+  fill: #fff8e1;
+  stroke: #ffa726;
+  stroke-width: 2;
+}
+
+/* GEMM/computation operations */
+.gemm {
+  fill: #ffe0b2;
+  stroke: #fb8c00;
+  stroke-width: 2.5;
+}
+
+/* Quantization operations */
+.quantize {
+  fill: #e8f5e9;
+  stroke: #66bb6a;
+  stroke-width: 2;
+}
+
+/* Amax computation operations */
+.amax {
+  fill: #e1f5fe;
+  stroke: #039be5;
+  stroke-width: 2;
+}
+
+/* Text styles */
+.text {
+  font-family: 'Segoe UI', Arial, sans-serif;
+  font-size: 14px;
+  text-anchor: middle;
+  fill: #212121;
+}
+
+.small-text {
+  font-family: 'Segoe UI', Arial, sans-serif;
+  font-size: 14px;
+  text-anchor: middle;
+  fill: #757575;
+}
+
+.label {
+  font-family: 'Segoe UI', Arial, sans-serif;
+  font-size: 14px;
+  text-anchor: middle;
+  fill: #424242;
+}
+
+.title {
+  font-family: 'Segoe UI', Arial, sans-serif;
+  font-size: 18px;
+  font-weight: 600;
+  text-anchor: middle;
+  fill: #212121;
+}
+
+.section-title {
+  font-family: 'Segoe UI', Arial, sans-serif;
+  font-size: 15px;
+  font-weight: 600;
+  text-anchor: middle;
+}
+
+/* Arrows */
+/* Note: marker-end references #arrowhead marker which must be defined in each SVG's <defs> section */
+.arrow {
+  stroke: #616161;
+  stroke-width: 2;
+  fill: none;
+  marker-end: url(#arrowhead);
+}
+
+/* Additional box and element styles */
+.box-blue {
+  fill: #e3f2fd;
+  stroke: #1976d2;
+  stroke-width: 2;
+}
+
+.box-orange {
+  fill: #fff3e0;
+  stroke: #f57c00;
+  stroke-width: 2;
+}
+
+.box-green {
+  fill: #c8e6c9;
+  stroke: #388e3c;
+  stroke-width: 2;
+}
+
+.box-dashed {
+  stroke-dasharray: 5,5;
+}
+
+/* LayerNorm specific */
+.layernorm {
+  fill: #b3e5fc;
+  stroke: #0277bd;
+  stroke-width: 2.5;
+}
+
+/* Fused layers */
+.fused {
+  fill: #b2dfdb;
+  stroke: #00695c;
+  stroke-width: 3;
+}
+
+/* Generic computation blocks */
+.computation {
+  fill: #f5f5f5;
+  stroke: #757575;
+  stroke-width: 2;
+}
+
+/* FP32 precision (alternative red) */
+.fp32 {
+  fill: #ffcdd2;
+  stroke: #d32f2f;
+  stroke-width: 2.5;
+}
+
@@ -0,0 +1,45 @@
+/* Custom styling for sphinx-tabs */
+
+.sphinx-tabs {
+    margin-bottom: 1rem;
+}
+
+.sphinx-tabs-tab {
+    background-color: #f4f4f4;
+    border: 1px solid #ccc;
+    border-bottom: none;
+    padding: 0.5rem 1rem;
+    margin-right: 0.5rem;
+    cursor: pointer;
+    font-weight: 500;
+    transition: background-color 0.2s;
+}
+
+.sphinx-tabs-tab:hover {
+    background-color: #e0e0e0;
+}
+
+.sphinx-tabs-tab[aria-selected="true"] {
+    background-color: #76b900; /* NVIDIA green */
+    color: white;
+    border-color: #76b900;
+    margin-right: 0.5rem;
+}
+
+.sphinx-tabs-panel {
+    border: 1px solid #ccc;
+    padding: 1rem;
+    background-color: #f9f9f9;
+}
+
+/* Dark mode support for RTD theme: pin tab text colors inside .rst-content so labels stay readable on the tab backgrounds set above */
+.rst-content .sphinx-tabs-tab {
+    color: #333;
+}
+
+.rst-content .sphinx-tabs-tab[aria-selected="true"] {
+    color: white;
+}
+
+
+
@@ -0,0 +1,72 @@
+/* Responsive styling for SVG images */
+
+/* Make all SVG images responsive */
+.document svg,
+.document object[type="image/svg+xml"],
+.rst-content svg {
+    max-width: 100%;
+    height: auto;
+    display: block;
+    margin: 1em auto;
+}
+
+/* For raw HTML embedded SVGs */
+.document .raw-html svg {
+    max-width: 100%;
+    height: auto;
+    width: 100%;
+}
+
+/* Ensure container doesn't overflow */
+.document .raw-html {
+    max-width: 100%;
+    overflow-x: auto;
+}
+
+/* Figure containers with captions */
+.svg-figure {
+    text-align: center;
+    margin: 20px auto;
+}
+
+.svg-figure img {
+    display: block;
+    margin: 0 auto;
+    height: auto;
+}
+
+/* Different width classes for figures */
+.svg-figure.width-70 img {
+    width: 70%;
+    max-width: 100%;
+}
+
+.svg-figure.width-80 img {
+    width: 80%;
+    max-width: 100%;
+}
+
+.svg-figure.width-90 img {
+    width: 90%;
+    max-width: 100%;
+}
+
+.svg-figure.width-100 img {
+    width: 100%;
+}
+
+/* Figure captions */
+.svg-caption {
+    font-style: italic;
+    margin-top: 10px;
+    color: #555;
+    font-size: 0.95em;
+    line-height: 1.4;
+}
+
+
+
+
+
+
+
@@ -67,6 +67,10 @@
         overflow: visible !important;
     }
 
+    .quant {
+        background-color: yellow !important;
+    }
+
   </style>
   <style>
   a:link, a:visited {
 
@@ -84,8 +84,11 @@
 html_css_files = [
     "css/nvidia_font.css",
     "css/nvidia_footer.css",
-    "css/rtabs.css",
     "css/output-style.css",
+    "css/diagram-colors.css",
+    "css/sphinx_tabs.css",
+    "css/svg-responsive.css",
+    "css/rtabs.css",
 ]
 
 html_theme_options = {
 
@@ -15,7 +15,7 @@ Transformer Engine provides a set of precision debug tools which allow you to ea
 - log the statistics for each of the tensors in every matrix multiply (GEMM) operation,
 - run selected GEMMs in higher precision,
 - run current scaling - with one scaling factor per tensor - for particular GEMMs,
-- test new precisions and integrate them with FP8 training,
+- test new precisions and integrate them with quantized training (FP8, NVFP4, etc.),
 - ... and many more.
 
 There are 4 things one needs to do to use Transformer Engine debug features:
 
@@ -8,7 +8,10 @@ Debug features
 
 .. autoapiclass:: transformer_engine.debug.features.log_tensor_stats.LogTensorStats
 .. autoapiclass:: transformer_engine.debug.features.log_fp8_tensor_stats.LogFp8TensorStats
-.. autoapiclass:: transformer_engine.debug.features.disable_fp8_gemm.DisableFP8GEMM
-.. autoapiclass:: transformer_engine.debug.features.disable_fp8_layer.DisableFP8Layer
+.. autoapiclass:: transformer_engine.debug.features.log_nvfp4_tensor_stats.LogNvfp4TensorStats
+.. autoapiclass:: transformer_engine.debug.features.disable_quantization_gemm.DisableQuantizationGEMM
+.. autoapiclass:: transformer_engine.debug.features.disable_quantization_layer.DisableQuantizationLayer
 .. autoapiclass:: transformer_engine.debug.features.per_tensor_scaling.PerTensorScaling
 .. autoapiclass:: transformer_engine.debug.features.fake_quant.FakeQuant
+.. autoapiclass:: transformer_engine.debug.features.disable_fp8_gemm.DisableFP8GEMM
+.. autoapiclass:: transformer_engine.debug.features.disable_fp8_layer.DisableFP8Layer