microsoft
diff --git a/‎.gitignore‎
Lines changed: 4 additions & 0 deletions b/‎.gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.semversioner/next-release/patch-20260205172351041030.json‎
Lines changed: 4 additions & 0 deletions b/‎.semversioner/next-release/patch-20260205172351041030.json‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/examples_notebooks/api_overview.ipynb‎
Lines changed: 3 additions & 2 deletions b/‎docs/examples_notebooks/api_overview.ipynb‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎docs/examples_notebooks/input_documents.ipynb‎
Lines changed: 3 additions & 2 deletions b/‎docs/examples_notebooks/input_documents.ipynb‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎packages/graphrag-cache/README.md‎
Lines changed: 3 additions & 70 deletions b/‎packages/graphrag-cache/README.md‎
Lines changed: 3 additions & 70 deletions
diff --git a/‎packages/graphrag-cache/example_notebooks/basic_cache_example.ipynb‎
Lines changed: 96 additions & 0 deletions b/‎packages/graphrag-cache/example_notebooks/basic_cache_example.ipynb‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎packages/graphrag-cache/example_notebooks/custom_cache_example.ipynb‎
Lines changed: 129 additions & 0 deletions b/‎packages/graphrag-cache/example_notebooks/custom_cache_example.ipynb‎
Lines changed: 129 additions & 0 deletions
diff --git a/‎packages/graphrag-chunking/README.md‎
Lines changed: 4 additions & 21 deletions b/‎packages/graphrag-chunking/README.md‎
Lines changed: 4 additions & 21 deletions
@@ -63,3 +63,7 @@ docsite/
 
 # Root build assets
 packages/*/LICENSE
+
+# Notebooks outputs
+packages/graphrag-*/example_notebooks/**/output
+packages/graphrag-*/example_notebooks/**/cache
@@ -0,0 +1,4 @@
+{
+  "type": "patch",
+  "description": "update notebooks"
+}
@@ -28,10 +28,11 @@
     "from pathlib import Path\n",
     "from pprint import pprint\n",
     "\n",
-    "import graphrag.api as api\n",
     "import pandas as pd\n",
     "from graphrag.config.load_config import load_config\n",
-    "from graphrag.index.typing.pipeline_run_result import PipelineRunResult"
+    "from graphrag.index.typing.pipeline_run_result import PipelineRunResult\n",
+    "\n",
+    "import graphrag.api as api"
    ]
   },
   {
 
@@ -30,10 +30,11 @@
     "from pathlib import Path\n",
     "from pprint import pprint\n",
     "\n",
-    "import graphrag.api as api\n",
     "import pandas as pd\n",
     "from graphrag.config.load_config import load_config\n",
-    "from graphrag.index.typing.pipeline_run_result import PipelineRunResult"
+    "from graphrag.index.typing.pipeline_run_result import PipelineRunResult\n",
+    "\n",
+    "import graphrag.api as api"
    ]
   },
   {
 
@@ -6,80 +6,13 @@ This package contains a collection of utilities to handle GraphRAG caching imple
 
 This example shows how to create a JSON cache with file storage using the GraphRAG cache package's configuration system. 
 
-```python
-import asyncio
-from graphrag_storage import StorageConfig, create_storage, StorageType
-from graphrag_cache import CacheConfig, create_cache, CacheType, create_cache_key
-
-async def run():
-    cache = create_cache()
-
-    # The above is equivalent to the following:
-    cache = create_cache(
-        CacheConfig(
-            type=CacheType.Json,
-            storage=StorageConfig(
-                type=StorageType.File,
-                base_dir="cache"
-            )
-        ),
-    )
-
-    await cache.set("my_key", {"some": "object to cache"})
-    print(await cache.get("my_key"))
-
-    # create cache key from data dict.
-    cache_key = create_cache_key({
-        "some_arg": "some_value",
-        "something_else": 5
-    })
-    await cache.set(cache_key, {"some": "object to cache"})
-    print(await cache.get(cache_key))
-
-if __name__ == "__main__":
-    asyncio.run(run())
-```
+[Open the notebook to explore the basic example code](example_notebooks/basic_cache_example.ipynb)
 
 ### Custom Cache
 
-This demonstrates how to create a custom cache implementation by extending the base Cache class and registering it with the GraphRAG cache system. Once registered, the custom cache can be instantiated through the factory pattern using either CacheConfig or directly via cache_factory, allowing for extensible caching solutions tailored to specific needs.
+This example demonstrates how to create a custom cache implementation by extending the base Cache class and registering it with the GraphRAG cache system. Once registered, the custom cache can be instantiated through the factory pattern using either CacheConfig or directly via cache_factory, allowing for extensible caching solutions tailored to specific needs.
 
-```python
-import asyncio
-from typing import Any
-from graphrag_storage import Storage
-from graphrag_cache import Cache, CacheConfig, create_cache, register_cache
-
-class MyCache(Cache):
-    def __init__(self, some_setting: str, optional_setting: str = "default setting", **kwargs: Any):
-        # Validate settings and initialize
-        # View the JsonCache implementation to see how to create a cache that relies on a Storage provider.
-        ...
-
-    #Implement rest of interface
-    ...
-
-register_cache("MyCache", MyCache)
-
-async def run():
-    cache = create_cache(
-        CacheConfig(
-            type="MyCache",
-            some_setting="My Setting"
-        )
-    )
-
-    # Or use the factory directly to instantiate with a dict instead of using
-    # CacheConfig + create_factory
-    # from graphrag_cache.cache_factory import cache_factory
-    # cache = cache_factory.create(strategy="MyCache", init_args={"some_setting": "My Setting"})
-
-    await cache.set("my_key", {"some": "object to cache"})
-    print(await cache.get("my_key"))
-
-if __name__ == "__main__":
-    asyncio.run(run())
-```
+[Open the notebook to explore the custom cache example code](example_notebooks/custom_cache_example.ipynb)
 
 #### Details
 
 
@@ -0,0 +1,96 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "fcb917cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright (c) 2026 Microsoft Corporation.\n",
+    "# Licensed under the MIT License."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "33461307",
+   "metadata": {},
+   "source": [
+    "## Basic cache example\n",
+    "\n",
+    "This example shows how to create a JSON cache with file storage using the GraphRAG cache package's configuration system. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ee33abd6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Value stored in cache for 'my_key':\n",
+      "{'k1': 'object to cache'}\n",
+      "\n",
+      "Value stored in cache for cache_key using data dict:\n",
+      "{'k2': 'object to cache'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "from graphrag_cache import CacheConfig, CacheType, create_cache, create_cache_key\n",
+    "from graphrag_storage import StorageConfig, StorageType\n",
+    "\n",
+    "\n",
+    "async def run():\n",
+    "    \"\"\"Demonstrate basic cache usage with graphrag_cache.\"\"\"\n",
+    "    cache = create_cache()\n",
+    "\n",
+    "    # The above is equivalent to the following:\n",
+    "    cache = create_cache(\n",
+    "        CacheConfig(\n",
+    "            type=CacheType.Json,\n",
+    "            storage=StorageConfig(type=StorageType.File, base_dir=\"cache\"),\n",
+    "        ),\n",
+    "    )\n",
+    "\n",
+    "    await cache.set(\"my_key\", {\"k1\": \"object to cache\"})\n",
+    "    print(\"Value stored in cache for 'my_key':\")\n",
+    "    print(await cache.get(\"my_key\"))\n",
+    "\n",
+    "    # create cache key from data dict.\n",
+    "    cache_key = create_cache_key({\"some_arg\": \"some_value\", \"something_else\": 5})\n",
+    "    await cache.set(cache_key, {\"k2\": \"object to cache\"})\n",
+    "    print(\"\\nValue stored in cache for cache_key using data dict:\")\n",
+    "    print(await cache.get(cache_key))\n",
+    "\n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    await run()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -0,0 +1,129 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "42524e97",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright (c) 2026 Microsoft Corporation.\n",
+    "# Licensed under the MIT License."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a7cd6743",
+   "metadata": {},
+   "source": [
+    "## Custom cache example\n",
+    "\n",
+    "This example demonstrates how to create a custom cache implementation by extending the base Cache class and registering it with the GraphRAG cache system. Once registered, the custom cache can be instantiated through the factory pattern using either CacheConfig or directly via cache_factory, allowing for extensible caching solutions tailored to specific needs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "e7ec8dc1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Value stored in cache for 'my_key':\n",
+      "{'k1': 'object to cache'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "from typing import Any\n",
+    "\n",
+    "from graphrag_cache import Cache, CacheConfig, create_cache, register_cache\n",
+    "\n",
+    "\n",
+    "class MyCache(Cache):\n",
+    "    \"\"\"Custom cache implementation for storing and retrieving cached data.\"\"\"\n",
+    "\n",
+    "    def __init__(\n",
+    "        self,\n",
+    "        some_setting: str,\n",
+    "        optional_setting: str = \"default setting\",\n",
+    "        **kwargs: Any,\n",
+    "    ):\n",
+    "        # Validate settings and initialize\n",
+    "        # View the JsonCache implementation to see how to create a cache that relies on a Storage provider.\n",
+    "        self.some_setting = some_setting\n",
+    "        self.optional_setting = optional_setting\n",
+    "        self._cache: dict[str, Any] = {}\n",
+    "        self._child: Cache = self\n",
+    "\n",
+    "    async def get(self, key: str) -> Any:\n",
+    "        \"\"\"Retrieve a value from the cache by key.\"\"\"\n",
+    "        return self._cache.get(key)\n",
+    "\n",
+    "    async def set(self, key: str, value: Any, debug_data: Any = None) -> None:\n",
+    "        \"\"\"Store a value in the cache with the specified key.\"\"\"\n",
+    "        self._cache[key] = value\n",
+    "\n",
+    "    async def has(self, key: str) -> bool:\n",
+    "        \"\"\"Check if a key exists in the cache.\"\"\"\n",
+    "        return key in self._cache\n",
+    "\n",
+    "    async def delete(self, key: str) -> None:\n",
+    "        \"\"\"Remove a key and its value from the cache.\"\"\"\n",
+    "        self._cache.pop(key, None)\n",
+    "\n",
+    "    async def clear(self) -> None:\n",
+    "        \"\"\"Clear all items from the cache.\"\"\"\n",
+    "        self._cache.clear()\n",
+    "\n",
+    "    def child(self, name: str) -> Cache:\n",
+    "        \"\"\"Create or access a child cache (not implemented in this example).\"\"\"\n",
+    "        return self._child\n",
+    "\n",
+    "\n",
+    "register_cache(\"MyCache\", MyCache)\n",
+    "\n",
+    "\n",
+    "async def run():\n",
+    "    \"\"\"Demonstrate usage of the custom cache implementation.\"\"\"\n",
+    "    cache = create_cache(\n",
+    "        CacheConfig(\n",
+    "            type=\"MyCache\",\n",
+    "            some_setting=\"important setting value\",  # type: ignore\n",
+    "        )\n",
+    "    )\n",
+    "\n",
+    "    await cache.set(\"my_key\", {\"k1\": \"object to cache\"})\n",
+    "    print(\"Value stored in cache for 'my_key':\")\n",
+    "    print(await cache.get(\"my_key\"))\n",
+    "\n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    await run()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -8,34 +8,17 @@ This package contains a collection of text chunkers, a core config model, and a
 
 The SentenceChunker class splits text into individual sentences by identifying sentence boundaries. It takes input text and returns a list where each element is a separate sentence, making it easy to process text at the sentence level.
 
-```python
-chunker = SentenceChunker()
-chunks = chunker.chunk("This is a test. Another sentence.")
-print(chunks) # ["This is a test.", "Another sentence."]
-```
+[Open the notebook to explore the basic sentence example code](example_notebooks/basic_sentence_example.ipynb)
 
 ### Token chunking
 
 The TokenChunker splits text into fixed-size chunks based on token count rather than sentence boundaries. It uses a tokenizer to encode text into tokens, then creates chunks of a specified size with configurable overlap between chunks.
 
-```python
-tokenizer = tiktoken.get_encoding("o200k_base")
-chunker = TokenChunker(size=3, overlap=0, encode=tokenizer.encode, decode=tokenizer.decode)
-chunks = chunker.chunk("This is a random test fragment of some text")
-print(chunks) # ["This is a", " random test fragment", " of some text"]
-```
+[Open the notebook to explore the token chunking example code](example_notebooks/token_chunking_example.ipynb)
+
 
 ### Using the factory via helper util
 
 The create_chunker factory function provides a configuration-driven approach to instantiate chunkers by accepting a ChunkingConfig object that specifies the chunking strategy and parameters. This allows for more flexible and maintainable code by separating chunker configuration from direct instantiation.
 
-```python
-tokenizer = tiktoken.get_encoding("o200k_base")
-config = ChunkingConfig(
-    strategy="tokens",
-    size=3,
-    overlap=0
-)
-chunker = create_chunker(config, tokenizer.encode, tokenizer.decode)
-...
-```
+[Open the notebook to explore the factory helper util example code](example_notebooks/factory_helper_util_example.ipynb)
-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +{
 +  "type": "patch",
 +  "description": "update notebooks"
 +}
Original file line number	Diff line number	Diff line change
`@@ -28,10 +28,11 @@`
`28`	`28`	`"from pathlib import Path\n",`
`29`	`29`	`"from pprint import pprint\n",`
`30`	`30`	`"\n",`
`31`		`- "import graphrag.api as api\n",`
`32`	`31`	`"import pandas as pd\n",`
`33`	`32`	`"from graphrag.config.load_config import load_config\n",`
`34`		`- "from graphrag.index.typing.pipeline_run_result import PipelineRunResult"`
	`33`	`+ "from graphrag.index.typing.pipeline_run_result import PipelineRunResult\n",`
	`34`	`+ "\n",`
	`35`	`+ "import graphrag.api as api"`
`35`	`36`	`]`
`36`	`37`	`},`
`37`	`38`	`{`
Original file line number	Diff line number	Diff line change
`@@ -30,10 +30,11 @@`
`30`	`30`	`"from pathlib import Path\n",`
`31`	`31`	`"from pprint import pprint\n",`
`32`	`32`	`"\n",`
`33`		`- "import graphrag.api as api\n",`
`34`	`33`	`"import pandas as pd\n",`
`35`	`34`	`"from graphrag.config.load_config import load_config\n",`
`36`		`- "from graphrag.index.typing.pipeline_run_result import PipelineRunResult"`
	`35`	`+ "from graphrag.index.typing.pipeline_run_result import PipelineRunResult\n",`
	`36`	`+ "\n",`
	`37`	`+ "import graphrag.api as api"`
`37`	`38`	`]`
`38`	`39`	`},`
`39`	`40`	`{`