feat: implement upsert_documents() method for Index and IndexAsyncio (#595)

jhamon · web-flow · commit a88ef6221fb8 · 2026-01-30T15:07:23.000-05:00
## Summary

Add the `upsert_documents()` method to the `Index` and `IndexAsyncio` classes for upserting flat JSON documents into a namespace.

- Documents are indexed based on the configured index schema
- Each document must have an `_id` field
- Vector fields can be user-specified (e.g., `my_vector`) or use the reserved `_values` key
- Text fields are indexed based on schema configuration with `full_text_searchable: true`

## Usage Example

```python
from pinecone import Pinecone

pc = Pinecone()
index = pc.Index(host="example-index-host")

# Upsert documents with pre-computed vectors
index.upsert_documents(
    namespace="movies",
    documents=[
        {
            "_id": "movie-1",
            "title": "Return of the Pink Panther",
            "year": 1986,
            "genre": "comedy",
            "embedding": [0.1, 0.2, 0.3, ...]  # matches schema field name
        },
        {
            "_id": "movie-2",
            "title": "The Pink Panther Strikes Again",
            "year": 1976,
            "genre": "comedy",
            "embedding": [0.3, 0.4, 0.5, ...]
        }
    ]
)
```

## Async Example

```python
import asyncio
from pinecone import Pinecone

async def main():
    pc = Pinecone()
    async with pc.IndexAsyncio(host="example-index-host") as index:
        await index.upsert_documents(
            namespace="movies",
            documents=[
                {"_id": "movie-1", "title": "Test", "embedding": [0.1, 0.2]}
            ]
        )

asyncio.run(main())
```

## Test Plan

- [x] Unit tests for parameter validation (namespace required, documents required)
- [x] Unit tests for request creation with various document formats
- [x] Unit tests for response handling
- [x] Unit tests for async version

## Related

- Linear: SDK-112
- Builds on: SDK-111 (search_documents implementation, now merged)

---

> [!NOTE]
> **Medium Risk**
> Adds a new document-write API surface (`upsert_documents`) that sends user-provided JSON to the backend; behavior is mostly additive but touches data-ingestion paths and response handling.
>
> **Overview**
> **Adds document upsert support.** Introduces `upsert_documents(namespace, documents)` on `Index` and `_IndexAsyncio` (and their interfaces) to upsert flat JSON documents (requiring `_id`) via `DocumentOperationsApi`.
>
> The new methods validate inputs, wrap payloads in `DocumentUpsertRequest`, and return `UpsertResponse` with extracted `_response_info`, falling back to `len(documents)` when the server omits `upserted_count`.
>
> Adds unit tests covering request creation, parameter validation, API invocation wiring, async behavior, and the `upserted_count` fallback.
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit b75f5b0. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>
diff --git a/pinecone/db_data/index.py b/pinecone/db_data/index.py
@@ -914,6 +914,83 @@ def search_documents(
             documents=documents, usage=usage, _response_info=response_info
         )
 
+    @validate_and_convert_errors
+    def upsert_documents(self, namespace: str, documents: list[dict[str, Any]]) -> UpsertResponse:
+        """Upsert documents into a namespace.
+
+        This operation upserts flat JSON documents into a namespace. Documents are indexed
+        based on the configured index schema. Each document must have an ``_id`` field.
+
+        Args:
+            namespace: The namespace to upsert documents into. Must not be ``None``.
+            documents: A non-empty list of flat JSON documents to upsert. Each document
+                must have an ``_id`` field and fields that match the index's schema
+                configuration.
+
+        Returns:
+            UpsertResponse: Object containing the number of documents upserted. If the
+            server response does not carry an ``upserted_count``, the number of documents
+            submitted in this call is reported instead.
+
+        Raises:
+            ValueError: If ``namespace`` is ``None``, or if ``documents`` is ``None`` or
+                empty.
+
+        Examples:
+
+        .. code-block:: python
+
+            from pinecone import Pinecone
+
+            pc = Pinecone()
+            index = pc.Index(host="example-index-host")
+
+            # Upsert documents with pre-computed vectors
+            index.upsert_documents(
+                namespace="movies",
+                documents=[
+                    {
+                        "_id": "movie-1",
+                        "title": "Return of the Pink Panther",
+                        "year": 1986,
+                        "genre": "comedy",
+                        "embedding": [0.1, 0.2, 0.3, ...]  # matches schema field name
+                    },
+                    {
+                        "_id": "movie-2",
+                        "title": "The Pink Panther Strikes Again",
+                        "year": 1976,
+                        "genre": "comedy",
+                        "embedding": [0.3, 0.4, 0.5, ...]
+                    }
+                ]
+            )
+
+        """
+        if namespace is None:
+            raise ValueError("Namespace is required when upserting documents")
+        if not documents:
+            raise ValueError("At least one document is required")
+
+        # Imported at call time; both imports are resolved before the request is sent so
+        # an import problem cannot surface after the write has already happened.
+        from pinecone.core.openapi.db_data.model.document_upsert_request import (
+            DocumentUpsertRequest,
+        )
+        from pinecone.utils.response_info import extract_response_info
+
+        request = DocumentUpsertRequest(value=documents)
+        result = self.document_api.upsert_documents(namespace, request)
+
+        # Reuse the response info attached to the result when there is one; otherwise
+        # fall back to whatever extract_response_info yields for an empty mapping.
+        response_info = getattr(result, "_response_info", None)
+        if response_info is None:
+            response_info = extract_response_info({})
+
+        # Fall back to the submitted document count if the server doesn't return a count.
+        upserted_count = getattr(result, "upserted_count", None)
+        if upserted_count is None:
+            upserted_count = len(documents)
+
+        return UpsertResponse(upserted_count=upserted_count, _response_info=response_info)
+
     @validate_and_convert_errors
     def delete(
         self,
diff --git a/pinecone/db_data/index_asyncio.py b/pinecone/db_data/index_asyncio.py
@@ -899,6 +899,88 @@ async def main():
             documents=documents, usage=usage, _response_info=response_info
         )
 
+    @validate_and_convert_errors
+    async def upsert_documents(
+        self, namespace: str, documents: List[Dict[str, Any]]
+    ) -> UpsertResponse:
+        """Upsert documents into a namespace.
+
+        This operation upserts flat JSON documents into a namespace. Documents are indexed
+        based on the configured index schema. Each document must have an ``_id`` field.
+
+        Args:
+            namespace: The namespace to upsert documents into. Must not be ``None``.
+            documents: A non-empty list of flat JSON documents to upsert. Each document
+                must have an ``_id`` field and fields that match the index's schema
+                configuration.
+
+        Returns:
+            UpsertResponse: Object containing the number of documents upserted. If the
+            server response does not carry an ``upserted_count``, the number of documents
+            submitted in this call is reported instead.
+
+        Raises:
+            ValueError: If ``namespace`` is ``None``, or if ``documents`` is ``None`` or
+                empty.
+
+        Examples:
+
+        .. code-block:: python
+
+            import asyncio
+            from pinecone import Pinecone
+
+            async def main():
+                pc = Pinecone()
+                async with pc.IndexAsyncio(host="example-index-host") as index:
+                    # Upsert documents with pre-computed vectors
+                    await index.upsert_documents(
+                        namespace="movies",
+                        documents=[
+                            {
+                                "_id": "movie-1",
+                                "title": "Return of the Pink Panther",
+                                "year": 1986,
+                                "genre": "comedy",
+                                "embedding": [0.1, 0.2, 0.3, ...]  # matches schema field name
+                            },
+                            {
+                                "_id": "movie-2",
+                                "title": "The Pink Panther Strikes Again",
+                                "year": 1976,
+                                "genre": "comedy",
+                                "embedding": [0.3, 0.4, 0.5, ...]
+                            }
+                        ]
+                    )
+
+            asyncio.run(main())
+
+        """
+        if namespace is None:
+            raise ValueError("Namespace is required when upserting documents")
+        if not documents:
+            raise ValueError("At least one document is required")
+
+        # Imported at call time; both imports are resolved before the request is sent so
+        # an import problem cannot surface after the write has already happened.
+        from pinecone.core.openapi.db_data.model.document_upsert_request import (
+            DocumentUpsertRequest,
+        )
+        from pinecone.utils.response_info import extract_response_info
+
+        request = DocumentUpsertRequest(value=documents)
+        result = await self.document_api.upsert_documents(namespace, request)
+
+        # Reuse the response info attached to the result when there is one; otherwise
+        # fall back to whatever extract_response_info yields for an empty mapping.
+        response_info = getattr(result, "_response_info", None)
+        if response_info is None:
+            response_info = extract_response_info({})
+
+        # Fall back to the submitted document count if the server doesn't return a count.
+        upserted_count = getattr(result, "upserted_count", None)
+        if upserted_count is None:
+            upserted_count = len(documents)
+
+        return UpsertResponse(upserted_count=upserted_count, _response_info=response_info)
+
     def _openapi_kwargs(self, kwargs: dict[str, Any]) -> dict[str, Any]:
         return filter_dict(kwargs, OPENAPI_ENDPOINT_PARAMS)
 
diff --git a/pinecone/db_data/index_asyncio_interface.py b/pinecone/db_data/index_asyncio_interface.py
@@ -925,6 +925,59 @@ async def search_records(
         """Alias of the search() method."""
         pass
 
+    @abstractmethod
+    async def upsert_documents(
+        self, namespace: str, documents: List[Dict[str, Any]]
+    ) -> UpsertResponse:
+        """Upsert documents into a namespace.
+
+        This operation upserts flat JSON documents into a namespace. Documents are indexed
+        based on the configured index schema. Each document must have an ``_id`` field.
+
+        Args:
+            namespace: The namespace to upsert documents into. Must not be ``None``.
+            documents: A non-empty list of flat JSON documents to upsert. Each document
+                must have an ``_id`` field and fields that match the index's schema
+                configuration.
+
+        Returns:
+            UpsertResponse: Object containing the number of documents upserted.
+
+        Raises:
+            ValueError: If ``namespace`` is ``None``, or if ``documents`` is ``None`` or
+                empty (as enforced by the concrete asyncio implementation).
+
+        Examples:
+
+        .. code-block:: python
+
+            import asyncio
+            from pinecone import Pinecone
+
+            async def main():
+                pc = Pinecone()
+                async with pc.IndexAsyncio(host="example-index-host") as index:
+                    # Upsert documents with pre-computed vectors
+                    await index.upsert_documents(
+                        namespace="movies",
+                        documents=[
+                            {
+                                "_id": "movie-1",
+                                "title": "Return of the Pink Panther",
+                                "year": 1986,
+                                "genre": "comedy",
+                                "embedding": [0.1, 0.2, 0.3, ...]  # matches schema field name
+                            },
+                            {
+                                "_id": "movie-2",
+                                "title": "The Pink Panther Strikes Again",
+                                "year": 1976,
+                                "genre": "comedy",
+                                "embedding": [0.3, 0.4, 0.5, ...]
+                            }
+                        ]
+                    )
+
+            asyncio.run(main())
+
+        """
+        pass
+
     @abstractmethod
     @require_kwargs
     async def create_namespace(
diff --git a/pinecone/db_data/interfaces.py b/pinecone/db_data/interfaces.py
@@ -507,6 +507,54 @@ def search_documents(
         """
         pass
 
+    @abstractmethod
+    def upsert_documents(self, namespace: str, documents: list[dict[str, Any]]) -> UpsertResponse:
+        """Upsert documents into a namespace.
+
+        This operation upserts flat JSON documents into a namespace. Documents are indexed
+        based on the configured index schema. Each document must have an ``_id`` field.
+
+        Args:
+            namespace: The namespace to upsert documents into. Must not be ``None``.
+            documents: A non-empty list of flat JSON documents to upsert. Each document
+                must have an ``_id`` field and fields that match the index's schema
+                configuration.
+
+        Returns:
+            UpsertResponse: Object containing the number of documents upserted.
+
+        Raises:
+            ValueError: If ``namespace`` is ``None``, or if ``documents`` is ``None`` or
+                empty (as enforced by the concrete ``Index`` implementation).
+
+        Examples:
+
+        .. code-block:: python
+
+            from pinecone import Pinecone
+
+            pc = Pinecone()
+            index = pc.Index(host="example-index-host")
+
+            # Upsert documents with pre-computed vectors
+            index.upsert_documents(
+                namespace="movies",
+                documents=[
+                    {
+                        "_id": "movie-1",
+                        "title": "Return of the Pink Panther",
+                        "year": 1986,
+                        "genre": "comedy",
+                        "embedding": [0.1, 0.2, 0.3, ...]  # matches schema field name
+                    },
+                    {
+                        "_id": "movie-2",
+                        "title": "The Pink Panther Strikes Again",
+                        "year": 1976,
+                        "genre": "comedy",
+                        "embedding": [0.3, 0.4, 0.5, ...]
+                    }
+                ]
+            )
+
+        """
+        pass
+
     @abstractmethod
     def delete(
         self,
diff --git a/tests/unit/data/test_upsert_documents.py b/tests/unit/data/test_upsert_documents.py