Skip to content

Commit a88ef62

Browse files
authored
feat: implement upsert_documents() method for Index and IndexAsyncio (#595)
## Summary

Add the `upsert_documents()` method to the `Index` and `IndexAsyncio` classes for upserting flat JSON documents into a namespace.

- Documents are indexed based on the configured index schema
- Each document must have an `_id` field
- Vector fields can be user-specified (e.g., `my_vector`) or use the reserved `_values` key
- Text fields are indexed based on schema configuration with `full_text_searchable: true`

## Usage Example

```python
from pinecone import Pinecone

pc = Pinecone()
index = pc.Index(host="example-index-host")

# Upsert documents with pre-computed vectors
index.upsert_documents(
    namespace="movies",
    documents=[
        {
            "_id": "movie-1",
            "title": "Return of the Pink Panther",
            "year": 1986,
            "genre": "comedy",
            "embedding": [0.1, 0.2, 0.3, ...]  # matches schema field name
        },
        {
            "_id": "movie-2",
            "title": "The Pink Panther Strikes Again",
            "year": 1976,
            "genre": "comedy",
            "embedding": [0.3, 0.4, 0.5, ...]
        }
    ]
)
```

## Async Example

```python
import asyncio
from pinecone import Pinecone

async def main():
    pc = Pinecone()
    async with pc.IndexAsyncio(host="example-index-host") as index:
        await index.upsert_documents(
            namespace="movies",
            documents=[
                {"_id": "movie-1", "title": "Test", "embedding": [0.1, 0.2]}
            ]
        )

asyncio.run(main())
```

## Test Plan

- [x] Unit tests for parameter validation (namespace required, documents required)
- [x] Unit tests for request creation with various document formats
- [x] Unit tests for response handling
- [x] Unit tests for async version

## Related

- Linear: SDK-112
- Builds on: SDK-111 (search_documents implementation, now merged)

<!-- CURSOR_SUMMARY -->
---

> [!NOTE]
> **Medium Risk**
> Adds a new document-write API surface (`upsert_documents`) that sends user-provided JSON to the backend; behavior is mostly additive but touches data-ingestion paths and response handling.
>
> **Overview**
> **Adds document upsert support.** Introduces `upsert_documents(namespace, documents)` on `Index` and `_IndexAsyncio` (and their interfaces) to upsert flat JSON documents (requiring `_id`) via `DocumentOperationsApi`.
>
> The new methods validate inputs, wrap payloads in `DocumentUpsertRequest`, and return `UpsertResponse` with extracted `_response_info`, falling back to `len(documents)` when the server omits `upserted_count`.
>
> Adds unit tests covering request creation, parameter validation, API invocation wiring, async behavior, and the `upserted_count` fallback.
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit b75f5b0. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>

<!-- /CURSOR_SUMMARY -->
1 parent c26c062 commit a88ef62

File tree

5 files changed

+477
-0
lines changed

5 files changed

+477
-0
lines changed

pinecone/db_data/index.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,83 @@ def search_documents(
914914
documents=documents, usage=usage, _response_info=response_info
915915
)
916916

917+
@validate_and_convert_errors
def upsert_documents(self, namespace: str, documents: list[dict[str, Any]]) -> UpsertResponse:
    """Write a batch of flat JSON documents into one namespace.

    The documents are wrapped in a ``DocumentUpsertRequest`` and handed to
    ``self.document_api.upsert_documents``. Documents are indexed according to
    the index's configured schema, and each one is expected to carry an ``_id``
    field; this method itself does not inspect individual documents.

    Args:
        namespace: The namespace that receives the documents. Must not be ``None``.
        documents: A non-empty list of flat JSON documents. Each document should have
            an ``_id`` field plus fields matching the index's schema configuration.

    Returns:
        UpsertResponse: Holds ``upserted_count`` and ``_response_info``. If the API
        result has no ``upserted_count`` (attribute missing or ``None``), the number
        of submitted documents is reported instead.

    Raises:
        ValueError: If ``namespace`` is ``None`` or ``documents`` is empty.

    Examples:

    .. code-block:: python

        from pinecone import Pinecone

        pc = Pinecone()
        index = pc.Index(host="example-index-host")

        # Upsert documents with pre-computed vectors
        index.upsert_documents(
            namespace="movies",
            documents=[
                {
                    "_id": "movie-1",
                    "title": "Return of the Pink Panther",
                    "year": 1986,
                    "genre": "comedy",
                    "embedding": [0.1, 0.2, 0.3, ...]  # matches schema field name
                },
                {
                    "_id": "movie-2",
                    "title": "The Pink Panther Strikes Again",
                    "year": 1976,
                    "genre": "comedy",
                    "embedding": [0.3, 0.4, 0.5, ...]
                }
            ]
        )

    """
    # Guard clauses: reject unusable arguments before touching the API layer.
    if namespace is None:
        raise ValueError("Namespace is required when upserting documents")
    if not documents:
        raise ValueError("At least one document is required")

    # Imported lazily, only once the arguments have passed validation.
    from pinecone.core.openapi.db_data.model.document_upsert_request import (
        DocumentUpsertRequest,
    )

    payload = DocumentUpsertRequest(value=documents)
    api_result = self.document_api.upsert_documents(namespace, payload)

    from pinecone.utils.response_info import extract_response_info

    # Prefer the info attached to the API result; otherwise use whatever
    # extract_response_info builds from an empty mapping.
    info = getattr(api_result, "_response_info", None)
    if info is None:
        info = extract_response_info({})

    # The server may omit the count; report the number of documents sent instead.
    count = getattr(api_result, "upserted_count", None)
    if count is None:
        count = len(documents)

    return UpsertResponse(upserted_count=count, _response_info=info)
993+
917994
@validate_and_convert_errors
918995
def delete(
919996
self,

pinecone/db_data/index_asyncio.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,88 @@ async def main():
899899
documents=documents, usage=usage, _response_info=response_info
900900
)
901901

902+
@validate_and_convert_errors
async def upsert_documents(
    self, namespace: str, documents: List[Dict[str, Any]]
) -> UpsertResponse:
    """Asynchronously write a batch of flat JSON documents into one namespace.

    The documents are wrapped in a ``DocumentUpsertRequest`` and the call to
    ``self.document_api.upsert_documents`` is awaited. Documents are indexed
    according to the index's configured schema, and each one is expected to
    carry an ``_id`` field; this method itself does not inspect individual
    documents.

    Args:
        namespace: The namespace that receives the documents. Must not be ``None``.
        documents: A non-empty list of flat JSON documents. Each document should have
            an ``_id`` field plus fields matching the index's schema configuration.

    Returns:
        UpsertResponse: Holds ``upserted_count`` and ``_response_info``. If the API
        result has no ``upserted_count`` (attribute missing or ``None``), the number
        of submitted documents is reported instead.

    Raises:
        ValueError: If ``namespace`` is ``None`` or ``documents`` is empty.

    Examples:

    .. code-block:: python

        import asyncio
        from pinecone import Pinecone

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-index-host") as index:
                # Upsert documents with pre-computed vectors
                await index.upsert_documents(
                    namespace="movies",
                    documents=[
                        {
                            "_id": "movie-1",
                            "title": "Return of the Pink Panther",
                            "year": 1986,
                            "genre": "comedy",
                            "embedding": [0.1, 0.2, 0.3, ...]  # matches schema field name
                        },
                        {
                            "_id": "movie-2",
                            "title": "The Pink Panther Strikes Again",
                            "year": 1976,
                            "genre": "comedy",
                            "embedding": [0.3, 0.4, 0.5, ...]
                        }
                    ]
                )

        asyncio.run(main())

    """
    # Guard clauses: reject unusable arguments before touching the API layer.
    if namespace is None:
        raise ValueError("Namespace is required when upserting documents")
    if not documents:
        raise ValueError("At least one document is required")

    # Imported lazily, only once the arguments have passed validation.
    from pinecone.core.openapi.db_data.model.document_upsert_request import (
        DocumentUpsertRequest,
    )

    payload = DocumentUpsertRequest(value=documents)
    api_result = await self.document_api.upsert_documents(namespace, payload)

    from pinecone.utils.response_info import extract_response_info

    # Prefer the info attached to the API result; otherwise use whatever
    # extract_response_info builds from an empty mapping.
    info = getattr(api_result, "_response_info", None)
    if info is None:
        info = extract_response_info({})

    # The server may omit the count; report the number of documents sent instead.
    count = getattr(api_result, "upserted_count", None)
    if count is None:
        count = len(documents)

    return UpsertResponse(upserted_count=count, _response_info=info)
983+
902984
def _openapi_kwargs(self, kwargs: dict[str, Any]) -> dict[str, Any]:
    """Return ``kwargs`` after passing it through ``filter_dict`` with ``OPENAPI_ENDPOINT_PARAMS``.

    NOTE(review): presumably this keeps only the keyword arguments that the
    generated OpenAPI endpoints accept -- confirm against ``filter_dict``.
    """
    endpoint_kwargs = filter_dict(kwargs, OPENAPI_ENDPOINT_PARAMS)
    return endpoint_kwargs
904986

pinecone/db_data/index_asyncio_interface.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,59 @@ async def search_records(
925925
"""Alias of the search() method."""
926926
pass
927927

928+
@abstractmethod
async def upsert_documents(
    self, namespace: str, documents: List[Dict[str, Any]]
) -> UpsertResponse:
    """Asynchronously write a batch of flat JSON documents into one namespace.

    Abstract interface method; concrete async index classes supply the
    behavior. Documents are indexed according to the index's configured
    schema, and every document must include an ``_id`` field.

    Args:
        namespace: The namespace that receives the documents.
        documents: A list of flat JSON documents. Each document must have an ``_id``
            field plus fields matching the index's schema configuration.

    Returns:
        UpsertResponse: Object containing the number of documents upserted.

    Examples:

    .. code-block:: python

        import asyncio
        from pinecone import Pinecone

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-index-host") as index:
                # Upsert documents with pre-computed vectors
                await index.upsert_documents(
                    namespace="movies",
                    documents=[
                        {
                            "_id": "movie-1",
                            "title": "Return of the Pink Panther",
                            "year": 1986,
                            "genre": "comedy",
                            "embedding": [0.1, 0.2, 0.3, ...]
                        },
                        {
                            "_id": "movie-2",
                            "title": "The Pink Panther Strikes Again",
                            "year": 1976,
                            "genre": "comedy",
                            "embedding": [0.3, 0.4, 0.5, ...]
                        }
                    ]
                )

        asyncio.run(main())

    """
    pass
980+
928981
@abstractmethod
929982
@require_kwargs
930983
async def create_namespace(

pinecone/db_data/interfaces.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,54 @@ def search_documents(
507507
"""
508508
pass
509509

510+
@abstractmethod
def upsert_documents(self, namespace: str, documents: list[dict[str, Any]]) -> UpsertResponse:
    """Write a batch of flat JSON documents into one namespace.

    Abstract interface method; concrete index classes supply the behavior.
    Documents are indexed according to the index's configured schema, and
    every document must include an ``_id`` field.

    Args:
        namespace: The namespace that receives the documents.
        documents: A list of flat JSON documents. Each document must have an ``_id``
            field plus fields matching the index's schema configuration.

    Returns:
        UpsertResponse: Object containing the number of documents upserted.

    Examples:

    .. code-block:: python

        from pinecone import Pinecone

        pc = Pinecone()
        index = pc.Index(host="example-index-host")

        # Upsert documents with pre-computed vectors
        index.upsert_documents(
            namespace="movies",
            documents=[
                {
                    "_id": "movie-1",
                    "title": "Return of the Pink Panther",
                    "year": 1986,
                    "genre": "comedy",
                    "embedding": [0.1, 0.2, 0.3, ...]  # matches schema field name
                },
                {
                    "_id": "movie-2",
                    "title": "The Pink Panther Strikes Again",
                    "year": 1976,
                    "genre": "comedy",
                    "embedding": [0.3, 0.4, 0.5, ...]
                }
            ]
        )

    """
    pass
557+
510558
@abstractmethod
511559
def delete(
512560
self,

0 commit comments

Comments
 (0)