llamastack
diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py
Lines changed: 246 additions & 0 deletions
diff --git a/src/llama_stack_client/resources/alpha/alpha.py b/src/llama_stack_client/resources/alpha/alpha.py
Lines changed: 66 additions & 0 deletions
diff --git a/src/llama_stack_client/resources/alpha/eval/eval.py b/src/llama_stack_client/resources/alpha/eval/eval.py
Lines changed: 26 additions & 0 deletions
diff --git a/src/llama_stack_client/resources/alpha/eval/jobs.py b/src/llama_stack_client/resources/alpha/eval/jobs.py
Lines changed: 8 additions & 0 deletions
diff --git a/src/llama_stack_client/resources/alpha/inference.py b/src/llama_stack_client/resources/alpha/inference.py
Lines changed: 18 additions & 0 deletions
diff --git a/src/llama_stack_client/resources/batches.py b/src/llama_stack_client/resources/batches.py
Lines changed: 18 additions & 0 deletions
diff --git a/src/llama_stack_client/resources/chat/chat.py b/src/llama_stack_client/resources/chat/chat.py
Lines changed: 48 additions & 0 deletions
@@ -65,6 +65,9 @@ def benchmarks(self) -> BenchmarksResource:
 
     @cached_property
     def eval(self) -> EvalResource:
+        """
+        Llama Stack Evaluation API for running evaluations on model and agent candidates.
+        """
         return EvalResource(self._client)
 
     @cached_property
@@ -73,6 +76,14 @@ def admin(self) -> AdminResource:
 
     @cached_property
     def inference(self) -> InferenceResource:
+        """
+        Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+        This API provides the raw interface to the underlying models. Three kinds of models are supported:
+        - LLM models: these models generate "raw" and "chat" (conversational) completions.
+        - Embedding models: these models generate embeddings to be used for semantic search.
+        - Rerank models: these models reorder the documents based on their relevance to a query.
+        """
         return InferenceResource(self._client)
 
     @cached_property
@@ -106,6 +117,9 @@ def benchmarks(self) -> AsyncBenchmarksResource:
 
     @cached_property
     def eval(self) -> AsyncEvalResource:
+        """
+        Llama Stack Evaluation API for running evaluations on model and agent candidates.
+        """
         return AsyncEvalResource(self._client)
 
     @cached_property
@@ -114,6 +128,14 @@ def admin(self) -> AsyncAdminResource:
 
     @cached_property
     def inference(self) -> AsyncInferenceResource:
+        """
+        Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+        This API provides the raw interface to the underlying models. Three kinds of models are supported:
+        - LLM models: these models generate "raw" and "chat" (conversational) completions.
+        - Embedding models: these models generate embeddings to be used for semantic search.
+        - Rerank models: these models reorder the documents based on their relevance to a query.
+        """
         return AsyncInferenceResource(self._client)
 
     @cached_property
@@ -150,6 +172,9 @@ def benchmarks(self) -> BenchmarksResourceWithRawResponse:
 
     @cached_property
     def eval(self) -> EvalResourceWithRawResponse:
+        """
+        Llama Stack Evaluation API for running evaluations on model and agent candidates.
+        """
         return EvalResourceWithRawResponse(self._alpha.eval)
 
     @cached_property
@@ -158,6 +183,14 @@ def admin(self) -> AdminResourceWithRawResponse:
 
     @cached_property
     def inference(self) -> InferenceResourceWithRawResponse:
+        """
+        Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+        This API provides the raw interface to the underlying models. Three kinds of models are supported:
+        - LLM models: these models generate "raw" and "chat" (conversational) completions.
+        - Embedding models: these models generate embeddings to be used for semantic search.
+        - Rerank models: these models reorder the documents based on their relevance to a query.
+        """
         return InferenceResourceWithRawResponse(self._alpha.inference)
 
 
@@ -175,6 +208,9 @@ def benchmarks(self) -> AsyncBenchmarksResourceWithRawResponse:
 
     @cached_property
     def eval(self) -> AsyncEvalResourceWithRawResponse:
+        """
+        Llama Stack Evaluation API for running evaluations on model and agent candidates.
+        """
         return AsyncEvalResourceWithRawResponse(self._alpha.eval)
 
     @cached_property
@@ -183,6 +219,14 @@ def admin(self) -> AsyncAdminResourceWithRawResponse:
 
     @cached_property
     def inference(self) -> AsyncInferenceResourceWithRawResponse:
+        """
+        Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+        This API provides the raw interface to the underlying models. Three kinds of models are supported:
+        - LLM models: these models generate "raw" and "chat" (conversational) completions.
+        - Embedding models: these models generate embeddings to be used for semantic search.
+        - Rerank models: these models reorder the documents based on their relevance to a query.
+        """
         return AsyncInferenceResourceWithRawResponse(self._alpha.inference)
 
 
@@ -200,6 +244,9 @@ def benchmarks(self) -> BenchmarksResourceWithStreamingResponse:
 
     @cached_property
     def eval(self) -> EvalResourceWithStreamingResponse:
+        """
+        Llama Stack Evaluation API for running evaluations on model and agent candidates.
+        """
         return EvalResourceWithStreamingResponse(self._alpha.eval)
 
     @cached_property
@@ -208,6 +255,14 @@ def admin(self) -> AdminResourceWithStreamingResponse:
 
     @cached_property
     def inference(self) -> InferenceResourceWithStreamingResponse:
+        """
+        Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+        This API provides the raw interface to the underlying models. Three kinds of models are supported:
+        - LLM models: these models generate "raw" and "chat" (conversational) completions.
+        - Embedding models: these models generate embeddings to be used for semantic search.
+        - Rerank models: these models reorder the documents based on their relevance to a query.
+        """
         return InferenceResourceWithStreamingResponse(self._alpha.inference)
 
 
@@ -225,6 +280,9 @@ def benchmarks(self) -> AsyncBenchmarksResourceWithStreamingResponse:
 
     @cached_property
     def eval(self) -> AsyncEvalResourceWithStreamingResponse:
+        """
+        Llama Stack Evaluation API for running evaluations on model and agent candidates.
+        """
         return AsyncEvalResourceWithStreamingResponse(self._alpha.eval)
 
     @cached_property
@@ -233,4 +291,12 @@ def admin(self) -> AsyncAdminResourceWithStreamingResponse:
 
     @cached_property
     def inference(self) -> AsyncInferenceResourceWithStreamingResponse:
+        """
+        Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+        This API provides the raw interface to the underlying models. Three kinds of models are supported:
+        - LLM models: these models generate "raw" and "chat" (conversational) completions.
+        - Embedding models: these models generate embeddings to be used for semantic search.
+        - Rerank models: these models reorder the documents based on their relevance to a query.
+        """
         return AsyncInferenceResourceWithStreamingResponse(self._alpha.inference)
@@ -45,8 +45,15 @@
 
 
 class EvalResource(SyncAPIResource):
+    """
+    Llama Stack Evaluation API for running evaluations on model and agent candidates.
+    """
+
     @cached_property
     def jobs(self) -> JobsResource:
+        """
+        Llama Stack Evaluation API for running evaluations on model and agent candidates.
+        """
         return JobsResource(self._client)
 
     @cached_property
@@ -254,8 +261,15 @@ def run_eval_alpha(
 
 
 class AsyncEvalResource(AsyncAPIResource):
+    """
+    Llama Stack Evaluation API for running evaluations on model and agent candidates.
+    """
+
     @cached_property
     def jobs(self) -> AsyncJobsResource:
+        """
+        Llama Stack Evaluation API for running evaluations on model and agent candidates.
+        """
         return AsyncJobsResource(self._client)
 
     @cached_property
@@ -483,6 +497,9 @@ def __init__(self, eval: EvalResource) -> None:
 
     @cached_property
     def jobs(self) -> JobsResourceWithRawResponse:
+        """
+        Llama Stack Evaluation API for running evaluations on model and agent candidates.
+        """
         return JobsResourceWithRawResponse(self._eval.jobs)
 
 
@@ -505,6 +522,9 @@ def __init__(self, eval: AsyncEvalResource) -> None:
 
     @cached_property
     def jobs(self) -> AsyncJobsResourceWithRawResponse:
+        """
+        Llama Stack Evaluation API for running evaluations on model and agent candidates.
+        """
         return AsyncJobsResourceWithRawResponse(self._eval.jobs)
 
 
@@ -527,6 +547,9 @@ def __init__(self, eval: EvalResource) -> None:
 
     @cached_property
     def jobs(self) -> JobsResourceWithStreamingResponse:
+        """
+        Llama Stack Evaluation API for running evaluations on model and agent candidates.
+        """
         return JobsResourceWithStreamingResponse(self._eval.jobs)
 
 
@@ -549,4 +572,7 @@ def __init__(self, eval: AsyncEvalResource) -> None:
 
     @cached_property
     def jobs(self) -> AsyncJobsResourceWithStreamingResponse:
+        """
+        Llama Stack Evaluation API for running evaluations on model and agent candidates.
+        """
         return AsyncJobsResourceWithStreamingResponse(self._eval.jobs)
@@ -27,6 +27,10 @@
 
 
 class JobsResource(SyncAPIResource):
+    """
+    Llama Stack Evaluation API for running evaluations on model and agent candidates.
+    """
+
     @cached_property
     def with_raw_response(self) -> JobsResourceWithRawResponse:
         """
@@ -157,6 +161,10 @@ def status(
 
 
 class AsyncJobsResource(AsyncAPIResource):
+    """
+    Llama Stack Evaluation API for running evaluations on model and agent candidates.
+    """
+
     @cached_property
     def with_raw_response(self) -> AsyncJobsResourceWithRawResponse:
         """
 
@@ -31,6 +31,15 @@
 
 
 class InferenceResource(SyncAPIResource):
+    """
+    Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+    This API provides the raw interface to the underlying models. Three kinds of models are supported:
+    - LLM models: these models generate "raw" and "chat" (conversational) completions.
+    - Embedding models: these models generate embeddings to be used for semantic search.
+    - Rerank models: these models reorder the documents based on their relevance to a query.
+    """
+
     @cached_property
     def with_raw_response(self) -> InferenceResourceWithRawResponse:
         """
@@ -109,6 +118,15 @@ def rerank(
 
 
 class AsyncInferenceResource(AsyncAPIResource):
+    """
+    Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+    This API provides the raw interface to the underlying models. Three kinds of models are supported:
+    - LLM models: these models generate "raw" and "chat" (conversational) completions.
+    - Embedding models: these models generate embeddings to be used for semantic search.
+    - Rerank models: these models reorder the documents based on their relevance to a query.
+    """
+
     @cached_property
     def with_raw_response(self) -> AsyncInferenceResourceWithRawResponse:
         """
 
@@ -35,6 +35,15 @@
 
 
 class BatchesResource(SyncAPIResource):
+    """
+    The API is designed to allow use of OpenAI client libraries for seamless integration.
+
+    This API provides the following extensions:
+     - idempotent batch creation
+
+    Note: This API is currently under active development and may undergo changes.
+    """
+
     @cached_property
     def with_raw_response(self) -> BatchesResourceWithRawResponse:
         """
@@ -228,6 +237,15 @@ def cancel(
 
 
 class AsyncBatchesResource(AsyncAPIResource):
+    """
+    The API is designed to allow use of OpenAI client libraries for seamless integration.
+
+    This API provides the following extensions:
+     - idempotent batch creation
+
+    Note: This API is currently under active development and may undergo changes.
+    """
+
     @cached_property
     def with_raw_response(self) -> AsyncBatchesResourceWithRawResponse:
         """
 
@@ -25,6 +25,14 @@
 class ChatResource(SyncAPIResource):
     @cached_property
     def completions(self) -> CompletionsResource:
+        """
+        Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+        This API provides the raw interface to the underlying models. Three kinds of models are supported:
+        - LLM models: these models generate "raw" and "chat" (conversational) completions.
+        - Embedding models: these models generate embeddings to be used for semantic search.
+        - Rerank models: these models reorder the documents based on their relevance to a query.
+        """
         return CompletionsResource(self._client)
 
     @cached_property
@@ -50,6 +58,14 @@ def with_streaming_response(self) -> ChatResourceWithStreamingResponse:
 class AsyncChatResource(AsyncAPIResource):
     @cached_property
     def completions(self) -> AsyncCompletionsResource:
+        """
+        Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+        This API provides the raw interface to the underlying models. Three kinds of models are supported:
+        - LLM models: these models generate "raw" and "chat" (conversational) completions.
+        - Embedding models: these models generate embeddings to be used for semantic search.
+        - Rerank models: these models reorder the documents based on their relevance to a query.
+        """
         return AsyncCompletionsResource(self._client)
 
     @cached_property
@@ -78,6 +94,14 @@ def __init__(self, chat: ChatResource) -> None:
 
     @cached_property
     def completions(self) -> CompletionsResourceWithRawResponse:
+        """
+        Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+        This API provides the raw interface to the underlying models. Three kinds of models are supported:
+        - LLM models: these models generate "raw" and "chat" (conversational) completions.
+        - Embedding models: these models generate embeddings to be used for semantic search.
+        - Rerank models: these models reorder the documents based on their relevance to a query.
+        """
         return CompletionsResourceWithRawResponse(self._chat.completions)
 
 
@@ -87,6 +111,14 @@ def __init__(self, chat: AsyncChatResource) -> None:
 
     @cached_property
     def completions(self) -> AsyncCompletionsResourceWithRawResponse:
+        """
+        Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+        This API provides the raw interface to the underlying models. Three kinds of models are supported:
+        - LLM models: these models generate "raw" and "chat" (conversational) completions.
+        - Embedding models: these models generate embeddings to be used for semantic search.
+        - Rerank models: these models reorder the documents based on their relevance to a query.
+        """
         return AsyncCompletionsResourceWithRawResponse(self._chat.completions)
 
 
@@ -96,6 +128,14 @@ def __init__(self, chat: ChatResource) -> None:
 
     @cached_property
     def completions(self) -> CompletionsResourceWithStreamingResponse:
+        """
+        Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+        This API provides the raw interface to the underlying models. Three kinds of models are supported:
+        - LLM models: these models generate "raw" and "chat" (conversational) completions.
+        - Embedding models: these models generate embeddings to be used for semantic search.
+        - Rerank models: these models reorder the documents based on their relevance to a query.
+        """
         return CompletionsResourceWithStreamingResponse(self._chat.completions)
 
 
@@ -105,4 +145,12 @@ def __init__(self, chat: AsyncChatResource) -> None:
 
     @cached_property
     def completions(self) -> AsyncCompletionsResourceWithStreamingResponse:
+        """
+        Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+        This API provides the raw interface to the underlying models. Three kinds of models are supported:
+        - LLM models: these models generate "raw" and "chat" (conversational) completions.
+        - Embedding models: these models generate embeddings to be used for semantic search.
+        - Rerank models: these models reorder the documents based on their relevance to a query.
+        """
         return AsyncCompletionsResourceWithStreamingResponse(self._chat.completions)