//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------
namespace Microsoft.Azure.Cosmos
{
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
/// <summary>
/// Defines a contract for generating float32 vector embeddings from input text strings
/// supplied by the Azure Cosmos DB query pipeline.
/// The SDK invokes this when a query plan contains <c>GenerateEmbeddings(...)</c> literals
/// (for example <c>VectorDistance(GenerateEmbeddings("big brown cat"), c.embedding)</c>).
/// Set a client-wide default via <c>CosmosClientOptions.EmbeddingGenerator</c> or
/// <c>CosmosClientBuilder.WithEmbeddingGenerator</c>. Implementations MUST be thread-safe and are
/// responsible for any caching, retries, and authentication required to call the underlying
/// embedding service.
/// </summary>
/// <remarks>
/// <para>
/// <b>Preview surface.</b> The SDK call site that invokes this method is delivered
/// in a follow-up release. Setting an instance via
/// <c>CosmosClientOptions.EmbeddingGenerator</c> or
/// <c>CosmosClientBuilder.WithEmbeddingGenerator</c> has no runtime effect
/// today; the surface is shipped in this preview so customers can author and test
/// implementations against the contract ahead of the resolver landing.
/// </para>
/// <para>
/// <b>Lifecycle and disposal.</b> The customer owns the generator instance. The SDK
/// keeps a reference for the lifetime of the configured client (or of whichever SDK
/// object the generator was bound to) but never disposes it. If the
/// implementation holds disposable resources (for example an <c>HttpClient</c> or an
/// <c>EmbeddingClient</c>), the customer is responsible for disposing them when their
/// application tears down.
/// </para>
/// <para>
/// <b>Error semantics.</b> Implementations are responsible for handling transient
/// failures from the underlying embedding service (network errors, rate limiting, etc.)
/// via their own retry policy. The SDK does not retry calls to this method. Any exception
/// thrown by the implementation is wrapped by the SDK in its own exception type and
/// surfaced to the originating SDK caller.
/// </para>
/// <para>
/// <b>Cancellation.</b> Implementations should honor the supplied
/// <see cref="CancellationToken"/> cooperatively wherever feasible (typically by forwarding
/// it to the underlying HTTP call). Best-effort cancellation is acceptable; ignoring the
/// token entirely is discouraged because it defeats caller-side timeouts.
/// </para>
/// <para>
/// <b>Idempotency and concurrency.</b> The SDK may invoke this method multiple times
/// for the same inputs (for example during internal query retry) and may invoke it
/// concurrently from multiple threads. Implementations must be safe to call repeatedly
/// and from parallel callers, and must not assume per-call state. Note that each call
/// typically incurs cost at the underlying embedding service; implementations may cache
/// responses internally if they want to avoid duplicate billing for identical inputs.
/// </para>
/// </remarks>
#if PREVIEW
public
#else
internal
#endif
interface ICosmosEmbeddingGenerator
{
// NOTE(review): Task and IReadOnlyList appear below without generic type arguments;
// confirm the intended type arguments against the original declaration.
/// <summary>
/// Generates an embedding vector for each of the supplied input strings.
/// </summary>
/// <param name="texts">
/// The input strings to embed, in the order the implementation MUST preserve in the
/// returned result (one vector per input, same
/// index). Typed as a read-only list so implementations can size their
/// outbound batch without re-enumeration and so the 1:1 ordered contract is encoded
/// in the signature.
/// </param>
/// <param name="endpoint">
/// The embedding service endpoint to call (for example the Azure OpenAI account endpoint).
/// Sourced from the container's <c>EmbeddingSource.Endpoint</c> when configured.
/// </param>
/// <param name="deploymentName">
/// The model deployment name to invoke at <paramref name="endpoint"/>. Sourced from the
/// container's <c>EmbeddingSource.DeploymentName</c> when configured.
/// </param>
/// <param name="dimensions">
/// The vector dimensionality the produced embeddings must match. For models that support
/// dimensionality reduction (for example <c>text-embedding-3-small</c> /
/// <c>text-embedding-3-large</c>), implementations MUST forward this value to the
/// underlying service so the returned vectors have the expected length; otherwise the
/// service returns its default size, which may not match the container's
/// configured embedding dimensions.
/// </param>
/// <param name="cancellationToken">
/// A <see cref="CancellationToken"/> propagated from the originating SDK call
/// (for example <c>FeedIterator.ReadNextAsync</c>). Implementations should honor cancellation.
/// </param>
/// <returns>
/// A task that resolves to a result object whose
/// embedding collection contains one float32 vector per input,
/// each of length <paramref name="dimensions"/>, in the same order as
/// <paramref name="texts"/>.
/// </returns>
/// <remarks>
/// Query-time vectors are sent to the Azure Cosmos DB gateway as float32 regardless of
/// the container's stored vector data type. Implementations targeting
/// containers configured for any of the non-float32
/// storage types
/// should still produce float32 vectors here; the Azure Cosmos DB service applies the
/// configured quantization at write time. This single float32 contract
/// covers all four storage configurations supported by
/// the container's vector embedding settings.
/// </remarks>
Task GenerateEmbeddingsAsync(
IReadOnlyList texts,
string endpoint,
string deploymentName,
int dimensions,
CancellationToken cancellationToken = default);
}
}