//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos
{
    using System.Collections.Generic;
    using System.Threading;
    using System.Threading.Tasks;

    /// <summary>
    /// Defines a contract for generating float32 vector embeddings from input text strings
    /// supplied by the Azure Cosmos DB query pipeline.
    /// The SDK invokes this when a query plan contains <c>GenerateEmbeddings(...)</c> literals
    /// (for example <c>VectorDistance(GenerateEmbeddings("big brown cat"), c.embedding)</c>).
    /// Set a client-wide default via <c>CosmosClientOptions.EmbeddingGenerator</c> or
    /// <c>CosmosClientBuilder.WithEmbeddingGenerator</c>. Implementations MUST be thread-safe and are
    /// responsible for any caching, retries, and authentication required to call the underlying
    /// embedding service.
    /// </summary>
    /// <remarks>
    /// <para>
    /// <b>Preview surface.</b> The SDK call site that invokes this method is delivered
    /// in a follow-up release. Setting an instance via
    /// <c>CosmosClientOptions.EmbeddingGenerator</c> or
    /// <c>CosmosClientBuilder.WithEmbeddingGenerator</c> has no runtime effect
    /// today; the surface is shipped in this preview so customers can author and test
    /// implementations against the contract ahead of the resolver landing.
    /// </para>
    /// <para>
    /// <b>Lifecycle and disposal.</b> The customer owns the generator instance. The SDK
    /// keeps a reference for the lifetime of the client it was configured on (or of the
    /// reference it was bound to) but never disposes it. If the
    /// implementation holds disposable resources (for example an <c>HttpClient</c> or an
    /// <c>EmbeddingClient</c>), the customer is responsible for disposing them when their
    /// application tears down.
    /// </para>
    /// <para>
    /// <b>Error semantics.</b> Implementations are responsible for handling transient
    /// failures from the underlying embedding service (network errors, rate limiting, etc.)
    /// via their own retry policy. The SDK does not retry calls to this method. Any exception
    /// thrown by the implementation is wrapped by the SDK and
    /// surfaced to the originating SDK caller.
    /// </para>
    /// <para>
    /// <b>Cancellation.</b> Implementations should honor the supplied
    /// <see cref="CancellationToken"/> cooperatively wherever feasible (typically by forwarding
    /// it to the underlying HTTP call). Best-effort cancellation is acceptable; ignoring the
    /// token entirely is discouraged because it defeats caller-side timeouts.
    /// </para>
    /// <para>
    /// <b>Idempotency and concurrency.</b> The SDK may invoke this method multiple times
    /// for the same inputs (for example during internal query retry) and may invoke it
    /// concurrently from multiple threads. Implementations must be safe to call repeatedly
    /// and from parallel callers, and must not assume per-call state. Note that each call
    /// typically incurs cost at the underlying embedding service; implementations may cache
    /// responses internally if they want to avoid duplicate billing for identical inputs.
    /// </para>
    /// </remarks>
#if PREVIEW
    public
#else
    internal
#endif
    interface ICosmosEmbeddingGenerator
    {
        /// <summary>
        /// Generates an embedding vector for each of the supplied input strings.
        /// </summary>
        /// <param name="texts">
        /// The input strings to embed, in the order the implementation MUST preserve in the
        /// returned result (one vector per input, same
        /// index). Typed as <see cref="IReadOnlyList{T}"/> so implementations can size their
        /// outbound batch without re-enumeration and so the 1:1 ordered contract is encoded
        /// in the signature.
        /// </param>
        /// <param name="endpoint">
        /// The embedding service endpoint to call (for example the Azure OpenAI account endpoint).
        /// Sourced from the container's <c>EmbeddingSource.Endpoint</c> when configured.
        /// </param>
        /// <param name="deploymentName">
        /// The model deployment name to invoke at <paramref name="endpoint"/>. Sourced from the
        /// container's <c>EmbeddingSource.DeploymentName</c> when configured.
        /// </param>
        /// <param name="dimensions">
        /// The vector dimensionality the produced embeddings must match. For models that support
        /// dimensionality reduction (for example <c>text-embedding-3-small</c> /
        /// <c>text-embedding-3-large</c>), implementations MUST forward this value to the
        /// underlying service so the returned vectors have the expected length; otherwise the
        /// service returns its default size, which may not match the container's
        /// configured vector dimensions.
        /// </param>
        /// <param name="cancellationToken">
        /// A <see cref="CancellationToken"/> propagated from the originating SDK call
        /// (for example <c>FeedIterator.ReadNextAsync</c>). Implementations should honor cancellation.
        /// </param>
        /// <returns>
        /// A task that resolves to the embedding result, which
        /// contains one float32 vector per input,
        /// each of length <paramref name="dimensions"/>, in the same order as
        /// <paramref name="texts"/>.
        /// </returns>
        /// <remarks>
        /// Query-time vectors are sent to the Azure Cosmos DB gateway as float32 regardless of
        /// the container's stored vector data type. Implementations targeting
        /// containers configured for a non-float32 storage type
        /// should still produce float32 vectors here; the Azure Cosmos DB service applies the
        /// configured quantization at write time. This contract
        /// covers all four storage configurations supported by
        /// the container's vector embedding configuration.
        /// </remarks>
        // NOTE(review): the documentation above describes a task that resolves to a result
        // object holding the vectors, but the generic argument of the return type is not
        // present in this copy of the file. Restore it (Task<TResult>) once the result
        // type's name is confirmed — TODO confirm against the original declaration.
        Task GenerateEmbeddingsAsync(
            IReadOnlyList<string> texts,
            string endpoint,
            string deploymentName,
            int dimensions,
            CancellationToken cancellationToken = default);
    }
}