{ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "#/components/schemas/AiProxyAdvancedPluginConfig", "title": "AiProxyAdvancedPluginConfig", "x-speakeasy-entity": "PluginAiProxyAdvanced", "properties": { "config": { "type": "object", "properties": { "acls": { "description": "Optional ACL rules. Deny rules take precedence over allow rules.", "type": "object", "properties": { "allow": { "description": "Requests matching any allow rule are permitted unless also matched by a deny rule.", "type": "array", "items": { "description": "ACL rule composed of one or more match conditions.", "properties": { "match": { "description": "All conditions must match for the rule to apply (logical AND).", "type": "array", "items": { "description": "Single match condition (e.g. user or model value).", "properties": { "key": { "description": "Helper key used by some types: consumer (id|username), consumer_group (id|name), header (header name).", "type": "string" }, "type": { "description": "The attribute to match against.", "type": "string", "enum": [ "consumer", "consumer_group", "header", "ip", "model", "path", "provider" ] }, "values": { "description": "Allowed values for the selected type.", "type": "array", "items": { "type": "string" }, "minItems": 1 } }, "required": [ "type", "values" ], "type": "object" }, "minItems": 1 } }, "required": [ "match" ], "type": "object" }, "minItems": 1 }, "deny": { "description": "Requests matching any deny rule are blocked. Deny rules take precedence over allow rules.", "type": "array", "items": { "description": "ACL rule composed of one or more match conditions.", "properties": { "match": { "description": "All conditions must match for the rule to apply (logical AND).", "type": "array", "items": { "description": "Single match condition (e.g. 
user or model value).", "properties": { "key": { "description": "Helper key used by some types: consumer (id|username), consumer_group (id|name), header (header name).", "type": "string" }, "type": { "description": "The attribute to match against.", "type": "string", "enum": [ "consumer", "consumer_group", "header", "ip", "model", "path", "provider" ] }, "values": { "description": "Allowed values for the selected type.", "type": "array", "items": { "type": "string" }, "minItems": 1 } }, "required": [ "type", "values" ], "type": "object" }, "minItems": 1 } }, "required": [ "match" ], "type": "object" }, "minItems": 1 } } }, "balancer": { "type": "object", "properties": { "algorithm": { "description": "Which load balancing algorithm to use.", "type": "string", "default": "round-robin", "enum": [ "consistent-hashing", "least-connections", "lowest-latency", "lowest-usage", "priority", "round-robin", "semantic" ] }, "connect_timeout": { "type": "integer", "default": 60000, "maximum": 2147483646, "minimum": 1 }, "fail_timeout": { "description": "The period of time (in milliseconds) the target will be considered unavailable after the number of unsuccessful attempts reaches `max_fails`.", "type": "integer", "default": 10000, "maximum": 2147483646, "minimum": 1 }, "failover_criteria": { "description": "Specifies in which cases an upstream response should fail over to the next target. Each option in the array is equivalent to the function of http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_next_upstream", "type": "array", "items": { "enum": [ "error", "http_403", "http_404", "http_429", "http_500", "http_502", "http_503", "http_504", "invalid_header", "non_idempotent", "timeout" ], "type": "string" }, "default": [ "error", "timeout" ] }, "hash_on_header": { "description": "The header to use for consistent-hashing.", "type": "string", "default": "X-Kong-LLM-Request-ID" }, "latency_strategy": { "description": "What metrics to use for latency. 
Available values are: `tpot` (time-per-output-token) and `e2e`.", "type": "string", "default": "tpot", "enum": [ "e2e", "tpot" ] }, "max_fails": { "description": "Number of unsuccessful attempts to communicate with a target that should occur in the duration defined by `fail_timeout` before the target is considered unavailable. The zero value disables the circuit breaker. What is considered an unsuccessful attempt is defined by `failover_criteria`. Note the cases of `error`, `timeout` and `invalid_header` are always considered unsuccessful attempts, while the cases of `http_403` and `http_404` are never considered unsuccessful attempts.", "type": "integer", "default": 0, "maximum": 32767, "minimum": 0 }, "read_timeout": { "type": "integer", "default": 60000, "maximum": 2147483646, "minimum": 1 }, "retries": { "description": "The number of retries to execute upon failure to proxy.", "type": "integer", "default": 5, "maximum": 32767, "minimum": 0 }, "slots": { "description": "The number of slots in the load balancer algorithm.", "type": "integer", "default": 10000, "maximum": 65536, "minimum": 10 }, "tokens_count_strategy": { "description": "What tokens to use for usage calculation. Available values are: `total-tokens`, `prompt-tokens`, `completion-tokens`, `cost` and `llm-accuracy`.", "type": "string", "default": "total-tokens", "enum": [ "completion-tokens", "cost", "llm-accuracy", "prompt-tokens", "total-tokens" ] }, "write_timeout": { "type": "integer", "default": 60000, "maximum": 2147483646, "minimum": 1 } } }, "embeddings": { "type": "object", "properties": { "auth": { "type": "object", "properties": { "allow_override": { "description": "If enabled, the authorization header or parameter can be overridden in the request by the value configured in the plugin.", "type": "boolean", "default": false }, "aws_access_key_id": { "description": "Set this if you are using an AWS provider (Bedrock) and you are authenticating using static IAM User credentials. 
Setting this will override the AWS_ACCESS_KEY_ID environment variable for this plugin instance.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "aws_secret_access_key": { "description": "Set this if you are using an AWS provider (Bedrock) and you are authenticating using static IAM User credentials. Setting this will override the AWS_SECRET_ACCESS_KEY environment variable for this plugin instance.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "azure_client_id": { "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the client ID.", "type": "string", "x-referenceable": true }, "azure_client_secret": { "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the client secret.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "azure_tenant_id": { "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the tenant ID.", "type": "string", "x-referenceable": true }, "azure_use_managed_identity": { "description": "Set true to use the Azure Cloud Managed Identity (or user-assigned identity) to authenticate with Azure-provider models.", "type": "boolean", "default": false }, "gcp_metadata_url": { "description": "Custom metadata URL for GCP authentication. Useful for restricted network environments or custom GCP endpoints. If null, Kong will use the default Google metadata endpoint.", "type": "string", "x-referenceable": true }, "gcp_oauth_token_url": { "description": "Custom OAuth token URL for GCP authentication. Useful for restricted network environments or custom GCP endpoints. 
If null, Kong will use the default Google OAuth token endpoint.", "type": "string", "x-referenceable": true }, "gcp_service_account_json": { "description": "Set this field to the full JSON of the GCP service account to authenticate, if required. If null (and gcp_use_service_account is true), Kong will attempt to read from environment variable `GCP_SERVICE_ACCOUNT`.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "gcp_use_service_account": { "description": "Use service account auth for GCP-based providers and models.", "type": "boolean", "default": false }, "header_name": { "description": "If AI model requires authentication via Authorization or API key header, specify its name here.", "type": "string", "x-referenceable": true }, "header_value": { "description": "Specify the full auth header value for 'header_name', for example 'Bearer key' or just 'key'.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "param_location": { "description": "Specify whether the 'param_name' and 'param_value' options go in a query string, or the POST form/JSON body.", "type": "string", "enum": [ "body", "query" ] }, "param_name": { "description": "If AI model requires authentication via query parameter, specify its name here.", "type": "string", "x-referenceable": true }, "param_value": { "description": "Specify the full parameter value for 'param_name'.", "type": "string", "x-encrypted": true, "x-referenceable": true } } }, "model": { "type": "object", "properties": { "name": { "description": "Model name to execute.", "type": "string" }, "options": { "description": "Key/value settings for the model", "type": "object", "properties": { "azure": { "type": "object", "properties": { "api_version": { "description": "'api-version' for Azure OpenAI instances.", "type": "string", "default": "2023-05-15" }, "deployment_id": { "description": "Deployment ID for Azure OpenAI instances.", "type": "string" }, "instance": { "description": "Instance name for Azure 
OpenAI hosted models.", "type": "string" } } }, "bedrock": { "type": "object", "properties": { "aws_assume_role_arn": { "description": "If using AWS providers (Bedrock) you can assume a different role after authentication with the current IAM context is successful.", "type": "string" }, "aws_region": { "description": "If using AWS providers (Bedrock) you can override the `AWS_REGION` environment variable by setting this option.", "type": "string" }, "aws_role_session_name": { "description": "If using AWS providers (Bedrock), set the identifier of the assumed role session.", "type": "string" }, "aws_sts_endpoint_url": { "description": "If using AWS providers (Bedrock), override the STS endpoint URL when assuming a different role.", "type": "string" }, "batch_bucket_prefix": { "description": "S3 URI prefix (s3://bucket/prefix/) where Bedrock will get input files from and store results to for native batch API.", "type": "string" }, "batch_role_arn": { "description": "AWS role ARN used for calling the batch API. Tries to get the value from the request if omitted.", "type": "string" }, "embeddings_normalize": { "description": "If using AWS providers (Bedrock), set to true to normalize the embeddings.", "type": "boolean", "default": false }, "performance_config_latency": { "description": "Force the client's performance configuration 'latency' for all requests. Leave empty to let the consumer select the performance configuration.", "type": "string" }, "video_output_s3_uri": { "description": "S3 URI (s3://bucket/prefix) where Bedrock will store generated video files. 
Required for video generation.", "type": "string" } } }, "gemini": { "type": "object", "properties": { "api_endpoint": { "description": "If running Gemini on Vertex, specify the regional API endpoint (hostname only).", "type": "string" }, "location_id": { "description": "If running Gemini on Vertex, specify the location ID.", "type": "string" }, "project_id": { "description": "If running Gemini on Vertex, specify the project ID.", "type": "string" } } }, "huggingface": { "type": "object", "properties": { "use_cache": { "description": "Use the cache layer on the inference API", "type": "boolean" }, "wait_for_model": { "description": "Wait for the model if it is not ready", "type": "boolean" } } }, "upstream_url": { "description": "upstream url for the embeddings", "type": "string" } } }, "provider": { "description": "AI provider format to use for embeddings API", "type": "string", "enum": [ "azure", "bedrock", "gemini", "huggingface", "mistral", "ollama", "openai" ] } }, "required": [ "name", "provider" ] } }, "required": [ "model" ] }, "genai_category": { "description": "Generative AI category of the request", "type": "string", "default": "text/generation", "enum": [ "audio/speech", "audio/transcription", "image/generation", "realtime/generation", "text/embeddings", "text/generation", "video/generation" ] }, "llm_format": { "description": "LLM input and output format and schema to use", "type": "string", "default": "openai", "enum": [ "anthropic", "bedrock", "cohere", "gemini", "huggingface", "openai" ] }, "max_request_body_size": { "description": "max allowed body size allowed to be introspected. 
0 means unlimited, but the size of this body will still be limited by Nginx's client_max_body_size.", "type": "integer", "default": 1048576 }, "model_name_header": { "description": "Display the model name selected in the X-Kong-LLM-Model response header", "type": "boolean", "default": true }, "response_streaming": { "description": "Whether to 'optionally allow', 'deny', or 'always' (force) the streaming of answers via server sent events.", "type": "string", "default": "allow", "enum": [ "allow", "always", "deny" ] }, "targets": { "type": "array", "items": { "properties": { "auth": { "type": "object", "properties": { "allow_override": { "description": "If enabled, the authorization header or parameter can be overridden in the request by the value configured in the plugin.", "type": "boolean", "default": false }, "aws_access_key_id": { "description": "Set this if you are using an AWS provider (Bedrock) and you are authenticating using static IAM User credentials. Setting this will override the AWS_ACCESS_KEY_ID environment variable for this plugin instance.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "aws_secret_access_key": { "description": "Set this if you are using an AWS provider (Bedrock) and you are authenticating using static IAM User credentials. 
Setting this will override the AWS_SECRET_ACCESS_KEY environment variable for this plugin instance.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "azure_client_id": { "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the client ID.", "type": "string", "x-referenceable": true }, "azure_client_secret": { "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the client secret.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "azure_tenant_id": { "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the tenant ID.", "type": "string", "x-referenceable": true }, "azure_use_managed_identity": { "description": "Set true to use the Azure Cloud Managed Identity (or user-assigned identity) to authenticate with Azure-provider models.", "type": "boolean", "default": false }, "gcp_metadata_url": { "description": "Custom metadata URL for GCP authentication. Useful for restricted network environments or custom GCP endpoints. If null, Kong will use the default Google metadata endpoint.", "type": "string", "x-referenceable": true }, "gcp_oauth_token_url": { "description": "Custom OAuth token URL for GCP authentication. Useful for restricted network environments or custom GCP endpoints. If null, Kong will use the default Google OAuth token endpoint.", "type": "string", "x-referenceable": true }, "gcp_service_account_json": { "description": "Set this field to the full JSON of the GCP service account to authenticate, if required. 
If null (and gcp_use_service_account is true), Kong will attempt to read from environment variable `GCP_SERVICE_ACCOUNT`.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "gcp_use_service_account": { "description": "Use service account auth for GCP-based providers and models.", "type": "boolean", "default": false }, "header_name": { "description": "If AI model requires authentication via Authorization or API key header, specify its name here.", "type": "string", "x-referenceable": true }, "header_value": { "description": "Specify the full auth header value for 'header_name', for example 'Bearer key' or just 'key'.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "param_location": { "description": "Specify whether the 'param_name' and 'param_value' options go in a query string, or the POST form/JSON body.", "type": "string", "enum": [ "body", "query" ] }, "param_name": { "description": "If AI model requires authentication via query parameter, specify its name here.", "type": "string", "x-referenceable": true }, "param_value": { "description": "Specify the full parameter value for 'param_name'.", "type": "string", "x-encrypted": true, "x-referenceable": true } } }, "description": { "description": "The semantic description of the target, required if using semantic load balancing. Specially, setting this to 'CATCHALL' will indicate such target to be used when no other targets match the semantic threshold. 
Only used by ai-proxy-advanced.", "type": "string" }, "logging": { "type": "object", "properties": { "log_payloads": { "description": "If enabled, will log the request and response body into the Kong log plugin(s) output.Furthermore if Opentelemetry instrumentation is enabled the traces will contain this data as well.", "type": "boolean", "default": false }, "log_statistics": { "description": "If enabled and supported by the driver, will add model usage and token metrics into the Kong log plugin(s) output.", "type": "boolean", "default": false } } }, "metadata": { "description": "For internal use only. ", "type": "object", "additionalProperties": true, "nullable": true, "x-speakeasy-type-override": "any" }, "model": { "type": "object", "properties": { "model_alias": { "description": "The model name parameter from the request that this model should map to.", "type": "string" }, "name": { "description": "Model name to execute.", "type": "string" }, "options": { "description": "Key/value settings for the model", "type": "object", "properties": { "anthropic_version": { "description": "Defines the schema/API version, if using Anthropic provider.", "type": "string" }, "azure_api_version": { "description": "'api-version' for Azure OpenAI instances.", "type": "string", "default": "2023-05-15" }, "azure_deployment_id": { "description": "Deployment ID for Azure OpenAI instances.", "type": "string" }, "azure_instance": { "description": "Instance name for Azure OpenAI hosted models.", "type": "string" }, "bedrock": { "type": "object", "properties": { "aws_assume_role_arn": { "description": "If using AWS providers (Bedrock) you can assume a different role after authentication with the current IAM context is successful.", "type": "string" }, "aws_region": { "description": "If using AWS providers (Bedrock) you can override the `AWS_REGION` environment variable by setting this option.", "type": "string" }, "aws_role_session_name": { "description": "If using AWS providers 
(Bedrock), set the identifier of the assumed role session.", "type": "string" }, "aws_sts_endpoint_url": { "description": "If using AWS providers (Bedrock), override the STS endpoint URL when assuming a different role.", "type": "string" }, "batch_bucket_prefix": { "description": "S3 URI prefix (s3://bucket/prefix/) where Bedrock will get input files from and store results to for native batch API.", "type": "string" }, "batch_role_arn": { "description": "AWS role ARN used for calling the batch API. Tries to get the value from the request if omitted.", "type": "string" }, "embeddings_normalize": { "description": "If using AWS providers (Bedrock), set to true to normalize the embeddings.", "type": "boolean", "default": false }, "performance_config_latency": { "description": "Force the client's performance configuration 'latency' for all requests. Leave empty to let the consumer select the performance configuration.", "type": "string" }, "video_output_s3_uri": { "description": "S3 URI (s3://bucket/prefix) where Bedrock will store generated video files. 
Required for video generation.", "type": "string" } } }, "cohere": { "type": "object", "properties": { "embedding_input_type": { "description": "The purpose of the input text to calculate embedding vectors.", "type": "string", "default": "classification", "enum": [ "classification", "clustering", "image", "search_document", "search_query" ] }, "wait_for_model": { "description": "Wait for the model if it is not ready", "type": "boolean" } } }, "dashscope": { "type": "object", "properties": { "international": { "description": "Two Dashscope endpoints are available, and the international endpoint will be used when this is set to `true`.\nIt is recommended to set this to `true` when using international version of dashscope.\n", "type": "boolean", "default": true } } }, "databricks": { "type": "object", "properties": { "workspace_instance_id": { "description": "Workspace Instance ID ('dbc-xxx-yyy') for Databricks model serving.", "type": "string" } } }, "embeddings_dimensions": { "description": "If using embeddings models, set the number of dimensions to generate.", "type": "integer" }, "gemini": { "type": "object", "properties": { "api_endpoint": { "description": "If running Gemini on Vertex, specify the regional API endpoint (hostname only).", "type": "string" }, "endpoint_id": { "description": "If running Gemini on Vertex Model Garden, specify the endpoint ID.", "type": "string" }, "location_id": { "description": "If running Gemini on Vertex, specify the location ID.", "type": "string" }, "project_id": { "description": "If running Gemini on Vertex, specify the project ID.", "type": "string" } } }, "huggingface": { "type": "object", "properties": { "use_cache": { "description": "Use the cache layer on the inference API", "type": "boolean" }, "wait_for_model": { "description": "Wait for the model if it is not ready", "type": "boolean" } } }, "input_cost": { "description": "Defines the cost per 1M tokens in your prompt.", "type": "number" }, "llama2_format": { 
"description": "If using llama2 provider, select the upstream message format.", "type": "string", "enum": [ "ollama", "openai", "raw" ] }, "max_tokens": { "description": "Defines the max_tokens, if using chat or completion models.", "type": "integer" }, "mistral_format": { "description": "If using mistral provider, select the upstream message format.", "type": "string", "enum": [ "ollama", "openai" ] }, "output_cost": { "description": "Defines the cost per 1M tokens in the output of the AI.", "type": "number" }, "temperature": { "description": "Defines the matching temperature, if using chat or completion models.", "type": "number", "maximum": 5, "minimum": 0 }, "top_k": { "description": "Defines the top-k most likely tokens, if supported.", "type": "integer", "maximum": 500, "minimum": 0 }, "top_p": { "description": "Defines the top-p probability mass, if supported.", "type": "number", "maximum": 1, "minimum": 0 }, "upstream_path": { "description": "Manually specify or override the AI operation path, used when e.g. using the 'preserve' route_type.", "type": "string" }, "upstream_url": { "description": "Manually specify or override the full URL to the AI operation endpoints, when calling (self-)hosted models, or for running via a private endpoint.", "type": "string" } } }, "provider": { "description": "AI provider request format - Kong translates requests to and from the specified backend compatible formats.", "type": "string", "enum": [ "anthropic", "azure", "bedrock", "cerebras", "cohere", "dashscope", "databricks", "deepseek", "gemini", "huggingface", "llama2", "mistral", "ollama", "openai", "vllm", "xai" ] } }, "required": [ "provider" ] }, "route_type": { "description": "The model's operation implementation, for this provider. 
", "type": "string", "enum": [ "audio/v1/audio/speech", "audio/v1/audio/transcriptions", "audio/v1/audio/translations", "image/v1/images/edits", "image/v1/images/generations", "llm/v1/assistants", "llm/v1/batches", "llm/v1/chat", "llm/v1/completions", "llm/v1/embeddings", "llm/v1/files", "llm/v1/responses", "preserve", "realtime/v1/realtime", "video/v1/videos/generations" ] }, "weight": { "description": "The weight this target gets within the upstream loadbalancer (1-65535). Only used by ai-proxy-advanced.", "type": "integer", "default": 100, "maximum": 65535, "minimum": 1 } }, "required": [ "model", "route_type" ], "type": "object" } }, "vectordb": { "type": "object", "properties": { "dimensions": { "description": "the desired dimensionality for the vectors", "type": "integer" }, "distance_metric": { "description": "the distance metric to use for vector searches", "type": "string", "enum": [ "cosine", "euclidean" ] }, "pgvector": { "type": "object", "properties": { "database": { "description": "the database of the pgvector database", "type": "string", "default": "kong-pgvector" }, "host": { "description": "the host of the pgvector database", "type": "string", "default": "127.0.0.1" }, "password": { "description": "the password of the pgvector database", "type": "string", "x-encrypted": true, "x-referenceable": true }, "port": { "description": "the port of the pgvector database", "type": "integer", "default": 5432 }, "ssl": { "description": "whether to use ssl for the pgvector database", "type": "boolean", "default": false }, "ssl_cert": { "description": "the path of ssl cert to use for the pgvector database", "type": "string" }, "ssl_cert_key": { "description": "the path of ssl cert key to use for the pgvector database", "type": "string" }, "ssl_required": { "description": "whether ssl is required for the pgvector database", "type": "boolean", "default": false }, "ssl_verify": { "description": "whether to verify ssl for the pgvector database", "type": "boolean", 
"default": true }, "ssl_version": { "description": "the ssl version to use for the pgvector database", "type": "string", "default": "tlsv1_2", "enum": [ "any", "tlsv1_2", "tlsv1_3" ] }, "timeout": { "description": "the timeout of the pgvector database", "type": "number", "default": 5000 }, "user": { "description": "the user of the pgvector database", "type": "string", "default": "postgres", "x-referenceable": true } } }, "redis": { "type": "object", "properties": { "cloud_authentication": { "description": "Cloud auth related configs for connecting to a Cloud Provider's Redis instance.", "type": "object", "properties": { "auth_provider": { "description": "Auth providers to be used to authenticate to a Cloud Provider's Redis instance.", "type": "string", "enum": [ "aws", "azure", "gcp" ], "x-referenceable": true }, "aws_access_key_id": { "description": "AWS Access Key ID to be used for authentication when `auth_provider` is set to `aws`.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "aws_assume_role_arn": { "description": "The ARN of the IAM role to assume for generating ElastiCache IAM authentication tokens.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "aws_cache_name": { "description": "The name of the AWS Elasticache cluster when `auth_provider` is set to `aws`.", "type": "string", "x-referenceable": true }, "aws_is_serverless": { "description": "This flag specifies whether the cluster is serverless when auth_provider is set to `aws`.", "type": "boolean", "default": true }, "aws_region": { "description": "The region of the AWS ElastiCache cluster when `auth_provider` is set to `aws`.", "type": "string", "x-referenceable": true }, "aws_role_session_name": { "description": "The session name for the temporary credentials when assuming the IAM role.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "aws_secret_access_key": { "description": "AWS Secret Access Key to be used for authentication when 
`auth_provider` is set to `aws`.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "azure_client_id": { "description": "Azure Client ID to be used for authentication when `auth_provider` is set to `azure`.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "azure_client_secret": { "description": "Azure Client Secret to be used for authentication when `auth_provider` is set to `azure`.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "azure_tenant_id": { "description": "Azure Tenant ID to be used for authentication when `auth_provider` is set to `azure`.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "gcp_service_account_json": { "description": "GCP Service Account JSON to be used for authentication when `auth_provider` is set to `gcp`.", "type": "string", "x-encrypted": true, "x-referenceable": true } } }, "cluster_max_redirections": { "description": "Maximum retry attempts for redirection.", "type": "integer", "default": 5 }, "cluster_nodes": { "description": "Cluster addresses to use for Redis connections when the `redis` strategy is defined. Defining this field implies using a Redis Cluster. The minimum length of the array is 1 element.", "type": "array", "items": { "properties": { "ip": { "description": "A string representing a host name, such as example.com.", "type": "string", "default": "127.0.0.1" }, "port": { "description": "An integer representing a port number between 0 and 65535, inclusive.", "type": "integer", "default": 6379, "maximum": 65535, "minimum": 0 } }, "type": "object" }, "minItems": 1 }, "connect_timeout": { "description": "An integer representing a timeout in milliseconds. Must be between 0 and 2^31-2.", "type": "integer", "default": 2000, "maximum": 2147483646, "minimum": 0 }, "connection_is_proxied": { "description": "If the connection to Redis is proxied (e.g. Envoy), set it `true`. 
Set the `host` and `port` to point to the proxy address.", "type": "boolean", "default": false }, "database": { "description": "Database to use for the Redis connection when using the `redis` strategy", "type": "integer", "default": 0 }, "host": { "description": "A string representing a host name, such as example.com.", "type": "string", "default": "127.0.0.1", "x-referenceable": true }, "keepalive_backlog": { "description": "Limits the total number of opened connections for a pool. If the connection pool is full, connection queues above the limit go into the backlog queue. If the backlog queue is full, subsequent connect operations fail and return `nil`. Queued operations (subject to set timeouts) resume once the number of connections in the pool is less than `keepalive_pool_size`. If latency is high or throughput is low, try increasing this value. Empirically, this value is larger than `keepalive_pool_size`.", "type": "integer", "maximum": 2147483646, "minimum": 0 }, "keepalive_pool_size": { "description": "The size limit for every cosocket connection pool associated with every remote server, per worker process. If neither `keepalive_pool_size` nor `keepalive_backlog` is specified, no pool is created. If `keepalive_pool_size` isn't specified but `keepalive_backlog` is specified, then the pool uses the default value. Try to increase (e.g. 512) this value if latency is high or throughput is low.", "type": "integer", "default": 256, "maximum": 2147483646, "minimum": 1 }, "password": { "description": "Password to use for Redis connections. If undefined, no AUTH commands are sent to Redis.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "port": { "description": "An integer representing a port number between 0 and 65535, inclusive.", "type": "integer", "default": 6379, "maximum": 65535, "minimum": 0, "x-referenceable": true }, "read_timeout": { "description": "An integer representing a timeout in milliseconds. 
Must be between 0 and 2^31-2.", "type": "integer", "default": 2000, "maximum": 2147483646, "minimum": 0 }, "send_timeout": { "description": "An integer representing a timeout in milliseconds. Must be between 0 and 2^31-2.", "type": "integer", "default": 2000, "maximum": 2147483646, "minimum": 0 }, "sentinel_master": { "description": "Sentinel master to use for Redis connections. Defining this value implies using Redis Sentinel.", "type": "string" }, "sentinel_nodes": { "description": "Sentinel node addresses to use for Redis connections when the `redis` strategy is defined. Defining this field implies using a Redis Sentinel. The minimum length of the array is 1 element.", "type": "array", "items": { "properties": { "host": { "description": "A string representing a host name, such as example.com.", "type": "string", "default": "127.0.0.1" }, "port": { "description": "An integer representing a port number between 0 and 65535, inclusive.", "type": "integer", "default": 6379, "maximum": 65535, "minimum": 0 } }, "type": "object" }, "minItems": 1 }, "sentinel_password": { "description": "Sentinel password to authenticate with a Redis Sentinel instance. If undefined, no AUTH commands are sent to Redis Sentinels.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "sentinel_role": { "description": "Sentinel role to use for Redis connections when the `redis` strategy is defined. Defining this value implies using Redis Sentinel.", "type": "string", "enum": [ "any", "master", "slave" ] }, "sentinel_username": { "description": "Sentinel username to authenticate with a Redis Sentinel instance. If undefined, ACL authentication won't be performed. 
This requires Redis v6.2.0+.", "type": "string", "x-referenceable": true }, "server_name": { "description": "A string representing an SNI (server name indication) value for TLS.", "type": "string", "x-referenceable": true }, "ssl": { "description": "If set to true, uses SSL to connect to Redis.", "type": "boolean", "default": false }, "ssl_verify": { "description": "If set to true, verifies the validity of the server SSL certificate. If setting this parameter, also configure `lua_ssl_trusted_certificate` in `kong.conf` to specify the CA (or server) certificate used by your Redis server. You may also need to configure `lua_ssl_verify_depth` accordingly.", "type": "boolean", "default": true }, "username": { "description": "Username to use for Redis connections. If undefined, ACL authentication won't be performed. This requires Redis v6.0.0+. To be compatible with Redis v5.x.y, you can set it to `default`.", "type": "string", "x-referenceable": true } } }, "strategy": { "description": "Which vector database driver to use.", "type": "string", "enum": [ "pgvector", "redis" ] }, "threshold": { "description": "The default similarity threshold for accepting semantic search results (float). A higher threshold means more results are considered similar.", "type": "number" } }, "required": [ "dimensions", "distance_metric", "strategy" ] } }, "required": [ "targets" ] }, "consumer": { "description": "If set, the plugin will activate only for requests where the specified consumer has been authenticated. (Note that some plugins cannot be restricted to consumers this way.) Leave unset for the plugin to activate regardless of the authenticated Consumer.", "type": "object", "additionalProperties": false, "properties": { "id": { "type": "string" } } }, "consumer_group": { "description": "If set, the plugin will activate only for requests where the specified consumer group has been authenticated. (Note that some plugins cannot be restricted to consumer groups this way.) 
Leave unset for the plugin to activate regardless of the authenticated Consumer Groups.", "type": "object", "additionalProperties": false, "properties": { "id": { "type": "string" } } }, "name": { "const": "ai-proxy-advanced" }, "protocols": { "description": "A list of the request protocols that will trigger this plugin. The default value, as well as the possible values allowed on this field, may change depending on the plugin type. For example, plugins that only work in stream mode will only support tcp and tls.", "type": "array", "items": { "enum": [ "grpc", "grpcs", "http", "https", "ws", "wss" ], "type": "string" }, "format": "set", "default": [ "grpc", "grpcs", "http", "https", "ws", "wss" ] }, "route": { "description": "If set, the plugin will only activate when receiving requests via the specified route. Leave unset for the plugin to activate regardless of the route being used.", "type": "object", "additionalProperties": false, "properties": { "id": { "type": "string" } } }, "service": { "description": "If set, the plugin will only activate when receiving requests via one of the routes belonging to the specified Service. Leave unset for the plugin to activate regardless of the Service being matched.", "type": "object", "additionalProperties": false, "properties": { "id": { "type": "string" } } } }, "required": [ "config" ] }