{ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "#/components/schemas/AiRequestTransformerPluginConfig", "title": "AiRequestTransformerPluginConfig", "x-speakeasy-entity": "PluginAiRequestTransformer", "properties": { "config": { "type": "object", "properties": { "http_proxy_host": { "description": "A string representing a host name, such as example.com.", "type": "string" }, "http_proxy_port": { "description": "An integer representing a port number between 0 and 65535, inclusive.", "type": "integer", "maximum": 65535, "minimum": 0 }, "http_timeout": { "description": "Timeout in milliseconds for the AI upstream service.", "type": "integer", "default": 60000 }, "https_proxy_host": { "description": "A string representing a host name, such as example.com.", "type": "string" }, "https_proxy_port": { "description": "An integer representing a port number between 0 and 65535, inclusive.", "type": "integer", "maximum": 65535, "minimum": 0 }, "https_verify": { "description": "Verify the TLS certificate of the AI upstream service.", "type": "boolean", "default": true }, "llm": { "type": "object", "properties": { "auth": { "type": "object", "properties": { "allow_override": { "description": "If enabled, the authorization header or parameter can be overridden in the request by the value configured in the plugin.", "type": "boolean", "default": false }, "aws_access_key_id": { "description": "Set this if you are using an AWS provider (Bedrock) and you are authenticating using static IAM User credentials. Setting this will override the AWS_ACCESS_KEY_ID environment variable for this plugin instance.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "aws_secret_access_key": { "description": "Set this if you are using an AWS provider (Bedrock) and you are authenticating using static IAM User credentials. 
Setting this will override the AWS_SECRET_ACCESS_KEY environment variable for this plugin instance.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "azure_client_id": { "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the client ID.", "type": "string", "x-referenceable": true }, "azure_client_secret": { "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the client secret.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "azure_tenant_id": { "description": "If azure_use_managed_identity is set to true, and you need to use a different user-assigned identity for this LLM instance, set the tenant ID.", "type": "string", "x-referenceable": true }, "azure_use_managed_identity": { "description": "Set true to use the Azure Cloud Managed Identity (or user-assigned identity) to authenticate with Azure-provider models.", "type": "boolean", "default": false }, "gcp_metadata_url": { "description": "Custom metadata URL for GCP authentication. Useful for restricted network environments or custom GCP endpoints. If null, Kong will use the default Google metadata endpoint.", "type": "string", "x-referenceable": true }, "gcp_oauth_token_url": { "description": "Custom OAuth token URL for GCP authentication. Useful for restricted network environments or custom GCP endpoints. If null, Kong will use the default Google OAuth token endpoint.", "type": "string", "x-referenceable": true }, "gcp_service_account_json": { "description": "Set this field to the full JSON of the GCP service account to authenticate, if required. 
If null (and gcp_use_service_account is true), Kong will attempt to read from environment variable `GCP_SERVICE_ACCOUNT`.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "gcp_use_service_account": { "description": "Use service account auth for GCP-based providers and models.", "type": "boolean", "default": false }, "header_name": { "description": "If AI model requires authentication via Authorization or API key header, specify its name here.", "type": "string", "x-referenceable": true }, "header_value": { "description": "Specify the full auth header value for 'header_name', for example 'Bearer key' or just 'key'.", "type": "string", "x-encrypted": true, "x-referenceable": true }, "param_location": { "description": "Specify whether the 'param_name' and 'param_value' options go in a query string, or the POST form/JSON body.", "type": "string", "enum": [ "body", "query" ] }, "param_name": { "description": "If AI model requires authentication via query parameter, specify its name here.", "type": "string", "x-referenceable": true }, "param_value": { "description": "Specify the full parameter value for 'param_name'.", "type": "string", "x-encrypted": true, "x-referenceable": true } } }, "description": { "description": "The semantic description of the target, required if using semantic load balancing. As a special case, setting this to 'CATCHALL' marks this target as the one to use when no other targets match the semantic threshold. 
Only used by ai-proxy-advanced.", "type": "string" }, "logging": { "type": "object", "properties": { "log_payloads": { "description": "If enabled, will log the request and response body into the Kong log plugin(s) output. Furthermore, if OpenTelemetry instrumentation is enabled, the traces will contain this data as well.", "type": "boolean", "default": false }, "log_statistics": { "description": "If enabled and supported by the driver, will add model usage and token metrics into the Kong log plugin(s) output.", "type": "boolean", "default": false } } }, "metadata": { "description": "For internal use only. ", "type": "object", "additionalProperties": true, "nullable": true, "x-speakeasy-type-override": "any" }, "model": { "type": "object", "properties": { "model_alias": { "description": "The model name parameter from the request that this model should map to.", "type": "string" }, "name": { "description": "Model name to execute.", "type": "string" }, "options": { "description": "Key/value settings for the model", "type": "object", "properties": { "anthropic_version": { "description": "Defines the schema/API version, if using Anthropic provider.", "type": "string" }, "azure_api_version": { "description": "'api-version' for Azure OpenAI instances.", "type": "string", "default": "2023-05-15" }, "azure_deployment_id": { "description": "Deployment ID for Azure OpenAI instances.", "type": "string" }, "azure_instance": { "description": "Instance name for Azure OpenAI hosted models.", "type": "string" }, "bedrock": { "type": "object", "properties": { "aws_assume_role_arn": { "description": "If using AWS providers (Bedrock) you can assume a different role after authentication with the current IAM context is successful.", "type": "string" }, "aws_region": { "description": "If using AWS providers (Bedrock) you can override the `AWS_REGION` environment variable by setting this option.", "type": "string" }, "aws_role_session_name": { "description": "If using AWS providers 
(Bedrock), set the identifier of the assumed role session.", "type": "string" }, "aws_sts_endpoint_url": { "description": "If using AWS providers (Bedrock), override the STS endpoint URL when assuming a different role.", "type": "string" }, "batch_bucket_prefix": { "description": "S3 URI prefix (s3://bucket/prefix/) where Bedrock will get input files from and store results to for native batch API.", "type": "string" }, "batch_role_arn": { "description": "AWS role ARN used for calling the batch API. Tries to get the value from the request if omitted.", "type": "string" }, "embeddings_normalize": { "description": "If using AWS providers (Bedrock), set to true to normalize the embeddings.", "type": "boolean", "default": false }, "performance_config_latency": { "description": "Force the client's performance configuration 'latency' for all requests. Leave empty to let the consumer select the performance configuration.", "type": "string" }, "video_output_s3_uri": { "description": "S3 URI (s3://bucket/prefix) where Bedrock will store generated video files. 
Required for video generation.", "type": "string" } } }, "cohere": { "type": "object", "properties": { "embedding_input_type": { "description": "The purpose of the input text to calculate embedding vectors.", "type": "string", "default": "classification", "enum": [ "classification", "clustering", "image", "search_document", "search_query" ] }, "wait_for_model": { "description": "Wait for the model if it is not ready", "type": "boolean" } } }, "dashscope": { "type": "object", "properties": { "international": { "description": "Two Dashscope endpoints are available, and the international endpoint will be used when this is set to `true`.\nIt is recommended to set this to `true` when using international version of dashscope.\n", "type": "boolean", "default": true } } }, "databricks": { "type": "object", "properties": { "workspace_instance_id": { "description": "Workspace Instance ID ('dbc-xxx-yyy') for Databricks model serving.", "type": "string" } } }, "embeddings_dimensions": { "description": "If using embeddings models, set the number of dimensions to generate.", "type": "integer" }, "gemini": { "type": "object", "properties": { "api_endpoint": { "description": "If running Gemini on Vertex, specify the regional API endpoint (hostname only).", "type": "string" }, "endpoint_id": { "description": "If running Gemini on Vertex Model Garden, specify the endpoint ID.", "type": "string" }, "location_id": { "description": "If running Gemini on Vertex, specify the location ID.", "type": "string" }, "project_id": { "description": "If running Gemini on Vertex, specify the project ID.", "type": "string" } } }, "huggingface": { "type": "object", "properties": { "use_cache": { "description": "Use the cache layer on the inference API", "type": "boolean" }, "wait_for_model": { "description": "Wait for the model if it is not ready", "type": "boolean" } } }, "input_cost": { "description": "Defines the cost per 1M tokens in your prompt.", "type": "number" }, "llama2_format": { 
"description": "If using llama2 provider, select the upstream message format.", "type": "string", "enum": [ "ollama", "openai", "raw" ] }, "max_tokens": { "description": "Defines the max_tokens, if using chat or completion models.", "type": "integer" }, "mistral_format": { "description": "If using mistral provider, select the upstream message format.", "type": "string", "enum": [ "ollama", "openai" ] }, "output_cost": { "description": "Defines the cost per 1M tokens in the output of the AI.", "type": "number" }, "temperature": { "description": "Defines the matching temperature, if using chat or completion models.", "type": "number", "maximum": 5, "minimum": 0 }, "top_k": { "description": "Defines the top-k most likely tokens, if supported.", "type": "integer", "maximum": 500, "minimum": 0 }, "top_p": { "description": "Defines the top-p probability mass, if supported.", "type": "number", "maximum": 1, "minimum": 0 }, "upstream_path": { "description": "Manually specify or override the AI operation path, used when e.g. using the 'preserve' route_type.", "type": "string" }, "upstream_url": { "description": "Manually specify or override the full URL to the AI operation endpoints, when calling (self-)hosted models, or for running via a private endpoint.", "type": "string" } } }, "provider": { "description": "AI provider request format - Kong translates requests to and from the specified backend compatible formats.", "type": "string", "enum": [ "anthropic", "azure", "bedrock", "cerebras", "cohere", "dashscope", "databricks", "deepseek", "gemini", "huggingface", "llama2", "mistral", "ollama", "openai", "vllm", "xai" ] } }, "required": [ "provider" ] }, "route_type": { "description": "The model's operation implementation, for this provider. 
", "type": "string", "enum": [ "audio/v1/audio/speech", "audio/v1/audio/transcriptions", "audio/v1/audio/translations", "image/v1/images/edits", "image/v1/images/generations", "llm/v1/assistants", "llm/v1/batches", "llm/v1/chat", "llm/v1/completions", "llm/v1/embeddings", "llm/v1/files", "llm/v1/responses", "preserve", "realtime/v1/realtime", "video/v1/videos/generations" ] }, "weight": { "description": "The weight this target gets within the upstream loadbalancer (1-65535). Only used by ai-proxy-advanced.", "type": "integer", "default": 100, "maximum": 65535, "minimum": 1 } }, "required": [ "model", "route_type" ] }, "max_request_body_size": { "description": "max allowed body size allowed to be introspected. 0 means unlimited, but the size of this body will still be limited by Nginx's client_max_body_size.", "type": "integer", "default": 1048576 }, "prompt": { "description": "Use this prompt to tune the LLM system/assistant message for the incoming proxy request (from the client), and what you are expecting in return.", "type": "string" }, "transformation_extract_pattern": { "description": "Defines the regular expression that must match to indicate a successful AI transformation at the request phase. The first match will be set as the outgoing body. If the AI service's response doesn't match this pattern, it is marked as a failure.", "type": "string" } }, "required": [ "llm", "prompt" ] }, "consumer_group": { "description": "If set, the plugin will activate only for requests where the specified consumer group has been authenticated. (Note that some plugins can not be restricted to consumers groups this way.). 
Leave unset for the plugin to activate regardless of the authenticated Consumer Groups.", "type": "object", "additionalProperties": false, "properties": { "id": { "type": "string" } } }, "name": { "const": "ai-request-transformer" }, "protocols": { "description": "A set of strings representing HTTP protocols.", "type": "array", "items": { "enum": [ "grpc", "grpcs", "http", "https" ], "type": "string" }, "format": "set", "default": [ "grpc", "grpcs", "http", "https" ] }, "route": { "description": "If set, the plugin will only activate when receiving requests via the specified route. Leave unset for the plugin to activate regardless of the route being used.", "type": "object", "additionalProperties": false, "properties": { "id": { "type": "string" } } }, "service": { "description": "If set, the plugin will only activate when receiving requests via one of the routes belonging to the specified Service. Leave unset for the plugin to activate regardless of the Service being matched.", "type": "object", "additionalProperties": false, "properties": { "id": { "type": "string" } } } }, "required": [ "config" ] }