{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "Info",
  "description": "Information about the loaded model, the server limits, and the TGI build.",
  "type": "object",
  "properties": {
    "model_id": {
      "type": "string",
      "description": "Loaded model ID"
    },
    "model_sha": {
      "type": ["string", "null"],
      "description": "Model revision SHA"
    },
    "model_dtype": {
      "type": "string",
      "description": "Model data type"
    },
    "model_device_type": {
      "type": "string",
      "description": "Device type (cuda, cpu)"
    },
    "model_pipeline_tag": {
      "type": ["string", "null"],
      "description": "Pipeline task tag"
    },
    "max_concurrent_requests": {
      "type": "integer",
      "description": "Maximum number of concurrent requests"
    },
    "max_best_of": {
      "type": "integer",
      "description": "Maximum value clients may set for best_of"
    },
    "max_stop_sequences": {
      "type": "integer",
      "description": "Maximum number of stop sequences clients may set per request"
    },
    "max_input_length": {
      "type": "integer",
      "description": "Maximum input token length"
    },
    "max_total_tokens": {
      "type": "integer",
      "description": "Maximum total tokens (input + output)"
    },
    "waiting_served_ratio": {
      "type": "number",
      "description": "Ratio of waiting to running requests at which running requests may be paused to batch in the waiting ones"
    },
    "max_batch_total_tokens": {
      "type": "integer",
      "description": "Maximum total number of tokens in a batch"
    },
    "max_waiting_tokens": {
      "type": "integer",
      "description": "Number of tokens that can be generated before waiting requests are forced into the running batch"
    },
    "validation_workers": {
      "type": "integer",
      "description": "Number of workers used for request validation"
    },
    "max_client_batch_size": {
      "type": "integer",
      "description": "Maximum number of inputs a client can send in a single request"
    },
    "version": {
      "type": "string",
      "description": "TGI version"
    },
    "sha": {
      "type": ["string", "null"],
      "description": "TGI build SHA"
    },
    "docker_label": {
      "type": ["string", "null"],
      "description": "Docker image label"
    }
  }
}