{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "components/schemas/Info",
  "title": "Info",
  "description": "Describes the loaded model (ID, revision SHA, data type, device type, pipeline tag), numeric limits and settings such as maximum input length and maximum total tokens, and the TGI version, build SHA, and Docker image label.",
  "type": "object",
  "properties": {
    "model_id": {
      "type": "string",
      "description": "Loaded model ID",
      "example": "500123"
    },
    "model_sha": {
      "type": "string",
      "description": "Model revision SHA",
      "example": "example_value"
    },
    "model_dtype": {
      "type": "string",
      "description": "Model data type",
      "enum": [
        "float16",
        "bfloat16",
        "float32"
      ],
      "example": "float16"
    },
    "model_device_type": {
      "type": "string",
      "description": "Device type (cuda, cpu)",
      "example": "example_value"
    },
    "model_pipeline_tag": {
      "type": "string",
      "description": "Pipeline task tag",
      "example": "example_value"
    },
    "max_concurrent_requests": {
      "type": "integer",
      "example": 10
    },
    "max_best_of": {
      "type": "integer",
      "example": 10
    },
    "max_stop_sequences": {
      "type": "integer",
      "example": 10
    },
    "max_input_length": {
      "type": "integer",
      "description": "Maximum input token length",
      "example": 10
    },
    "max_total_tokens": {
      "type": "integer",
      "description": "Maximum total tokens (input + output)",
      "example": 10
    },
    "waiting_served_ratio": {
      "type": "number",
      "example": 42.5
    },
    "max_batch_total_tokens": {
      "type": "integer",
      "example": 10
    },
    "max_waiting_tokens": {
      "type": "integer",
      "example": 10
    },
    "validation_workers": {
      "type": "integer",
      "example": 10
    },
    "max_client_batch_size": {
      "type": "integer",
      "example": 10
    },
    "version": {
      "type": "string",
      "description": "TGI version",
      "example": "example_value"
    },
    "sha": {
      "type": "string",
      "description": "TGI build SHA",
      "example": "example_value"
    },
    "docker_label": {
      "type": "string",
      "description": "Docker image label",
      "example": "example_value"
    }
  }
}