openapi: 3.0.1 info: title: GroqCloud API description: Specification of the Groq cloud API termsOfService: https://groq.com/terms-of-use/ contact: name: Groq Support email: support@groq.com version: '2.1' servers: - url: https://api.groq.com paths: /openai/v1/audio/speech: post: operationId: createSpeech tags: - Audio summary: Generates audio from the input text. requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/CreateSpeechRequest' responses: '200': description: OK headers: Transfer-Encoding: schema: type: string description: chunked content: audio/wav: schema: type: string format: binary x-groq-metadata: returns: Returns an audio file in `wav` format. examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/audio/speech \ -H "Authorization: Bearer $GROQ_API_KEY" \ -H "Content-Type: application/json" \ -d '{ "model": "playai-tts", "input": "I love building and shipping new features for our users!", "voice": "Fritz-PlayAI", "response_format": "wav" }' py: | import os from groq import Groq client = Groq(api_key=os.environ.get("GROQ_API_KEY")) speech_file_path = "speech.wav" model = "playai-tts" voice = "Fritz-PlayAI" text = "I love building and shipping new features for our users!" response_format = "wav" response = client.audio.speech.create( model=model, voice=voice, input=text, response_format=response_format ) response.write_to_file(speech_file_path) js: | import fs from "fs"; import path from "path"; import Groq from 'groq-sdk'; const groq = new Groq({ apiKey: process.env.GROQ_API_KEY }); const speechFilePath = "speech.wav"; const model = "playai-tts"; const voice = "Fritz-PlayAI"; const text = "I love building and shipping new features for our users!"; const responseFormat = "wav"; async function main() { const response = await groq.audio.speech.create({ model: model, voice: voice, input: text, response_format: responseFormat }); const buffer = Buffer.from(await response.arrayBuffer()); await fs.promises.writeFile(speechFilePath, buffer); } main(); /openai/v1/audio/transcriptions: post: operationId: createTranscription tags: - Audio summary: Transcribes audio into the input language. requestBody: required: true content: multipart/form-data: schema: $ref: '#/components/schemas/CreateTranscriptionRequest' responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/CreateTranscriptionResponseJson' x-groq-metadata: returns: Returns an audio transcription object. 
examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/audio/transcriptions \ -H "Authorization: Bearer $GROQ_API_KEY" \ -H "Content-Type: multipart/form-data" \ -F file="@./sample_audio.m4a" \ -F model="whisper-large-v3" py: | import os from groq import Groq client = Groq() filename = os.path.dirname(__file__) + "/sample_audio.m4a" with open(filename, "rb") as file: transcription = client.audio.transcriptions.create( file=(filename, file.read()), model="whisper-large-v3", prompt="Specify context or spelling", # Optional response_format="json", # Optional language="en", # Optional temperature=0.0 # Optional ) print(transcription.text) js: | import fs from "fs"; import Groq from "groq-sdk"; const groq = new Groq(); async function main() { const transcription = await groq.audio.transcriptions.create({ file: fs.createReadStream("sample_audio.m4a"), model: "whisper-large-v3", prompt: "Specify context or spelling", // Optional response_format: "json", // Optional language: "en", // Optional temperature: 0.0, // Optional }); console.log(transcription.text); } main(); response: | { "text": "Your transcribed text appears here...", "x_groq": { "id": "req_unique_id" } } /openai/v1/audio/translations: post: operationId: createTranslation tags: - Audio summary: Translates audio into English. requestBody: required: true content: multipart/form-data: schema: $ref: '#/components/schemas/CreateTranslationRequest' responses: '200': description: OK content: text/plain: schema: type: string application/json: schema: $ref: '#/components/schemas/CreateTranslationResponseJson' x-groq-metadata: returns: Returns an audio translation object. examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/audio/translations \ -H "Authorization: Bearer $GROQ_API_KEY" \ -H "Content-Type: multipart/form-data" \ -F file="@./sample_audio.m4a" \ -F model="whisper-large-v3" py: | # Default import os from groq import Groq client = Groq() filename = os.path.dirname(__file__) + "/sample_audio.m4a" with open(filename, "rb") as file: translation = client.audio.translations.create( file=(filename, file.read()), model="whisper-large-v3", prompt="Specify context or spelling", # Optional response_format="json", # Optional temperature=0.0 # Optional ) print(translation.text) js: | // Default import fs from "fs"; import Groq from "groq-sdk"; const groq = new Groq(); async function main() { const translation = await groq.audio.translations.create({ file: fs.createReadStream("sample_audio.m4a"), model: "whisper-large-v3", prompt: "Specify context or spelling", // Optional response_format: "json", // Optional temperature: 0.0, // Optional }); console.log(translation.text); } main(); response: | { "text": "Your translated text appears here...", "x_groq": { "id": "req_unique_id" } } /openai/v1/batches: post: summary: Creates and executes a batch from an uploaded file of requests. [Learn more](/docs/batch). operationId: createBatch tags: - Batch requestBody: required: true content: application/json: schema: type: object required: - input_file_id - endpoint - completion_window properties: input_file_id: type: string description: > The ID of an uploaded file that contains requests for the new batch. See [upload file](/docs/api-reference#files-upload) for how to upload a file. Your input file must be formatted as a [JSONL file](/docs/batch), and must be uploaded with the purpose `batch`. The file can be up to 100 MB in size. 
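# Illustrative only: a single request line in a `batch` JSONL input file might look like the following (adapted from the file upload example later in this spec; fields follow the BatchRequestInput schema):
# {"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "llama-3.1-8b-instant", "messages": [{"role": "user", "content": "Explain the importance of fast language models"}]}}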
endpoint: type: string enum: - /v1/chat/completions description: >- The endpoint to be used for all requests in the batch. Currently `/v1/chat/completions` is supported. completion_window: type: string description: >- The time frame within which the batch should be processed. Durations from `24h` to `7d` are supported. metadata: type: object additionalProperties: type: string description: Optional custom metadata for the batch. nullable: true responses: '200': description: Batch created successfully. content: application/json: schema: $ref: '#/components/schemas/Batch' x-groq-metadata: returns: A created batch object. examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/batches \ -H "Authorization: Bearer $GROQ_API_KEY" \ -H "Content-Type: application/json" \ -d '{ "input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t", "endpoint": "/v1/chat/completions", "completion_window": "24h" }' py: | import os from groq import Groq client = Groq( api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted ) batch = client.batches.create( completion_window="24h", endpoint="/v1/chat/completions", input_file_id="file_01jh6x76wtemjr74t1fh0faj5t", ) print(batch.id) js: | import Groq from 'groq-sdk'; const client = new Groq({ apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted }); async function main() { const batch = await client.batches.create({ completion_window: "24h", endpoint: "/v1/chat/completions", input_file_id: "file_01jh6x76wtemjr74t1fh0faj5t", }); console.log(batch.id); } main(); response: | { "id": "batch_01jh6xa7reempvjyh6n3yst2zw", "object": "batch", "endpoint": "/v1/chat/completions", "errors": null, "input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t", "completion_window": "24h", "status": "validating", "output_file_id": null, "error_file_id": null, "finalizing_at": null, "failed_at": null, "expired_at": null, "cancelled_at": null, "request_counts": { "total": 0, "completed": 0, "failed": 0 }, "metadata": null, "created_at": 1736472600, "expires_at": 1736559000, "cancelling_at": null, "completed_at": null, "in_progress_at": null } get: operationId: listBatches tags: - Batch summary: List your organization's batches. responses: '200': description: Batch listed successfully. 
content: application/json: schema: $ref: '#/components/schemas/ListBatchesResponse' x-groq-metadata: returns: A list of batches examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/batches \ -H "Authorization: Bearer $GROQ_API_KEY" \ -H "Content-Type: application/json" py: | import os from groq import Groq client = Groq( api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted ) batch_list = client.batches.list() print(batch_list.data) js: | import Groq from 'groq-sdk'; const client = new Groq({ apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted }); async function main() { const batchList = await client.batches.list(); console.log(batchList.data); } main(); response: | { "object": "list", "data": [ { "id": "batch_01jh6xa7reempvjyh6n3yst2zw", "object": "batch", "endpoint": "/v1/chat/completions", "errors": null, "input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t", "completion_window": "24h", "status": "validating", "output_file_id": null, "error_file_id": null, "finalizing_at": null, "failed_at": null, "expired_at": null, "cancelled_at": null, "request_counts": { "total": 0, "completed": 0, "failed": 0 }, "metadata": null, "created_at": 1736472600, "expires_at": 1736559000, "cancelling_at": null, "completed_at": null, "in_progress_at": null } ] } /openai/v1/batches/{batch_id}: get: operationId: retrieveBatch tags: - Batch summary: Retrieves a batch. parameters: - in: path name: batch_id required: true schema: type: string description: The ID of the batch to retrieve. responses: '200': description: Batch retrieved successfully. content: application/json: schema: $ref: '#/components/schemas/Batch' x-groq-metadata: returns: A batch object. examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/batches/batch_01jh6xa7reempvjyh6n3yst2zw \ -H "Authorization: Bearer $GROQ_API_KEY" \ -H "Content-Type: application/json" py: | import os from groq import Groq client = Groq( api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted ) batch = client.batches.retrieve( "batch_01jh6xa7reempvjyh6n3yst2zw", ) print(batch.id) js: | import Groq from 'groq-sdk'; const client = new Groq({ apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted }); async function main() { const batch = await client.batches.retrieve("batch_01jh6xa7reempvjyh6n3yst2zw"); console.log(batch.id); } main(); response: | { "id": "batch_01jh6xa7reempvjyh6n3yst2zw", "object": "batch", "endpoint": "/v1/chat/completions", "errors": null, "input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t", "completion_window": "24h", "status": "validating", "output_file_id": null, "error_file_id": null, "finalizing_at": null, "failed_at": null, "expired_at": null, "cancelled_at": null, "request_counts": { "total": 0, "completed": 0, "failed": 0 }, "metadata": null, "created_at": 1736472600, "expires_at": 1736559000, "cancelling_at": null, "completed_at": null, "in_progress_at": null } /openai/v1/batches/{batch_id}/cancel: post: operationId: cancelBatch tags: - Batch summary: Cancels a batch. parameters: - in: path name: batch_id required: true schema: type: string description: The ID of the batch to cancel. responses: '200': description: Batch cancelled successfully. content: application/json: schema: $ref: '#/components/schemas/Batch' x-groq-metadata: returns: A batch object. 
examples: - title: Default request: curl: | curl -X POST https://api.groq.com/openai/v1/batches/batch_01jh6xa7reempvjyh6n3yst2zw/cancel \ -H "Authorization: Bearer $GROQ_API_KEY" \ -H "Content-Type: application/json" py: | import os from groq import Groq client = Groq( api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted ) batch = client.batches.cancel( "batch_01jh6xa7reempvjyh6n3yst2zw", ) print(batch.id) js: | import Groq from 'groq-sdk'; const client = new Groq({ apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted }); async function main() { const batch = await client.batches.cancel("batch_01jh6xa7reempvjyh6n3yst2zw"); console.log(batch.id); } main(); response: | { "id": "batch_01jh6xa7reempvjyh6n3yst2zw", "object": "batch", "endpoint": "/v1/chat/completions", "errors": null, "input_file_id": "file_01jh6x76wtemjr74t1fh0faj5t", "completion_window": "24h", "status": "cancelling", "output_file_id": null, "error_file_id": null, "finalizing_at": null, "failed_at": null, "expired_at": null, "cancelled_at": null, "request_counts": { "total": 0, "completed": 0, "failed": 0 }, "metadata": null, "created_at": 1736472600, "expires_at": 1736559000, "cancelling_at": null, "completed_at": null, "in_progress_at": null } /openai/v1/chat/completions: post: operationId: createChatCompletion tags: - Chat summary: Creates a model response for the given chat conversation. requestBody: required: true description: The chat prompt and parameters content: application/json: schema: $ref: '#/components/schemas/CreateChatCompletionRequest' responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/CreateChatCompletionResponse' x-groq-metadata: returns: >- Returns a [chat completion](/docs/api-reference#chat-create) object, or a streamed sequence of [chat completion chunk](/docs/api-reference#chat-create) objects if the request is streamed. examples: - title: Default request: py: | import os from groq import Groq client = Groq( # This is the default and can be omitted api_key=os.environ.get("GROQ_API_KEY"), ) chat_completion = client.chat.completions.create( messages=[ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "Explain the importance of fast language models", } ], model="llama-3.3-70b-versatile", ) print(chat_completion.choices[0].message.content) js: | import Groq from "groq-sdk"; const groq = new Groq({ apiKey: process.env.GROQ_API_KEY }); async function main() { const completion = await groq.chat.completions .create({ messages: [ { role: "user", content: "Explain the importance of fast language models", }, ], model: "llama-3.3-70b-versatile", }) console.log(completion.choices[0].message.content); } main(); curl: | curl https://api.groq.com/openai/v1/chat/completions -s \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $GROQ_API_KEY" \ -d '{ "model": "llama-3.3-70b-versatile", "messages": [{ "role": "user", "content": "Explain the importance of fast language models" }] }' response: | { "id": "chatcmpl-f51b2cd2-bef7-417e-964e-a08f0b513c22", "object": "chat.completion", "created": 1730241104, "model": "llama-3.3-70b-versatile", "choices": [ { "index": 0, "message": { "role": "assistant", "content": "Fast language models have gained significant attention in recent years due to their ability to process and generate human-like text quickly and efficiently. The importance of fast language models can be understood from their potential applications and benefits:\n\n1. 
**Real-time Chatbots and Conversational Interfaces**: Fast language models enable the development of chatbots and conversational interfaces that can respond promptly to user queries, making them more engaging and useful.\n2. **Sentiment Analysis and Opinion Mining**: Fast language models can quickly analyze text data to identify sentiments, opinions, and emotions, allowing for improved customer service, market research, and opinion mining.\n3. **Language Translation and Localization**: Fast language models can quickly translate text between languages, facilitating global communication and enabling businesses to reach a broader audience.\n4. **Text Summarization and Generation**: Fast language models can summarize long documents or even generate new text on a given topic, improving information retrieval and processing efficiency.\n5. **Named Entity Recognition and Information Extraction**: Fast language models can rapidly recognize and extract specific entities, such as names, locations, and organizations, from unstructured text data.\n6. **Recommendation Systems**: Fast language models can analyze large amounts of text data to personalize product recommendations, improve customer experience, and increase sales.\n7. **Content Generation for Social Media**: Fast language models can quickly generate engaging content for social media platforms, helping businesses maintain a consistent online presence and increasing their online visibility.\n8. **Sentiment Analysis for Stock Market Analysis**: Fast language models can quickly analyze social media posts, news articles, and other text data to identify sentiment trends, enabling financial analysts to make more informed investment decisions.\n9. **Language Learning and Education**: Fast language models can provide instant feedback and adaptive language learning, making language education more effective and engaging.\n10. **Domain-Specific Knowledge Extraction**: Fast language models can quickly extract relevant information from vast amounts of text data, enabling domain experts to focus on high-level decision-making rather than manual information gathering.\n\nThe benefits of fast language models include:\n\n* **Increased Efficiency**: Fast language models can process large amounts of text data quickly, reducing the time and effort required for tasks such as sentiment analysis, entity recognition, and text summarization.\n* **Improved Accuracy**: Fast language models can analyze and learn from large datasets, leading to more accurate results and more informed decision-making.\n* **Enhanced User Experience**: Fast language models can enable real-time interactions, personalized recommendations, and timely responses, improving the overall user experience.\n* **Cost Savings**: Fast language models can automate many tasks, reducing the need for manual labor and minimizing costs associated with data processing and analysis.\n\nIn summary, fast language models have the potential to transform various industries and applications by providing fast, accurate, and efficient language processing capabilities." 
}, "logprobs": null, "finish_reason": "stop" } ], "usage": { "queue_time": 0.037493756, "prompt_tokens": 18, "prompt_time": 0.000680594, "completion_tokens": 556, "completion_time": 0.463333333, "total_tokens": 574, "total_time": 0.464013927 }, "system_fingerprint": "fp_179b0f92c9", "x_groq": { "id": "req_01jbd6g2qdfw2adyrt2az8hz4w" } } /openai/v1/embeddings: post: operationId: createEmbedding tags: - Embeddings summary: Creates an embedding vector representing the input text. requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/CreateEmbeddingRequest' responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/CreateEmbeddingResponse' /openai/v1/files: get: operationId: listFiles tags: - Files summary: Returns a list of files. responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/ListFilesResponse' x-groq-metadata: returns: A list of [File](/docs/api-reference#files-upload) objects. examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/files \ -H "Authorization: Bearer $GROQ_API_KEY" \ -H "Content-Type: application/json" py: | import os from groq import Groq client = Groq( api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted ) file_list = client.files.list() print(file_list.data) js: | import Groq from 'groq-sdk'; const client = new Groq({ apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted }); async function main() { const fileList = await client.files.list(); console.log(fileList.data); } main(); response: | { "object": "list", "data": [ { "id": "file_01jh6x76wtemjr74t1fh0faj5t", "object": "file", "bytes": 966, "created_at": 1736472501, "filename": "batch_file.jsonl", "purpose": "batch" } ] } post: operationId: uploadFile tags: - Files summary: > Upload a file that can be used across various endpoints. The Batch API only supports `.jsonl` files up to 100 MB in size. The input also has a specific required [format](/docs/batch). Please contact us if you need to increase these storage limits. requestBody: required: true content: multipart/form-data: schema: $ref: '#/components/schemas/CreateFileRequest' responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/File' x-groq-metadata: returns: The uploaded File object. examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/files \ -H "Authorization: Bearer $GROQ_API_KEY" \ -F purpose="batch" \ -F "file=@batch_file.jsonl" py: | import os import requests # pip install requests first! 
def upload_file_to_groq(api_key, file_path): url = "https://api.groq.com/openai/v1/files" headers = { "Authorization": f"Bearer {api_key}" } # Prepare the file and form data files = { "file": ("batch_file.jsonl", open(file_path, "rb")) } data = { "purpose": "batch" } # Make the POST request response = requests.post(url, headers=headers, files=files, data=data) return response.json() # Usage example api_key = os.environ.get("GROQ_API_KEY") file_path = "batch_file.jsonl" # Path to your JSONL file try: result = upload_file_to_groq(api_key, file_path) print(result) except Exception as e: print(f"Error: {e}") js: > import Groq from 'groq-sdk'; const client = new Groq({ apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted }); const fileContent = '{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "llama-3.1-8b-instant", "messages": [{"role": "user", "content": "Explain the importance of fast language models"}]}}\n'; async function main() { const blob = new Blob([fileContent]); const file = new File([blob], 'batch.jsonl'); const createdFile = await client.files.create({ file: file, purpose: 'batch' }); console.log(createdFile.id); } main(); response: | { "id": "file_01jh6x76wtemjr74t1fh0faj5t", "object": "file", "bytes": 966, "created_at": 1736472501, "filename": "batch_file.jsonl", "purpose": "batch" } /openai/v1/files/{file_id}: delete: operationId: deleteFile tags: - Files summary: Delete a file. parameters: - in: path name: file_id required: true schema: type: string description: The ID of the file to use for this request. responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/DeleteFileResponse' x-groq-metadata: returns: A deleted file response object. examples: - title: Default request: curl: | curl -X DELETE https://api.groq.com/openai/v1/files/file_01jh6x76wtemjr74t1fh0faj5t \ -H "Authorization: Bearer $GROQ_API_KEY" \ -H "Content-Type: application/json" py: | import os from groq import Groq client = Groq( api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted ) file_delete = client.files.delete( "file_01jh6x76wtemjr74t1fh0faj5t", ) print(file_delete) js: | import Groq from 'groq-sdk'; const client = new Groq({ apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted }); async function main() { const fileDelete = await client.files.delete("file_01jh6x76wtemjr74t1fh0faj5t"); console.log(fileDelete); } main(); response: | { "id": "file_01jh6x76wtemjr74t1fh0faj5t", "object": "file", "deleted": true } get: operationId: retrieveFile tags: - Files summary: Returns information about a file. parameters: - name: file_id in: path description: The file to retrieve required: true schema: type: string responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/File' x-groq-metadata: returns: A file object. 
examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/files/file_01jh6x76wtemjr74t1fh0faj5t \ -H "Authorization: Bearer $GROQ_API_KEY" \ -H "Content-Type: application/json" py: | import os from groq import Groq client = Groq( api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted ) file = client.files.info( "file_01jh6x76wtemjr74t1fh0faj5t", ) print(file) js: | import Groq from 'groq-sdk'; const client = new Groq({ apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted }); async function main() { const file = await client.files.info('file_01jh6x76wtemjr74t1fh0faj5t'); console.log(file); } main(); response: | { "id": "file_01jh6x76wtemjr74t1fh0faj5t", "object": "file", "bytes": 966, "created_at": 1736472501, "filename": "batch_file.jsonl", "purpose": "batch" } /openai/v1/files/{file_id}/content: get: operationId: downloadFile tags: - Files summary: Returns the contents of the specified file. parameters: - in: path name: file_id required: true schema: type: string description: The ID of the file to use for this request. responses: '200': description: OK content: application/octet-stream: schema: type: string format: binary x-groq-metadata: returns: The file content examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/files/file_01jh6x76wtemjr74t1fh0faj5t/content \ -H "Authorization: Bearer $GROQ_API_KEY" \ -H "Content-Type: application/json" py: | import os from groq import Groq client = Groq( api_key=os.environ.get("GROQ_API_KEY"), # This is the default and can be omitted ) response = client.files.content( "file_01jh6x76wtemjr74t1fh0faj5t", ) print(response) js: | import Groq from 'groq-sdk'; const client = new Groq({ apiKey: process.env['GROQ_API_KEY'], // This is the default and can be omitted }); async function main() { const response = await client.files.content('file_01jh6x76wtemjr74t1fh0faj5t'); console.log(response); } main(); /v1/fine_tunings: get: operationId: listFineTunings summary: >- Lists all previously created fine tunings. This endpoint is in closed beta. [Contact us](https://groq.com/contact) for more information. tags: - Fine Tuning responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/ListFineTuningsResponse' x-groq-metadata: returns: The list of fine tunes examples: - title: Default request: curl: | curl https://api.groq.com/v1/fine_tunings -s \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $GROQ_API_KEY" js: | import Groq from "groq-sdk"; const groq = new Groq({ apiKey: process.env.GROQ_API_KEY }); async function main() { const fineTunings = await groq.fine_tunings.list(); console.log(fineTunings); } main(); py: | import os from groq import Groq client = Groq( # This is the default and can be omitted api_key=os.environ.get("GROQ_API_KEY"), ) fine_tunings = client.fine_tunings.list() print(fine_tunings) response: | { "object": "list", "data": [ { "id": "string", "name": "string", "base_model": "string", "type": "string", "input_file_id": "string", "created_at": 0, "fine_tuned_model": "string" } ] } post: operationId: createFineTuning summary: >- Creates a new fine tuning for the already uploaded files This endpoint is in closed beta. [Contact us](https://groq.com/contact) for more information. 
tags: - Fine Tuning requestBody: content: application/json: schema: $ref: '#/components/schemas/CreateFineTuningRequest' responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/ReadFineTuningResponse' x-groq-metadata: returns: The newly created fine tune examples: - title: Default request: curl: | curl https://api.groq.com/v1/fine_tunings -s \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $GROQ_API_KEY" \ -d '{ "input_file_id": "", "name": "test-1", "type": "lora", "base_model": "llama-3.1-8b-instant" }' js: | import Groq from "groq-sdk"; const groq = new Groq({ apiKey: process.env.GROQ_API_KEY }); async function main() { const fineTunings = await groq.fine_tunings.create({ input_file_id: "", name: "test-1", type: "lora", base_model: "llama-3.1-8b-instant" }); console.log(fineTunings); } main(); py: | import os from groq import Groq client = Groq( # This is the default and can be omitted api_key=os.environ.get("GROQ_API_KEY"), ) fine_tunings = client.fine_tunings.create( input_file_id="", name="test-1", type="lora", base_model="llama-3.1-8b-instant" ) print(fine_tunings) response: | { "id": "string", "object": "object", "data": { "id": "string", "name": "string", "base_model": "string", "type": "string", "input_file_id": "string", "created_at": 0, "fine_tuned_model": "string" } } /v1/fine_tunings/{id}: delete: operationId: deleteFineTuning summary: >- Deletes an existing fine tuning by id This endpoint is in closed beta. [Contact us](https://groq.com/contact) for more information. tags: - Fine Tuning responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/DeleteFineTuningResponse' parameters: - name: id required: true in: path schema: type: string x-groq-metadata: returns: A confirmation of the deleted fine tune examples: - title: Default request: curl: | curl -X DELETE https://api.groq.com/v1/fine_tunings/:id -s \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $GROQ_API_KEY" js: | import Groq from "groq-sdk"; const groq = new Groq({ apiKey: process.env.GROQ_API_KEY }); async function main() { await groq.fine_tunings.delete({id: ""}); } main(); py: | import os from groq import Groq client = Groq( # This is the default and can be omitted api_key=os.environ.get("GROQ_API_KEY"), ) client.fine_tunings.delete(id="") response: | { "id": "string", "object": "fine_tuning", "deleted": true } get: operationId: getFineTuning summary: >- Retrieves an existing fine tuning by id This endpoint is in closed beta. [Contact us](https://groq.com/contact) for more information. 
tags: - Fine Tuning responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/ReadFineTuningResponse' parameters: - name: id required: true in: path schema: type: string x-groq-metadata: returns: A fine tune metadata object examples: - title: Default request: curl: | curl https://api.groq.com/v1/fine_tunings/:id -s \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $GROQ_API_KEY" js: | import Groq from "groq-sdk"; const groq = new Groq({ apiKey: process.env.GROQ_API_KEY }); async function main() { const fineTuning = await groq.fine_tunings.get({id: ""}); console.log(fineTuning); } main(); py: | import os from groq import Groq client = Groq( # This is the default and can be omitted api_key=os.environ.get("GROQ_API_KEY"), ) fine_tuning = client.fine_tunings.get(id="") print(fine_tuning) response: | { "id": "string", "object": "object", "data": { "id": "string", "name": "string", "base_model": "string", "type": "string", "input_file_id": "string", "created_at": 0, "fine_tuned_model": "string" } } /openai/v1/models: get: operationId: listModels tags: - Models summary: List all available [models](https://console.groq.com/docs/models). description: get all available models responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/ListModelsResponse' x-groq-metadata: returns: A list of model objects. examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/models \ -H "Authorization: Bearer $GROQ_API_KEY" js: | import Groq from "groq-sdk"; const groq = new Groq({ apiKey: process.env.GROQ_API_KEY }); async function main() { const models = await groq.models.list(); console.log(models); } main(); py: | import os from groq import Groq client = Groq( # This is the default and can be omitted api_key=os.environ.get("GROQ_API_KEY"), ) models = client.models.list() print(models) response: | { "object": "list", "data": [ { "id": "gemma2-9b-it", "object": "model", "created": 1693721698, "owned_by": "Google", "active": true, "context_window": 8192, "public_apps": null }, { "id": "llama3-8b-8192", "object": "model", "created": 1693721698, "owned_by": "Meta", "active": true, "context_window": 8192, "public_apps": null }, { "id": "llama3-70b-8192", "object": "model", "created": 1693721698, "owned_by": "Meta", "active": true, "context_window": 8192, "public_apps": null }, { "id": "whisper-large-v3-turbo", "object": "model", "created": 1728413088, "owned_by": "OpenAI", "active": true, "context_window": 448, "public_apps": null }, { "id": "whisper-large-v3", "object": "model", "created": 1693721698, "owned_by": "OpenAI", "active": true, "context_window": 448, "public_apps": null }, { "id": "llama-guard-3-8b", "object": "model", "created": 1693721698, "owned_by": "Meta", "active": true, "context_window": 8192, "public_apps": null }, { "id": "distil-whisper-large-v3-en", "object": "model", "created": 1693721698, "owned_by": "Hugging Face", "active": true, "context_window": 448, "public_apps": null }, { "id": "llama-3.1-8b-instant", "object": "model", "created": 1693721698, "owned_by": "Meta", "active": true, "context_window": 131072, "public_apps": null } ] } /openai/v1/models/{model}: get: operationId: retrieveModel tags: - Models summary: Get detailed information about a [model](https://console.groq.com/docs/models). 
description: Get a specific model parameters: - name: model in: path description: The model to get required: true schema: type: string responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/Model' x-groq-metadata: returns: A model object. examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/models/llama-3.3-70b-versatile \ -H "Authorization: Bearer $GROQ_API_KEY" js: | import Groq from "groq-sdk"; const groq = new Groq({ apiKey: process.env.GROQ_API_KEY }); async function main() { const model = await groq.models.retrieve("llama-3.3-70b-versatile"); console.log(model); } main(); py: | import os from groq import Groq client = Groq( # This is the default and can be omitted api_key=os.environ.get("GROQ_API_KEY"), ) model = client.models.retrieve("llama-3.3-70b-versatile") print(model) response: | { "id": "llama3-8b-8192", "object": "model", "created": 1693721698, "owned_by": "Meta", "active": true, "context_window": 8192, "public_apps": null, "max_completion_tokens": 8192 } delete: operationId: deleteModel tags: - Models summary: Delete model description: Delete a model parameters: - in: path name: model description: The model to delete required: true schema: type: string responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/DeleteModelResponse' /openai/v1/reranking: post: operationId: createReranking tags: - Reranking summary: Reranks documents based on their relevance to a query. description: | Given a query and a list of documents, returns the documents ranked by their relevance to the query. The documents are scored and sorted in descending order of relevance. requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/RerankingRequest' responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/RerankingResponse' x-groq-metadata: returns: | A list of documents sorted by relevance score in descending order. Scores range from 0.0 to 1.0, where higher scores indicate greater relevance to the query. 
examples: - title: Basic Reranking request: curl: | curl https://api.groq.com/openai/v1/reranking \ -H "Authorization: Bearer $GROQ_API_KEY" \ -H "Content-Type: application/json" \ -d '{ "model": "qwen3-reranker-4b", "query": "artificial intelligence", "docs": [ "Machine learning is a subset of AI", "The weather is nice today", "Deep learning uses neural networks" ] }' py: | import os from groq import Groq client = Groq(api_key=os.environ.get("GROQ_API_KEY")) reranking = client.reranking.create( model="qwen3-reranker-4b", query="artificial intelligence", docs=[ "Machine learning is a subset of AI", "The weather is nice today", "Deep learning uses neural networks" ] ) print(reranking.results) js: | import Groq from 'groq-sdk'; const client = new Groq({ apiKey: process.env['GROQ_API_KEY'], }); async function main() { const reranking = await client.reranking.create({ model: 'qwen3-reranker-4b', query: 'artificial intelligence', docs: [ 'Machine learning is a subset of AI', 'The weather is nice today', 'Deep learning uses neural networks' ] }); console.log(reranking.results); } main(); response: | { "results": [ { "doc": "Machine learning is a subset of AI", "score": 0.92 }, { "doc": "Deep learning uses neural networks", "score": 0.87 }, { "doc": "The weather is nice today", "score": 0.23 } ] } - title: Reranking with Custom Instruction request: json: | { "model": "qwen3-reranker-4b", "query": "climate change effects", "docs": [ "Global warming causes sea level rise", "Electric cars reduce emissions", "Renewable energy is growing fast" ], "instruction": "Find documents specifically about environmental impacts" } response: | { "results": [ { "doc": "Global warming causes sea level rise", "score": 0.95 }, { "doc": "Electric cars reduce emissions", "score": 0.78 }, { "doc": "Renewable energy is growing fast", "score": 0.65 } ] } /openai/v1/responses: post: operationId: createResponse tags: - Responses summary: Creates a model response for the given input. requestBody: required: true description: The input prompt and parameters content: application/json: schema: $ref: '#/components/schemas/CreateResponseRequest' responses: '200': description: OK content: application/json: schema: $ref: '#/components/schemas/CreateResponseResponse' x-groq-metadata: returns: >- Returns a [response](/docs/api-reference#responses-create) object, or a streamed sequence of [response events](/docs/api-reference#responses-streaming) if the request is streamed. examples: - title: Default request: curl: | curl https://api.groq.com/openai/v1/responses -s \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $GROQ_API_KEY" \ -d '{ "model": "openai/gpt-oss-120b", "input": "Tell me a three sentence bedtime story about a unicorn." }' response: | { "id": "resp_01k1x6w9ane6d8rfxm05cb45yk", "object": "response", "status": "completed", "created_at": 1754400695, "output": [ { "type": "message", "id": "msg_01k1x6w9ane6eb0650crhawwyy", "status": "completed", "role": "assistant", "content": [ { "type": "output_text", "text": "When the stars blinked awake, Luna the unicorn curled her mane and whispered wishes to the sleeping pine trees. She galloped through a field of moonlit daisies, gathering dew like tiny silver pearls. 
With a gentle sigh, she tucked her hooves beneath a silver cloud so the world slept softly, dreaming of her gentle hooves until the morning.", "annotations": [] } ] } ], "previous_response_id": null, "model": "openai/gpt-oss-120b", "reasoning": { "effort": null, "summary": null }, "max_output_tokens": null, "instructions": null, "text": { "format": { "type": "text" } }, "tools": [], "tool_choice": "auto", "truncation": "disabled", "metadata": {}, "temperature": 1, "top_p": 1, "user": null, "service_tier": "default", "error": null, "incomplete_details": null, "usage": { "input_tokens": 82, "input_tokens_details": { "cached_tokens": 0 }, "output_tokens": 266, "output_tokens_details": { "reasoning_tokens": 0 }, "total_tokens": 348 }, "parallel_tool_calls": true, "store": false } components: securitySchemes: api_key: type: http scheme: bearer bearerFormat: apiKey schemas: Error: type: object properties: message: type: string nullable: false type: type: string nullable: false param: type: string nullable: true code: type: string nullable: true failed_generation: type: string nullable: true schema_path: type: string nullable: true schema_path_segments: type: array description: Segments of the schema path relevant to validation errors. items: type: string schema_kind: type: string nullable: true schema_code: type: string nullable: true debug: $ref: '#/components/schemas/DebugData' required: - type - message ErrorResponse: type: object properties: error: $ref: '#/components/schemas/Error' required: - error CreateSpeechRequest: type: object additionalProperties: false properties: model: description: | One of the [available TTS models](/docs/text-to-speech). anyOf: - type: string - type: string enum: - playai-tts - playai-tts-arabic example: playai-tts input: example: The quick brown fox jumped over the lazy dog type: string description: The text to generate audio for. voice: description: >- The voice to use when generating the audio. List of voices can be found [here](/docs/text-to-speech). type: string example: Fritz-PlayAI response_format: description: The format of the generated audio. Supported formats are `flac, mp3, mulaw, ogg, wav`. default: mp3 type: string enum: - flac - mp3 - mulaw - ogg - wav sample_rate: type: integer description: The sample rate for the generated audio. default: 48000 example: 48000 enum: - 8000 - 16000 - 22050 - 24000 - 32000 - 44100 - 48000 speed: description: The speed of the generated audio. example: 1 type: number default: 1 minimum: 0.5 maximum: 5 required: - model - input - voice CreateTranscriptionRequest: type: object additionalProperties: false properties: file: description: > The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. Either a file or a URL must be provided. Note that the file field is not supported in Batch API requests. type: string format: binary url: description: > The audio URL to translate/transcribe (supports Base64URL). Either a file or a URL must be provided. For Batch API requests, the URL field is required since the file field is not supported. type: string model: description: | ID of the model to use. `whisper-large-v3` and `whisper-large-v3-turbo` are currently available. example: whisper-large-v3-turbo anyOf: - type: string - type: string enum: - whisper-large-v3 - whisper-large-v3-turbo language: description: > The language of the input audio. 
Supplying the input language in [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy and latency. anyOf: - type: string - type: string enum: - en - zh - de - es - ru - ko - fr - ja - pt - tr - pl - ca - nl - ar - sv - it - id - hi - fi - vi - he - uk - el - ms - cs - ro - da - hu - ta - 'no' - th - ur - hr - bg - lt - la - mi - ml - cy - sk - te - fa - lv - bn - sr - az - sl - kn - et - mk - br - eu - is - hy - ne - mn - bs - kk - sq - sw - gl - mr - pa - si - km - sn - yo - so - af - oc - ka - be - tg - sd - gu - am - yi - lo - uz - fo - ht - ps - tk - nn - mt - sa - lb - my - bo - tl - mg - as - tt - haw - ln - ha - ba - jv - su - yue prompt: description: > An optional text to guide the model's style or continue a previous audio segment. The [prompt](/docs/speech-text) should match the audio language. type: string response_format: description: | The format of the transcript output, in one of these options: `json`, `text`, or `verbose_json`. type: string enum: - json - text - verbose_json default: json temperature: description: > The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit. type: number default: 0 timestamp_granularities: description: > The timestamp granularities to populate for this transcription. `response_format` must be set `verbose_json` to use timestamp granularities. Either or both of these options are supported: `word`, or `segment`. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency. type: array items: type: string enum: - word - segment default: - segment oneOf: - required: - file - required: - url required: - model CreateTranscriptionResponseJson: type: object description: Represents a transcription response returned by model, based on the provided input. properties: text: type: string description: The transcribed text. required: - text TranscriptionSegment: type: object properties: id: type: integer description: Unique identifier of the segment. seek: type: integer description: Seek offset of the segment. start: type: number format: float description: Start time of the segment in seconds. end: type: number format: float description: End time of the segment in seconds. text: type: string description: Text content of the segment. tokens: type: array items: type: integer description: Array of token IDs for the text content. temperature: type: number format: float description: Temperature parameter used for generating the segment. avg_logprob: type: number format: float description: Average logprob of the segment. If the value is lower than -1, consider the logprobs failed. compression_ratio: type: number format: float description: >- Compression ratio of the segment. If the value is greater than 2.4, consider the compression failed. no_speech_prob: type: number format: float description: >- Probability of no speech in the segment. If the value is higher than 1.0 and the `avg_logprob` is below -1, consider this segment silent. audio_text: type: string description: Transcribed text for the entire chunk token_scores: type: array items: type: number format: float description: Token-level scores. chunk_start: type: integer description: Start chunk timestamp. 
chunk_end: type: integer description: End chunk timestamp. required: - id - seek - start - end - text - tokens TranscriptionWord: type: object properties: word: type: string description: The text content of the word. start: type: number format: float description: Start time of the word in seconds. end: type: number format: float description: End time of the word in seconds. required: - word - start - end CreateTranscriptionResponseVerboseJson: type: object description: Represents a verbose json transcription response returned by the model, based on the provided input. properties: language: type: string description: The language of the input audio. duration: type: string description: The duration of the input audio. text: type: string description: The transcribed text. words: type: array description: Extracted words and their corresponding timestamps. items: $ref: '#/components/schemas/TranscriptionWord' segments: type: array description: Segments of the transcribed text and their corresponding details. items: $ref: '#/components/schemas/TranscriptionSegment' required: - language - duration - text CreateTranslationRequest: type: object additionalProperties: false properties: file: description: > The audio file object (not file name) to translate, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. type: string format: binary url: description: | The audio URL to translate/transcribe (supports Base64URL). Either file or url must be provided. When using the Batch API, only url is supported. type: string model: description: | ID of the model to use. `whisper-large-v3` and `whisper-large-v3-turbo` are currently available. example: whisper-large-v3-turbo anyOf: - type: string - type: string enum: - whisper-large-v3 - whisper-large-v3-turbo prompt: description: > An optional text to guide the model's style or continue a previous audio segment. The [prompt](/docs/speech-text) should be in English. type: string response_format: description: | The format of the transcript output, in one of these options: `json`, `text`, or `verbose_json`. type: string enum: - json - text - verbose_json default: json temperature: description: > The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit. type: number default: 0 oneOf: - required: - file - required: - url required: - model CreateTranslationResponseJson: type: object properties: text: type: string required: - text CreateTranslationResponseVerboseJson: type: object properties: language: type: string description: The language of the output translation (always `english`). duration: type: string description: The duration of the input audio. text: type: string description: The translated text. segments: type: array description: Segments of the translated text and their corresponding details. items: $ref: '#/components/schemas/TranscriptionSegment' required: - language - duration - text Batch: type: object properties: id: type: string object: type: string enum: - batch description: The object type, which is always `batch`. x-stainless-const: true endpoint: type: string description: The API endpoint used by the batch. errors: type: object properties: object: type: string description: The object type, which is always `list`. 
data: type: array items: type: object properties: code: type: string description: An error code identifying the error type. message: type: string description: A human-readable message providing more details about the error. param: type: string description: The name of the parameter that caused the error, if applicable. nullable: true line: type: integer description: The line number of the input file where the error occurred, if applicable. nullable: true input_file_id: type: string description: The ID of the input file for the batch. completion_window: type: string description: The time frame within which the batch should be processed. status: type: string description: The current status of the batch. enum: - validating - failed - in_progress - finalizing - completed - expired - cancelling - cancelled output_file_id: type: string description: The ID of the file containing the outputs of successfully executed requests. error_file_id: type: string description: The ID of the file containing the outputs of requests with errors. created_at: type: integer description: The Unix timestamp (in seconds) for when the batch was created. in_progress_at: type: integer description: The Unix timestamp (in seconds) for when the batch started processing. expires_at: type: integer description: The Unix timestamp (in seconds) for when the batch will expire. finalizing_at: type: integer description: The Unix timestamp (in seconds) for when the batch started finalizing. completed_at: type: integer description: The Unix timestamp (in seconds) for when the batch was completed. failed_at: type: integer description: The Unix timestamp (in seconds) for when the batch failed. expired_at: type: integer description: The Unix timestamp (in seconds) for when the batch expired. cancelling_at: type: integer description: The Unix timestamp (in seconds) for when the batch started cancelling. cancelled_at: type: integer description: The Unix timestamp (in seconds) for when the batch was cancelled. request_counts: type: object properties: total: type: integer description: Total number of requests in the batch. completed: type: integer description: Number of requests that have been completed successfully. failed: type: integer description: Number of requests that have failed. required: - total - completed - failed description: The request counts for different statuses within the batch. metadata: description: > Set of key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. type: object nullable: true required: - id - object - endpoint - input_file_id - completion_window - status - created_at BatchRequestInput: type: object description: The per-line object of the batch input file properties: custom_id: type: string description: >- A developer-provided per-request id that will be used to match outputs to inputs. Must be unique for each request in a batch. method: type: string enum: - POST description: The HTTP method to be used for the request. Currently only `POST` is supported. x-stainless-const: true url: type: string description: >- The OpenAI API relative URL to be used for the request. Currently `/v1/chat/completions` is supported. BatchRequestOutput: type: object description: The per-line object of the batch output and error files properties: id: type: string custom_id: type: string description: A developer-provided per-request id that will be used to match outputs to inputs. 
response: type: object nullable: true properties: status_code: type: integer description: The HTTP status code of the response request_id: type: string description: >- An unique identifier for the OpenAI API request. Please include this request ID when contacting support. body: type: object description: The JSON body of the response error: type: object nullable: true description: >- For requests that failed with a non-HTTP error, this will contain more information on the cause of the failure. properties: code: type: string description: A machine-readable error code. message: type: string description: A human-readable error message. ListBatchesResponse: type: object properties: data: type: array items: $ref: '#/components/schemas/Batch' object: type: string enum: - list x-stainless-const: true required: - object - data ChatCompletionRequestMessageContentPart: oneOf: - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartText' - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartImage' - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartDocument' ChatCompletionRequestMessageContentPartImage: type: object title: Image content part properties: type: type: string enum: - image_url description: The type of the content part. image_url: type: object properties: url: type: string description: Either a URL of the image or the base64 encoded image data. format: uri detail: type: string description: Specifies the detail level of the image. enum: - auto - low - high default: auto required: - url required: - type - image_url ChatCompletionRequestMessageContentPartText: type: object title: Text content part properties: type: type: string enum: - text description: The type of the content part. text: type: string description: The text content. required: - type - text ChatCompletionRequestMessageContentPartDocument: type: object title: Document content part properties: type: type: string enum: - document description: The type of the content part. document: type: object properties: data: type: object description: The JSON document data. additionalProperties: true id: type: string nullable: true description: Optional unique identifier for the document. required: - data required: - type - document ChatCompletionDocument: type: object title: Document description: A document that can be referenced by the model while generating responses. additionalProperties: false properties: id: type: string nullable: true description: Optional unique identifier that can be used for citations in responses. source: $ref: '#/components/schemas/ChatCompletionDocumentSource' required: - source ChatCompletionDocumentSource: title: Document source description: The source of the document. Only text and JSON sources are currently supported. oneOf: - $ref: '#/components/schemas/ChatCompletionDocumentSourceText' - $ref: '#/components/schemas/ChatCompletionDocumentSourceJSON' discriminator: propertyName: type mapping: text: '#/components/schemas/ChatCompletionDocumentSourceText' json: '#/components/schemas/ChatCompletionDocumentSourceJSON' ChatCompletionDocumentSourceText: type: object title: Text document source description: A document whose contents are provided inline as text. additionalProperties: false properties: type: type: string enum: - text description: Identifies this document source as inline text. text: type: string description: The document contents. 
required: - type - text ChatCompletionDocumentSourceJSON: type: object title: JSON document source description: A document whose contents are provided inline as JSON data. additionalProperties: false properties: type: type: string enum: - json description: Identifies this document source as JSON data. data: type: object description: The JSON payload associated with the document. additionalProperties: true required: - type - data ChatCompletionRequestMessage: oneOf: - $ref: '#/components/schemas/ChatCompletionRequestSystemMessage' - $ref: '#/components/schemas/ChatCompletionRequestUserMessage' - $ref: '#/components/schemas/ChatCompletionRequestAssistantMessage' - $ref: '#/components/schemas/ChatCompletionRequestToolMessage' - $ref: '#/components/schemas/ChatCompletionRequestFunctionMessage' discriminator: propertyName: role mapping: system: '#/components/schemas/ChatCompletionRequestSystemMessage' developer: '#/components/schemas/ChatCompletionRequestSystemMessage' user: '#/components/schemas/ChatCompletionRequestUserMessage' assistant: '#/components/schemas/ChatCompletionRequestAssistantMessage' tool: '#/components/schemas/ChatCompletionRequestToolMessage' function: '#/components/schemas/ChatCompletionRequestFunctionMessage' ChatCompletionRequestSystemMessage: type: object title: System message additionalProperties: false properties: content: title: System message content description: The contents of the system message. oneOf: - type: string title: Text content description: The text contents of the message. - type: array title: Array of content parts description: An array of content parts with a defined type, only `text` is supported for this message type. items: $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartText' minItems: 1 role: type: string enum: - system - developer description: The role of the messages author, in this case `system`. name: type: string description: >- An optional name for the participant. Provides the model information to differentiate between participants of the same role. required: - content - role ChatCompletionRequestUserMessage: type: object title: User message additionalProperties: false properties: content: title: User message content description: | The contents of the user message. oneOf: - type: string description: The text contents of the message. title: Text content - type: array description: >- An array of content parts with a defined type, each can be of type `text` or `image_url` when passing in images. You can pass multiple images by adding multiple `image_url` content parts. Image input is only supported when using the `gpt-4-visual-preview` model. title: Array of content parts items: $ref: '#/components/schemas/ChatCompletionRequestMessageContentPart' minItems: 1 role: type: string enum: - user description: The role of the messages author, in this case `user`. name: type: string description: >- An optional name for the participant. Provides the model information to differentiate between participants of the same role. required: - content - role ChatCompletionRequestAssistantMessage: type: object title: Assistant message additionalProperties: false properties: content: title: Assistant message content nullable: true description: > The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified. oneOf: - type: string title: Text content description: The text contents of the message. 
- type: array description: An array of content parts with a defined type, only `text` is supported for this message type. title: Array of content parts items: $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartText' reasoning: description: > The reasoning output by the assistant if reasoning_format was set to 'parsed'. This field is supported on [models that support reasoning](https://console.groq.com/docs/reasoning). nullable: true type: string role: type: string enum: - assistant description: The role of the messages author, in this case `assistant`. name: type: string description: >- An optional name for the participant. Provides the model information to differentiate between participants of the same role. tool_calls: $ref: '#/components/schemas/ChatCompletionMessageToolCalls' function_call: type: object deprecated: true description: >- Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model. properties: arguments: type: string description: >- The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. name: type: string description: The name of the function to call. required: - role ChatCompletionRequestToolMessage: type: object title: Tool message additionalProperties: false properties: role: type: string enum: - tool description: The role of the messages author, in this case `tool`. content: description: The contents of the tool message. title: Tool message content oneOf: - type: string description: The text contents of the message. title: Text content - type: array description: >- An array of content parts with a defined type, each can be of type `text` or `image_url` when passing in images. You can pass multiple images by adding multiple `image_url` content parts. Image input is only supported when using the `gpt-4-visual-preview` model. title: Array of content parts items: $ref: '#/components/schemas/ChatCompletionRequestMessageContentPart' minItems: 1 tool_call_id: type: string description: Tool call that this message is responding to. required: - role - content - tool_call_id ChatCompletionRequestFunctionMessage: type: object title: Function message additionalProperties: false deprecated: true properties: role: type: string enum: - function description: The role of the messages author, in this case `function`. content: nullable: true title: Function message content type: string description: The contents of the function message. name: type: string description: The name of the function to call. required: - role - content - name FunctionParameters: type: object description: >- Function parameters defined as a JSON Schema object. Refer to https://json-schema.org/understanding-json-schema/ for schema documentation. additionalProperties: true ChatCompletionFunctions: type: object deprecated: true properties: description: type: string description: >- A description of what the function does, used by the model to choose when and how to call the function. name: type: string description: >- The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. 
parameters: $ref: '#/components/schemas/FunctionParameters' required: - name ChatCompletionFunctionCallOption: type: object description: | Specifying a particular function via `{"name": "my_function"}` forces the model to call that function. properties: name: type: string description: The name of the function to call. required: - name ChatCompletionTool: type: object properties: type: anyOf: - type: string enum: - function - browser_search - code_interpreter description: The type of the tool. `function`, `browser_search`, and `code_interpreter` are supported. - type: string function: $ref: '#/components/schemas/FunctionObject' required: - type FunctionObject: type: object properties: description: type: string description: >- A description of what the function does, used by the model to choose when and how to call the function. name: type: string description: >- The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. parameters: $ref: '#/components/schemas/FunctionParameters' strict: type: boolean default: false description: > Whether to enable strict schema adherence when generating the output. If set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of JSON Schema is supported when `strict` is `true`. required: - name ChatCompletionToolChoiceOption: nullable: true description: > Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can pick between generating a message or calling one or more tools. `required` means the model must call one or more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool. `none` is the default when no tools are present. `auto` is the default if tools are present. oneOf: - type: string description: > `none` means the model will not call any tool and instead generates a message. `auto` means the model can pick between generating a message or calling one or more tools. enum: - none - auto - required - $ref: '#/components/schemas/ChatCompletionNamedToolChoice' x-groq-meta: validator: ChatCompletionToolChoiceOption ChatCompletionNamedToolChoice: type: object description: Specifies a tool the model should use. Use to force the model to call a specific function. properties: type: type: string enum: - function description: The type of the tool. Currently, only `function` is supported. function: type: object properties: name: type: string description: The name of the function to call. required: - name required: - type - function ChatCompletionMessageToolCalls: type: array description: The tool calls generated by the model, such as function calls. items: $ref: '#/components/schemas/ChatCompletionMessageToolCall' ChatCompletionMessageToolCall: type: object properties: id: type: string description: The ID of the tool call. type: type: string enum: - function description: The type of the tool. Currently, only `function` is supported. function: type: object description: The function that the model called. properties: name: type: string description: The name of the function to call. arguments: type: string description: >- The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. 
Validate the arguments in your code before calling your function. required: - name - arguments required: - id - type - function ChatCompletionMessageToolCallChunk: type: object properties: index: type: integer id: type: string description: The ID of the tool call. type: type: string enum: - function description: The type of the tool. Currently, only `function` is supported. function: type: object properties: name: type: string description: The name of the function to call. arguments: type: string description: >- The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. required: - index ChatCompletionRole: type: string description: The role of the author of a message enum: - system - user - assistant - tool - function ChatCompletionStreamOptions: description: | Options for streaming response. Only set this when you set `stream: true`. type: object nullable: true default: null properties: include_usage: type: boolean nullable: true description: > If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value. ChatCompletionResponseMessage: type: object description: A chat completion message generated by the model. properties: content: type: string description: The contents of the message. nullable: true reasoning: type: string description: >- The model's reasoning for a response. Only available for [models that support reasoning](https://console.groq.com/docs/reasoning) when request parameter reasoning_format has value `parsed`. nullable: true tool_calls: $ref: '#/components/schemas/ChatCompletionMessageToolCalls' executed_tools: $ref: '#/components/schemas/ChatCompletionMessageExecutedTools' role: type: string enum: - assistant description: The role of the author of this message. function_call: type: object deprecated: true description: >- Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model. properties: arguments: type: string description: >- The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. name: type: string description: The name of the function to call. required: - name - arguments annotations: type: array description: A list of annotations providing citations and references for the content in the message. items: $ref: '#/components/schemas/Annotation' required: - role - content ChatCompletionMessageExecutedTools: type: array description: A list of tools that were executed during the chat completion for compound AI systems. items: type: object properties: index: type: integer description: The index of the executed tool. type: type: string description: The type of tool that was executed. arguments: type: string description: The arguments passed to the tool in JSON format. output: type: string nullable: true description: The output returned by the tool. 
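Taken together, `ChatCompletionTool`, `ChatCompletionMessageToolCall`, and the tool message schema describe both halves of function calling: the tools you offer and the calls the model makes back. A hedged sketch of the round trip with the groq Python SDK; `get_weather` and its output are hypothetical:

```python
import json
import os
from groq import Groq

client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical function
        "description": "Get the current weather for a city.",
        "parameters": {  # FunctionParameters is a JSON Schema object
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

messages = [{"role": "user", "content": "What's the weather in Oslo?"}]
first = client.chat.completions.create(
    model="llama-3.3-70b-versatile", messages=messages, tools=tools, tool_choice="auto"
)

# Assumes the model decided to call the tool; arguments arrive as a JSON string.
call = first.choices[0].message.tool_calls[0]
args = json.loads(call.function.arguments)

# Echo the assistant turn, then answer it with a role="tool" message.
messages.append({
    "role": "assistant",
    "tool_calls": [{
        "id": call.id,
        "type": "function",
        "function": {"name": call.function.name, "arguments": call.function.arguments},
    }],
})
messages.append({
    "role": "tool",
    "tool_call_id": call.id,
    "content": json.dumps({"city": args["city"], "temperature_c": 7}),  # stand-in tool output
})

final = client.chat.completions.create(model="llama-3.3-70b-versatile", messages=messages)
print(final.choices[0].message.content)
```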
search_results: type: object nullable: true description: The search results returned by the tool, if applicable. properties: results: type: array description: List of search results items: type: object properties: title: type: string description: The title of the search result url: type: string description: The URL of the search result content: type: string description: The content of the search result score: type: number format: float description: The relevance score of the search result images: type: array description: List of image URLs returned by the search items: type: string code_results: type: array items: $ref: '#/components/schemas/CodeExecutionResult' description: Array of code execution results browser_results: type: array items: $ref: '#/components/schemas/BrowserResult' description: Array of browser results required: - index - type - arguments ChatCompletionStreamResponseDelta: type: object description: A chat completion delta generated by streamed model responses. properties: content: type: string description: The contents of the chunk message. nullable: true reasoning: type: string description: >- The model's reasoning for a response. Only available for [models that support reasoning](https://console.groq.com/docs/reasoning) when request parameter reasoning_format has value `parsed`. nullable: true function_call: deprecated: true type: object description: >- Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model. properties: arguments: type: string description: >- The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. name: type: string description: The name of the function to call. tool_calls: type: array items: $ref: '#/components/schemas/ChatCompletionMessageToolCallChunk' executed_tools: $ref: '#/components/schemas/ChatCompletionMessageExecutedTools' role: type: string enum: - system - user - assistant - tool description: The role of the author of this message. annotations: type: array description: A list of annotations providing citations and references for the content in the message. items: $ref: '#/components/schemas/Annotation' CreateChatCompletionRequest: type: object additionalProperties: false properties: messages: description: A list of messages comprising the conversation so far. type: array minItems: 1 items: $ref: '#/components/schemas/ChatCompletionRequestMessage' model: description: >- ID of the model to use. For details on which models are compatible with the Chat API, see available [models](https://console.groq.com/docs/models) example: meta-llama/llama-4-scout-17b-16e-instruct anyOf: - type: string - type: string enum: - compound-beta - compound-beta-mini - gemma2-9b-it - llama-3.1-8b-instant - llama-3.3-70b-versatile - meta-llama/llama-4-maverick-17b-128e-instruct - meta-llama/llama-4-scout-17b-16e-instruct - meta-llama/llama-guard-4-12b - moonshotai/kimi-k2-instruct - openai/gpt-oss-120b - openai/gpt-oss-20b - qwen/qwen3-32b disable_tool_validation: type: boolean default: false description: > If set to true, groq will return called tools without validating that the tool is present in request.tools. tool_choice=required/none will still be enforced, but the request cannot require a specific tool be used. 
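`ChatCompletionStreamResponseDelta` above is the per-chunk payload once `stream` is enabled on `CreateChatCompletionRequest`. A minimal streaming sketch with the groq Python SDK, using a model from the enum above:

```python
import os
from groq import Groq

client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

stream = client.chat.completions.create(
    model="llama-3.1-8b-instant",
    messages=[{"role": "user", "content": "Write one sentence about low-latency inference."}],
    stream=True,  # deltas arrive as server-sent events
)

for chunk in stream:
    delta = chunk.choices[0].delta  # ChatCompletionStreamResponseDelta
    if delta.content:
        print(delta.content, end="", flush=True)
print()
```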
frequency_penalty: type: number default: 0 minimum: -2 maximum: 2 nullable: true description: >- This is not yet supported by any of our models. Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. include_reasoning: type: boolean nullable: true description: > Whether to include reasoning in the response. If true, the response will include a `reasoning` field. If false, the model's reasoning will not be included in the response. This field is mutually exclusive with `reasoning_format`. logit_bias: type: object default: null nullable: true additionalProperties: type: integer description: | This is not yet supported by any of our models. Modify the likelihood of specified tokens appearing in the completion. logprobs: description: > This is not yet supported by any of our models. Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the `content` of `message`. type: boolean default: false nullable: true top_logprobs: description: > This is not yet supported by any of our models. An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to `true` if this parameter is used. type: integer minimum: 0 maximum: 20 nullable: true max_tokens: description: > Deprecated in favor of `max_completion_tokens`. The maximum number of tokens that can be generated in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length. type: integer nullable: true deprecated: true max_completion_tokens: description: >- The maximum number of tokens that can be generated in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length. type: integer nullable: true 'n': type: integer minimum: 1 maximum: 1 default: 1 example: 1 nullable: true description: >- How many chat completion choices to generate for each input message. Note that the current moment, only n=1 is supported. Other values will result in a 400 response. presence_penalty: type: number default: 0 minimum: -2 maximum: 2 nullable: true description: >- This is not yet supported by any of our models. Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. response_format: nullable: true description: > An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. `json_schema` response format is only available on [supported models](https://console.groq.com/docs/structured-outputs#supported-models). Setting to `{ "type": "json_object" }` enables the older JSON mode, which ensures the message the model generates is valid JSON. Using `json_schema` is preferred for models that support it. oneOf: - $ref: '#/components/schemas/ResponseFormatText' - $ref: '#/components/schemas/ResponseFormatJsonSchema' - $ref: '#/components/schemas/ResponseFormatJsonObject' seed: type: integer nullable: true description: > If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result. 
Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. service_tier: type: string nullable: true description: > The service tier to use for the request. Defaults to `on_demand`. - `auto` will automatically select the highest tier available within the rate limits of your organization. - `flex` uses the flex tier, which will succeed or fail quickly. enum: - auto - on_demand - flex - performance - null stop: description: > Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. default: null nullable: true oneOf: - type: string example: |+ nullable: true - type: array minItems: 0 maxItems: 4 items: type: string example: '["\n"]' reasoning_effort: description: | qwen3 models support the following values Set to 'none' to disable reasoning. Set to 'default' or null to let Qwen reason. openai/gpt-oss-20b and openai/gpt-oss-120b support 'low', 'medium', or 'high'. 'medium' is the default value. nullable: true default: null type: string enum: - none - default - low - medium - high reasoning_format: description: | Specifies how to output reasoning tokens This field is mutually exclusive with `include_reasoning`. default: null nullable: true type: string enum: - hidden - raw - parsed stream: description: > If set, partial message deltas will be sent. Tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) as they become available, with the stream terminated by a `data: [DONE]` message. [Example code](/docs/text-chat#streaming-a-chat-completion). type: boolean nullable: true default: false temperature: type: number minimum: 0 maximum: 2 default: 1 example: 1 nullable: true description: >- What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both. top_p: type: number minimum: 0 maximum: 1 default: 1 example: 1 nullable: true description: >- An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both. tools: type: array nullable: true maxItems: 128 description: > A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported. items: $ref: '#/components/schemas/ChatCompletionTool' tool_choice: $ref: '#/components/schemas/ChatCompletionToolChoiceOption' parallel_tool_calls: description: | Whether to enable parallel function calling during tool use. type: boolean nullable: true default: true user: type: string description: A unique identifier representing your end-user, which can help us monitor and detect abuse. nullable: true function_call: deprecated: true nullable: true description: > Deprecated in favor of `tool_choice`. Controls which (if any) function is called by the model. `none` means the model will not call a function and instead generates a message. `auto` means the model can pick between generating a message or calling a function. 
Specifying a particular function via `{"name": "my_function"}` forces the model to call that function. `none` is the default when no functions are present. `auto` is the default if functions are present. oneOf: - type: string description: > `none` means the model will not call a function and instead generates a message. `auto` means the model can pick between generating a message or calling a function. enum: - none - auto - required - $ref: '#/components/schemas/ChatCompletionFunctionCallOption' functions: deprecated: true nullable: true description: | Deprecated in favor of `tools`. A list of functions the model may generate JSON inputs for. type: array minItems: 0 maxItems: 128 items: $ref: '#/components/schemas/ChatCompletionFunctions' metadata: type: object nullable: true additionalProperties: type: string description: | This parameter is not currently supported. store: type: boolean nullable: true description: | This parameter is not currently supported. include_domains: type: array nullable: true deprecated: true description: | Deprecated: Use search_settings.include_domains instead. A list of domains to include in the search results when the model uses a web search tool. items: type: string exclude_domains: type: array nullable: true deprecated: true description: | Deprecated: Use search_settings.exclude_domains instead. A list of domains to exclude from the search results when the model uses a web search tool. items: type: string search_settings: type: object nullable: true description: | Settings for web search functionality when the model uses a web search tool. properties: include_domains: type: array nullable: true description: A list of domains to include in the search results. items: type: string exclude_domains: type: array nullable: true description: A list of domains to exclude from the search results. items: type: string include_images: type: boolean nullable: true description: Whether to include images in the search results. country: type: string nullable: true description: >- Name of country to prioritize search results from (e.g., "united states", "germany", "france"). compound_custom: type: object nullable: true description: Custom configuration of models and tools for Compound. properties: models: type: object nullable: true properties: reasoning_model: type: string nullable: true description: Custom model to use for reasoning. answering_model: type: string nullable: true description: Custom model to use for answering. tools: type: object nullable: true description: Configuration options for tools available to Compound. properties: enabled_tools: type: array nullable: true description: A list of tool names that are enabled for the request. items: type: string wolfram_settings: type: object nullable: true description: Configuration for the Wolfram tool integration. properties: authorization: type: string nullable: true description: API key used to authorize requests to Wolfram services. documents: type: array nullable: true description: >- A list of documents to provide context for the conversation. Each document contains text that can be referenced by the model. items: $ref: '#/components/schemas/ChatCompletionDocument' citation_options: type: string enum: - enabled - disabled nullable: true default: enabled description: >- Whether to enable citations in the response. When enabled, the model will include citations for information retrieved from provided documents or web searches. 
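The `documents` and `citation_options` request fields above pair with the `Annotation`/`DocumentCitation` schemas later in this section: the model can ground its answer in inline documents and report where each citation lands in the text. A hedged sketch; it assumes the groq Python SDK forwards fields it does not model natively via `extra_body`, and reads the response as a plain dict to avoid assuming the installed SDK version models `annotations`:

```python
import os
from groq import Groq

client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

completion = client.chat.completions.create(
    model="llama-3.3-70b-versatile",
    messages=[{"role": "user", "content": "How many PTO days do employees get?"}],
    extra_body={  # CreateChatCompletionRequest fields passed through as-is (assumed pass-through)
        "documents": [
            {"id": "handbook", "source": {"type": "text", "text": "Employees accrue 25 PTO days per year."}}
        ],
        "citation_options": "enabled",
    },
)

message = completion.model_dump()["choices"][0]["message"]
print(message["content"])
for ann in message.get("annotations") or []:  # Annotation objects, if returned
    if ann["type"] == "document_citation":
        cite = ann["document_citation"]
        print(cite["document_id"], cite["start_index"], cite["end_index"])
```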
required: - model - messages ResponseFormatJsonObject: type: object title: JSON object description: > JSON object response format. An older method of generating JSON responses. Using `json_schema` is recommended for models that support it. Note that the model will not generate JSON without a system or user message instructing it to do so. properties: type: type: string description: The type of response format being defined. Always `json_object`. enum: - json_object x-stainless-const: true required: - type ResponseFormatJsonSchema: type: object title: JSON schema description: | JSON Schema response format. Used to generate structured JSON responses. properties: type: type: string description: The type of response format being defined. Always `json_schema`. enum: - json_schema x-stainless-const: true json_schema: type: object title: JSON schema description: | Structured Outputs configuration options, including a JSON Schema. properties: description: type: string description: > A description of what the response format is for, used by the model to determine how to respond in the format. name: type: string description: > The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. schema: $ref: '#/components/schemas/ResponseFormatJsonSchemaSchema' strict: type: boolean nullable: true default: false description: > Whether to enable strict schema adherence when generating the output. If set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of JSON Schema is supported when `strict` is `true`. required: - name required: - type - json_schema ResponseFormatJsonSchemaSchema: type: object title: JSON schema description: | The schema for the response format, described as a JSON Schema object. Learn how to build JSON schemas [here](https://json-schema.org/). additionalProperties: true ResponseFormatText: type: object title: Text description: | Default response format. Used to generate text responses. properties: type: type: string description: The type of response format being defined. Always `text`. enum: - text x-stainless-const: true required: - type CreateChatCompletionResponse: type: object description: Represents a chat completion response returned by model, based on the provided input. properties: id: type: string description: A unique identifier for the chat completion. choices: type: array description: A list of chat completion choices. Can be more than one if `n` is greater than 1. items: type: object required: - finish_reason - index - message - logprobs properties: finish_reason: type: string description: > The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence, `length` if the maximum number of tokens specified in the request was reached, `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function. enum: - stop - length - tool_calls - function_call index: type: integer description: The index of the choice in the list of choices. message: $ref: '#/components/schemas/ChatCompletionResponseMessage' logprobs: &ref_3 description: Log probability information for the choice. type: object nullable: true properties: content: description: A list of message content tokens with log probability information. 
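`ResponseFormatJsonSchema` above is what the `response_format` request field accepts for Structured Outputs. A short sketch; the recipe schema is illustrative and the chosen model is assumed to be on the supported-models list linked above:

```python
import json
import os
from groq import Groq

client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

completion = client.chat.completions.create(
    model="openai/gpt-oss-20b",  # assumed to support the json_schema response format
    messages=[{"role": "user", "content": "Give me a simple lasagna recipe."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "recipe",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {
                    "title": {"type": "string"},
                    "ingredients": {"type": "array", "items": {"type": "string"}},
                },
                "required": ["title", "ingredients"],
                "additionalProperties": False,
            },
        },
    },
)

recipe = json.loads(completion.choices[0].message.content)  # content matches the supplied schema
print(recipe["title"], len(recipe["ingredients"]))
```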
type: array items: $ref: '#/components/schemas/ChatCompletionTokenLogprob' nullable: true required: - content created: type: integer description: The Unix timestamp (in seconds) of when the chat completion was created. model: type: string description: The model used for the chat completion. system_fingerprint: type: string description: > This fingerprint represents the backend configuration that the model runs with. Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism. object: type: string description: The object type, which is always `chat.completion`. enum: - chat.completion usage: $ref: '#/components/schemas/CompletionUsage' usage_breakdown: $ref: '#/components/schemas/ChatCompletionUsageBreakdown' description: >- Detailed usage breakdown by model when multiple models are used in the request for compound AI systems. service_tier: type: string nullable: true description: The service tier used for the request. enum: - auto - on_demand - flex - performance - null mcp_list_tools: type: array nullable: true description: List of discovered MCP tools from connected servers. items: type: object properties: id: type: string description: Unique identifier for this tool list response. type: type: string description: The type identifier. server_label: type: string description: Human-readable label for the MCP server. tools: type: array description: Array of discovered tools from the server. items: type: object properties: annotations: description: Additional metadata for the tool. description: type: string description: Description of what the tool does. input_schema: type: object additionalProperties: true description: JSON Schema describing the tool's input parameters. name: type: string description: The name of the tool. x_groq: $ref: '#/components/schemas/XGroqNonStreaming' required: - choices - created - id - model - object ChatCompletionTokenLogprob: type: object properties: token: &ref_0 description: The token. type: string logprob: &ref_1 description: >- The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely. type: number bytes: &ref_2 description: >- A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token. type: array items: type: integer nullable: true top_logprobs: description: >- List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested `top_logprobs` returned. type: array items: type: object properties: token: *ref_0 logprob: *ref_1 bytes: *ref_2 required: - token - logprob - bytes required: - token - logprob - bytes - top_logprobs DebugData: type: object description: >- Debug information including input and output token IDs and strings. Only present when debug=true in the request. properties: input_token_ids: type: array items: type: integer description: Token IDs for the input. input_tokens: type: array items: type: string description: Token strings for the input. output_token_ids: type: array items: type: integer description: Token IDs for the output. output_tokens: type: array items: type: string description: Token strings for the output. 
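`CreateChatCompletionResponse` above is the non-streaming envelope. A short sketch that reads the fields most callers need: the finish reason, the message, and the usage block:

```python
import os
from groq import Groq

client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

completion = client.chat.completions.create(
    model="llama-3.3-70b-versatile",
    messages=[{"role": "user", "content": "Say hello in five words."}],
)

choice = completion.choices[0]
print(choice.finish_reason)  # stop | length | tool_calls | function_call
print(choice.message.content)
print(completion.model, completion.system_fingerprint)
print(completion.usage.prompt_tokens, completion.usage.completion_tokens, completion.usage.total_tokens)
```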
XGroqNonStreaming: type: object description: Groq-specific metadata for non-streaming chat completion responses. properties: id: type: string description: A Groq request ID which can be used to refer to a specific request to Groq support. seed: type: integer nullable: true description: >- The seed used for the request. See the seed property on CreateChatCompletionRequest for more details. usage: type: object nullable: true description: Additional Groq-specific usage metrics (hardware cache statistics). properties: sram_cached_tokens: type: integer description: Number of tokens served from SRAM cache. dram_cached_tokens: type: integer description: Number of tokens served from DRAM cache. debug: allOf: - $ref: '#/components/schemas/DebugData' nullable: true required: - id XGroq: type: object description: Groq-specific metadata for streaming responses. Different fields appear in different chunks. properties: id: type: string nullable: true description: | A Groq request ID which can be used to refer to a specific request to Groq support. Sent only in the first and final chunk. seed: type: integer nullable: true description: The seed used for the request. Sent in the final chunk. usage: allOf: - $ref: '#/components/schemas/CompletionUsage' nullable: true description: Usage information for the stream. Only sent in the final chunk. usage_breakdown: allOf: - $ref: '#/components/schemas/ChatCompletionUsageBreakdown' nullable: true description: >- Detailed usage breakdown by model when multiple models are used in the request for compound AI systems. Only sent in the final chunk. error: type: string nullable: true description: An error string indicating why a stream was stopped early. debug: allOf: - $ref: '#/components/schemas/DebugData' nullable: true CreateChatCompletionStreamResponse: type: object description: >- Represents a streamed chunk of a chat completion response returned by model, based on the provided input. properties: id: type: string description: A unique identifier for the chat completion. Each chunk has the same ID. choices: type: array description: | A list of chat completion choices. Can contain more than one element if `n` is greater than 1. items: type: object required: - delta - finish_reason - index properties: delta: $ref: '#/components/schemas/ChatCompletionStreamResponseDelta' logprobs: *ref_3 finish_reason: type: string description: > The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence, `length` if the maximum number of tokens specified in the request was reached, `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function. enum: - stop - length - tool_calls - function_call nullable: true index: type: integer description: The index of the choice in the list of choices. created: type: integer description: >- The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp. model: type: string description: The model used to generate the completion. system_fingerprint: type: string description: > This fingerprint represents the backend configuration that the model runs with. Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism. object: type: string description: The object type, which is always `chat.completion.chunk`.
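Per `XGroq` and `CreateChatCompletionStreamResponse` above, a stream's usage arrives only in the final chunk, and `stream_options.include_usage` requests it explicitly. A hedged sketch; access to the chunk's `x_groq` field assumes the installed groq Python SDK surfaces it, hence the `getattr` guard:

```python
import os
from groq import Groq

client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

stream = client.chat.completions.create(
    model="llama-3.1-8b-instant",
    messages=[{"role": "user", "content": "Count to five."}],
    stream=True,
    stream_options={"include_usage": True},  # adds a final chunk whose choices array is empty
)

usage = None
for chunk in stream:
    if chunk.choices:
        delta = chunk.choices[0].delta
        if delta.content:
            print(delta.content, end="", flush=True)
    x_groq = getattr(chunk, "x_groq", None)  # Groq-specific metadata; usage only on the final chunk
    if x_groq and x_groq.usage:
        usage = x_groq.usage
print()
if usage:
    print(usage.total_tokens, usage.total_time)
```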
enum: - chat.completion.chunk x_groq: $ref: '#/components/schemas/XGroq' required: - choices - created - id - model - object CompletionUsage: type: object description: Usage statistics for the completion request. properties: queue_time: type: number description: Time the request spent queued. completion_time: type: number description: Time spent generating tokens. completion_tokens: type: integer description: Number of tokens in the generated completion. prompt_time: type: number description: Time spent processing input tokens. prompt_tokens: type: integer description: Number of tokens in the prompt. total_time: type: number description: Completion time and prompt time combined. total_tokens: type: integer description: Total number of tokens used in the request (prompt + completion). prompt_tokens_details: type: object nullable: true description: Breakdown of tokens in the prompt. properties: cached_tokens: type: integer description: Number of tokens that were cached and reused. required: - cached_tokens completion_tokens_details: type: object nullable: true description: Breakdown of tokens in the completion. properties: reasoning_tokens: type: integer description: Number of tokens used for reasoning (for reasoning models). required: - reasoning_tokens required: - prompt_tokens - completion_tokens - total_tokens ChatCompletionUsageBreakdown: type: object description: Usage statistics for compound AI completion requests. properties: models: type: array description: List of models used in the request and their individual usage statistics items: type: object properties: model: type: string description: The name/identifier of the model used usage: $ref: '#/components/schemas/CompletionUsage' required: - model - usage required: - models Chart: type: object properties: type: type: string description: The type of chart enum: - bar - box_and_whisker - line - pie - scatter - superchart - unknown title: type: string description: The title of the chart x_label: type: string description: The label for the x-axis y_label: type: string description: The label for the y-axis x_unit: type: string description: The unit for the x-axis y_unit: type: string description: The unit for the y-axis x_ticks: type: array items: type: number description: The tick values for the x-axis y_ticks: type: array items: type: number description: The tick values for the y-axis x_tick_labels: type: array items: type: string description: The labels for the x-axis ticks y_tick_labels: type: array items: type: string description: The labels for the y-axis ticks x_scale: type: string description: The scale type for the x-axis y_scale: type: string description: The scale type for the y-axis elements: type: array items: $ref: '#/components/schemas/ChartElement' description: The chart elements (data series, points, etc.)
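`CompletionUsage` above carries Groq's timing fields alongside the token counts, so throughput can be derived directly from a non-streaming response. A small helper sketch:

```python
def summarize_usage(usage) -> str:
    """Format a CompletionUsage object, e.g. client.chat.completions.create(...).usage."""
    tok_per_s = usage.completion_tokens / usage.completion_time if usage.completion_time else float("nan")
    return (
        f"queued {usage.queue_time:.3f}s | "
        f"prompt {usage.prompt_tokens} tok in {usage.prompt_time:.3f}s | "
        f"completion {usage.completion_tokens} tok in {usage.completion_time:.3f}s ({tok_per_s:.0f} tok/s) | "
        f"total {usage.total_tokens} tok in {usage.total_time:.3f}s"
    )
```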
required: - type - elements ChartElement: type: object properties: label: type: string description: The label for this chart element group: type: string description: The group this element belongs to value: type: number description: The value for this element points: type: array items: type: array items: type: number description: The points for this element angle: type: number description: The angle for this element radius: type: number description: The radius for this element min: type: number description: The minimum value for this element first_quartile: type: number description: The first quartile value for this element median: type: number description: The median value for this element third_quartile: type: number description: The third quartile value for this element max: type: number outliers: type: array items: type: number description: The outliers for this element required: - label BrowserResult: type: object properties: url: type: string description: The URL of the browser window title: type: string description: The title of the browser window live_view_url: type: string description: The live view URL for the browser window content: type: string description: The content of the browser result required: - url - title additionalProperties: false CodeExecutionResult: type: object properties: text: type: string description: The text version of the code execution result png: type: string description: Base64 encoded PNG image output from code execution chart: $ref: '#/components/schemas/Chart' charts: type: array items: $ref: '#/components/schemas/Chart' description: Array of charts from a superchart additionalProperties: false Annotation: type: object description: An annotation that provides citations or references for content in a message. properties: type: type: string description: The type of annotation. enum: - document_citation - function_citation document_citation: $ref: '#/components/schemas/DocumentCitation' function_citation: $ref: '#/components/schemas/FunctionCitation' required: - type DocumentCitation: type: object description: A citation referencing a specific document that was provided in the request. properties: start_index: type: integer description: The character index in the message content where this citation begins. end_index: type: integer description: The character index in the message content where this citation ends. document_id: type: string description: The ID of the document being cited, corresponding to a document provided in the request. required: - start_index - end_index - document_id additionalProperties: false FunctionCitation: type: object description: A citation referencing the result of a function or tool call. properties: start_index: type: integer description: The character index in the message content where this citation begins. end_index: type: integer description: The character index in the message content where this citation ends. tool_call_id: type: string description: The ID of the tool call being cited, corresponding to a tool call made during the conversation. required: - start_index - end_index - tool_call_id additionalProperties: false Embedding: type: object description: | Represents an embedding vector returned by embedding endpoint. properties: index: type: integer description: The index of the embedding in the list of embeddings. embedding: oneOf: - type: array description: > The embedding vector, which is a list of floats. The length of vector depends on the model as listed in the [embedding guide](/docs/guides/embeddings). 
items: type: number - type: string description: > The embedding vector, which is a base64 encoded string. The length of the vector depends on the model as listed in the [embedding guide](/docs/guides/embeddings). object: type: string description: The object type, which is always "embedding". enum: - embedding required: - index - object - embedding CreateEmbeddingRequest: type: object additionalProperties: false properties: input: description: > Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model, cannot be an empty string, and any array must be 2048 items or fewer. example: The quick brown fox jumped over the lazy dog oneOf: - type: string title: string description: The string that will be turned into an embedding. default: '' example: This is a test. - type: array title: array description: The array of strings that will be turned into embeddings. minItems: 1 maxItems: 2048 items: type: string default: '' example: "['This is a test.']" x-groq-meta: validator: EmbeddingInput model: description: | ID of the model to use. example: nomic-embed-text-v1_5 anyOf: - type: string - type: string enum: - nomic-embed-text-v1_5 encoding_format: description: The format to return the embeddings in. Can only be `float` or `base64`. example: float default: float type: string enum: - float - base64 user: type: string description: A unique identifier representing your end-user, which can help us monitor and detect abuse. nullable: true required: - model - input CreateEmbeddingResponse: type: object properties: data: type: array description: The list of embeddings generated by the model. items: $ref: '#/components/schemas/Embedding' model: type: string description: The name of the model used to generate the embedding. object: type: string description: The object type, which is always "list". enum: - list usage: type: object description: The usage information for the request. properties: prompt_tokens: type: integer description: The number of tokens used by the prompt. total_tokens: type: integer description: The total number of tokens used by the request. required: - prompt_tokens - total_tokens required: - object - model - data - usage File: title: File description: The `File` object represents a document that has been uploaded. properties: id: type: string description: The file identifier, which can be referenced in the API endpoints. bytes: type: integer description: The size of the file, in bytes. created_at: type: integer description: The Unix timestamp (in seconds) for when the file was created. filename: type: string description: The name of the file. object: type: string description: The object type, which is always `file`. enum: - file x-stainless-const: true purpose: type: string description: The intended purpose of the file. Supported values are `batch` and `batch_output`. enum: - batch - batch_output CreateFileRequest: type: object additionalProperties: false properties: file: description: | The File object (not file name) to be uploaded. type: string format: binary purpose: description: | The intended purpose of the uploaded file. Use "batch" for [Batch API](/docs/api-reference#batches).
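`CreateEmbeddingRequest` and `CreateEmbeddingResponse` above follow the familiar embeddings shape. A sketch that assumes the groq Python SDK exposes an `embeddings.create` method mirroring this schema:

```python
import os
from groq import Groq

client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

response = client.embeddings.create(  # method name assumed from the OpenAI-compatible surface
    model="nomic-embed-text-v1_5",
    input=["The quick brown fox jumped over the lazy dog"],
    encoding_format="float",
)

for item in response.data:  # Embedding objects
    print(item.index, len(item.embedding))
print(response.usage.prompt_tokens, response.usage.total_tokens)
```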
type: string enum: - batch required: - file - purpose DeleteFileResponse: type: object properties: id: type: string object: type: string enum: - file x-stainless-const: true deleted: type: boolean required: - id - object - deleted ListFilesResponse: type: object properties: object: type: string enum: - list data: type: array items: $ref: '#/components/schemas/File' required: - object - data CreateFineTuningRequest: type: object properties: base_model: type: string description: | BaseModel is the model that the fine tune was originally trained on. input_file_id: type: string description: | InputFileID is the id of the file that was uploaded via the /files api. name: type: string description: | Name is the given name to a fine tuned model. type: type: string description: | Type is the type of fine tuning format such as "lora". DeleteFineTuningResponse: type: object properties: deleted: type: boolean id: type: string object: type: string ListFineTuningsResponse: type: object properties: data: type: array items: type: object properties: base_model: type: string description: | BaseModel is the model that the fine tune was originally trained on. created_at: type: number description: | CreatedAt is the timestamp of when the fine tuned model was created. fine_tuned_model: type: string description: | FineTunedModel is the final name of the fine tuned model. id: type: string description: | ID is the unique identifier of a fine tune. input_file_id: type: string description: | InputFileID is the id of the file that was uploaded via the /files api. name: type: string description: | Name is the given name to a fine tuned model. type: type: string description: | Type is the type of fine tuning format such as "lora". object: type: string ReadFineTuningResponse: type: object properties: data: type: object properties: base_model: type: string description: | BaseModel is the model that the fine tune was originally trained on. created_at: type: number description: | CreatedAt is the timestamp of when the fine tuned model was created. fine_tuned_model: type: string description: | FineTunedModel is the final name of the fine tuned model. id: type: string description: | ID is the unique identifier of a fine tune. input_file_id: type: string description: | InputFileID is the id of the file that was uploaded via the /files api. name: type: string description: | Name is the given name to a fine tuned model. type: type: string description: | Type is the type of fine tuning format such as "lora". id: type: string object: type: string Model: title: Model description: Describes an OpenAI model offering that can be used with the API. properties: id: type: string description: The model identifier, which can be referenced in the API endpoints. created: type: integer description: The Unix timestamp (in seconds) when the model was created. object: type: string description: The object type, which is always "model". enum: - model owned_by: type: string description: The organization that owns the model. required: - id - object - created - owned_by ListModelsResponse: type: object properties: object: type: string enum: - list data: type: array items: $ref: '#/components/schemas/Model' required: - object - data DeleteModelResponse: type: object properties: id: type: string deleted: type: boolean object: type: string required: - id - object - deleted RerankingRequest: type: object additionalProperties: false properties: model: description: | ID of the reranking model to use. 
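The `Model` and `ListModelsResponse` schemas above describe what the models endpoint returns. A short sketch with the groq Python SDK:

```python
import os
from groq import Groq

client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

models = client.models.list()  # ListModelsResponse
for m in models.data:          # Model objects
    print(m.id, m.owned_by, m.created)
```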
example: qwen3-reranker-4b type: string query: description: | The search query to rank documents against. example: artificial intelligence research type: string docs: description: | An array of documents to rank. Each document is a string containing the text content. Maximum of 100 documents per request. type: array minItems: 1 maxItems: 100 items: type: string minLength: 1 example: - Machine learning is a subset of artificial intelligence - The weather forecast predicts rain tomorrow - Deep learning uses neural networks with multiple layers instruction: description: | Optional instruction to guide the reranking process. If not provided, a default instruction will be used. example: Find the most relevant document about AI research type: string nullable: true required: - model - query - docs RerankingResponse: type: object properties: results: type: array description: | List of documents sorted by relevance score in descending order. Each result contains the original document text and its relevance score. items: $ref: '#/components/schemas/RerankingResult' required: - results RerankingResult: type: object properties: doc: type: string description: The original document text. example: Machine learning is a subset of artificial intelligence score: type: number format: float minimum: 0 maximum: 1 description: | Relevance score between 0.0 and 1.0, where higher scores indicate greater relevance to the query. example: 0.92 required: - doc - score CreateResponseRequest: type: object additionalProperties: false properties: model: description: >- ID of the model to use. For details on which models are compatible with the Responses API, see available [models](https://console.groq.com/docs/models) example: llama-3.3-70b-versatile anyOf: - type: string - type: string enum: - gemma2-9b-it - llama-3.3-70b-versatile - llama-3.1-8b-instant - llama-guard-3-8b - llama3-70b-8192 - llama3-8b-8192 input: description: | Text input to the model, used to generate a response. oneOf: - type: string title: Text input description: A text input to the model, equivalent to a text input with the `user` role. - type: array title: Input item list description: A list of one or many input items to the model, containing different content types. items: $ref: '#/components/schemas/ResponseInputItem' instructions: type: string description: | Inserts a system (or developer) message as the first item in the model's context. nullable: true max_output_tokens: description: > An upper bound for the number of tokens that can be generated for a response, including visible output tokens and reasoning tokens. type: integer nullable: true temperature: type: number minimum: 0 maximum: 2 default: 1 example: 1 nullable: true description: > Controls randomness in the response generation. Range: 0 to 2. Lower values produce more deterministic outputs, higher values increase variety and creativity. top_p: type: number minimum: 0 maximum: 1 default: 1 example: 1 nullable: true description: > Nucleus sampling parameter that controls the cumulative probability cutoff. Range: 0 to 1. A value of 0.1 restricts sampling to tokens within the top 10% probability mass. tools: type: array nullable: true maxItems: 128 description: > List of tools available to the model. Currently supports function definitions only. Maximum of 128 functions. items: $ref: '#/components/schemas/ResponseTool' tool_choice: $ref: '#/components/schemas/ResponseToolChoiceOption' text: type: object description: | Response format configuration. 
Supports plain text or structured JSON output. properties: format: $ref: '#/components/schemas/ResponseFormatConfiguration' reasoning: type: object nullable: true description: > Configuration for reasoning capabilities when using [models that support reasoning](https://console.groq.com/docs/reasoning). properties: effort: type: string enum: - low - medium - high default: medium nullable: true description: > Level of reasoning effort. Supported values: `low`, `medium`, `high`. Lower values provide faster responses with less reasoning depth. metadata: type: object nullable: true additionalProperties: type: string description: | Custom key-value pairs for storing additional information. Maximum of 16 pairs. parallel_tool_calls: type: boolean description: | Enable parallel execution of multiple tool calls. default: true nullable: true store: type: boolean description: | Response storage flag. Note: Currently only supports false or null values. default: false nullable: true stream: description: | Enable streaming mode to receive response data as server-sent events. type: boolean nullable: true default: false user: type: string example: user-1234 description: | Optional identifier for tracking end-user requests. Useful for usage monitoring and compliance. service_tier: type: string description: | Specifies the latency tier to use for processing the request. enum: - auto - default - flex nullable: true default: auto truncation: type: string description: | Context truncation strategy. Supported values: `auto` or `disabled`. enum: - auto - disabled nullable: true default: disabled required: - model - input ResponseInputItem: oneOf: - $ref: '#/components/schemas/ResponseEasyInputMessage' - $ref: '#/components/schemas/ResponseInputMessage' - $ref: '#/components/schemas/ResponseItemReference' - $ref: '#/components/schemas/ResponseFunctionCall' - $ref: '#/components/schemas/ResponseFunctionCallOutput' discriminator: propertyName: type mapping: message: '#/components/schemas/ResponseInputMessage' item_reference: '#/components/schemas/ResponseItemReference' function_call: '#/components/schemas/ResponseFunctionCall' function_call_output: '#/components/schemas/ResponseFunctionCallOutput' ResponseEasyInputMessage: type: object title: Easy input message description: | A message input to the model with a role indicating instruction following hierarchy. properties: role: type: string description: | The role of the message input. One of `user`, `assistant`, `system`, or `developer`. enum: - user - assistant - system - developer content: description: | Text input to the model. oneOf: - type: string title: Text input description: A text input to the model. - type: array title: Content array description: An array of content parts. items: $ref: '#/components/schemas/ResponseInputContent' required: - role - content ResponseInputMessage: type: object title: Input message description: | A message input to the model with explicit type field. properties: type: type: string description: The type of the message input. Always set to `message`. enum: - message x-stainless-const: true role: type: string description: > The role of the message input. One of `user`, `system`, or `developer`. Note: assistant role is not supported with explicit type. enum: - user - system - developer content: type: array description: A list of one or many input content items. items: $ref: '#/components/schemas/ResponseInputContent' status: type: string description: The status of item. Populated when items are returned via API. 
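The `RerankingRequest` and `RerankingResponse` schemas a little earlier describe document reranking: up to 100 documents come back sorted by relevance score. A hedged sketch over plain HTTP, since the endpoint path is not shown in this section (`/openai/v1/rerank` is an assumption):

```python
import os
import requests

payload = {  # RerankingRequest
    "model": "qwen3-reranker-4b",
    "query": "artificial intelligence research",
    "docs": [
        "Machine learning is a subset of artificial intelligence",
        "The weather forecast predicts rain tomorrow",
        "Deep learning uses neural networks with multiple layers",
    ],
}

resp = requests.post(
    "https://api.groq.com/openai/v1/rerank",  # assumed path
    headers={"Authorization": f"Bearer {os.environ['GROQ_API_KEY']}"},
    json=payload,
    timeout=30,
)
resp.raise_for_status()
for result in resp.json()["results"]:  # RerankingResult, sorted by score descending
    print(f"{result['score']:.2f}  {result['doc']}")
```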
enum: - in_progress - completed - incomplete required: - type - role - content ResponseInputContent: oneOf: - $ref: '#/components/schemas/ResponseInputTextContent' discriminator: propertyName: type mapping: input_text: '#/components/schemas/ResponseInputTextContent' ResponseInputTextContent: type: object title: Input text description: A text input to the model. properties: type: type: string enum: - input_text description: The type of the input item. Always `input_text`. x-stainless-const: true text: type: string description: The text input to the model. required: - type - text ResponseItemReference: type: object title: Item reference description: An internal identifier for an item to reference. properties: type: type: string enum: - item_reference description: The type of item to reference. Always `item_reference`. x-stainless-const: true id: type: string description: The ID of the item to reference. required: - type - id ResponseFunctionCall: type: object title: Function call description: A function call generated by the model. properties: type: type: string enum: - function_call description: The type of the function call. Always `function_call`. x-stainless-const: true id: type: string description: The unique ID of the function tool call. call_id: type: string description: The unique ID of the function tool call generated by the model. name: type: string description: The name of the function to call. arguments: type: string description: A JSON string of the arguments to pass to the function. status: type: string description: The status of the item. enum: - in_progress - completed - incomplete required: - type - call_id - name - arguments ResponseFunctionCallOutput: type: object title: Function call output description: The output of a function tool call. properties: type: type: string enum: - function_call_output description: The type of the function tool call output. Always `function_call_output`. x-stainless-const: true id: type: string description: The unique ID of the function tool call output. call_id: type: string description: The unique ID of the function tool call generated by the model. output: type: string description: A JSON string of the output of the function tool call. status: type: string description: The status of the item. enum: - in_progress - completed - incomplete required: - type - call_id - output ResponseTool: type: object properties: type: type: string enum: - function description: The type of the tool. Currently, only `function` is supported. name: type: string description: >- The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. description: type: string description: Describes the function's purpose. The model uses this to determine when to invoke the function. parameters: $ref: '#/components/schemas/FunctionParameters' strict: type: boolean nullable: true description: Whether to enable strict schema adherence when generating the function call. required: - type - name ResponseToolChoiceOption: nullable: true description: > Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can pick between generating a message or calling one or more tools. `required` means the model must call one or more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool. `none` is the default when no tools are present. 
`auto` is the default if tools are present. oneOf: - type: string description: > `none` means the model will not call any tool and instead generates a message. `auto` means the model can pick between generating a message or calling one or more tools. `required` means the model must call one or more tools. enum: - none - auto - required - $ref: '#/components/schemas/ResponseNamedToolChoice' ResponseNamedToolChoice: type: object description: Specifies a tool the model should use. Use this to force the model to call a specific function. properties: type: type: string enum: - function description: The type of the tool. Currently, only `function` is supported. function: type: object properties: name: type: string description: The name of the function to call. required: - name required: - type - function ResponseFormatConfiguration: description: | An object specifying the format that the model must output. oneOf: - $ref: '#/components/schemas/ResponseFormatText' - $ref: '#/components/schemas/ResponseFormatJsonObject' - $ref: '#/components/schemas/TextResponseFormatJsonSchema' TextResponseFormatJsonSchema: type: object title: JSON schema description: | JSON Schema response format. Used to generate structured JSON responses. properties: type: type: string description: The type of response format being defined. Always `json_schema`. enum: - json_schema x-stainless-const: true name: type: string description: > The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. description: type: string description: > A description of what the response format is for, used by the model to determine how to respond in the format. schema: type: object description: | The schema for the response format, described as a JSON Schema object. additionalProperties: true strict: type: boolean nullable: true default: false description: | Whether to enable strict schema adherence when generating the output. required: - type - name - schema CreateResponseResponse: type: object description: Represents a response returned by the model, based on the provided input. properties: id: type: string description: A unique identifier for the response. object: type: string description: The object type, which is always `response`. enum: - response status: type: string description: > The status of the response generation. One of `completed`, `failed`, `in_progress`, or `incomplete`. enum: - completed - failed - in_progress - incomplete created_at: type: integer description: The Unix timestamp (in seconds) of when the response was created. output: type: array description: An array of content items generated by the model. items: $ref: '#/components/schemas/ResponseOutputItem' previous_response_id: type: string description: Not supported. Always null. nullable: true model: type: string description: The model used for the response. reasoning: type: object nullable: true description: >- Configuration options for [models that support reasoning](https://console.groq.com/docs/reasoning). properties: effort: type: string enum: - low - medium - high nullable: true description: The reasoning effort level used. summary: type: string nullable: true description: Not supported. Always null. max_output_tokens: type: integer nullable: true description: The maximum number of tokens configured for the response. instructions: type: string nullable: true description: The system instructions used for the response. text: type: object description: Text format configuration used for the response.
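# Illustrative, non-normative sketch combining the ResponseTool, ResponseToolChoiceOption,
# and function_call / function_call_output input item schemas defined above. The function
# name, call IDs, and argument values are hypothetical placeholders:
# {
#   "model": "<model-id>",
#   "input": [
#     { "role": "user", "content": "What is the weather in Berlin?" },
#     { "type": "function_call", "call_id": "call_123", "name": "get_weather",
#       "arguments": "{\"city\": \"Berlin\"}" },
#     { "type": "function_call_output", "call_id": "call_123", "output": "{\"temp_c\": 21}" }
#   ],
#   "tools": [
#     { "type": "function", "name": "get_weather",
#       "description": "Look up current weather for a city.",
#       "parameters": { "type": "object",
#                       "properties": { "city": { "type": "string" } },
#                       "required": ["city"] } }
#   ],
#   "tool_choice": "auto"
# }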
properties: format: $ref: '#/components/schemas/ResponseFormatConfiguration' tools: type: array description: The tools that were available to the model. items: $ref: '#/components/schemas/ResponseTool' tool_choice: $ref: '#/components/schemas/ResponseToolChoiceOption' truncation: type: string description: The truncation strategy used. enum: - auto - disabled metadata: type: object nullable: true additionalProperties: type: string description: Metadata attached to the response. temperature: type: number description: The sampling temperature used. top_p: type: number description: The nucleus sampling parameter used. user: type: string nullable: true description: The user identifier. service_tier: type: string description: The service tier used for processing. enum: - auto - default - flex error: type: object nullable: true description: An error object if the response failed. properties: code: type: string description: The error code. message: type: string description: A human-readable error message. required: - code - message incomplete_details: type: object nullable: true description: Details about why the response is incomplete. properties: reason: type: string description: The reason why the response is incomplete. usage: $ref: '#/components/schemas/ResponseUsage' parallel_tool_calls: type: boolean description: Whether the model can run tool calls in parallel. store: type: boolean description: Whether the response was stored. background: type: boolean description: Whether the response was generated in the background. default: false top_logprobs: type: integer description: The number of top log probabilities returned. default: 0 max_tool_calls: type: integer nullable: true description: The maximum number of tool calls allowed. required: - id - object - status - created_at - output - model - tools - tool_choice - truncation - metadata - temperature - top_p - service_tier - error - incomplete_details - parallel_tool_calls - store ResponseOutputItem: oneOf: - $ref: '#/components/schemas/ResponseOutputMessage' - $ref: '#/components/schemas/ResponseOutputFunctionCall' - $ref: '#/components/schemas/ResponseOutputReasoning' discriminator: propertyName: type mapping: message: '#/components/schemas/ResponseOutputMessage' function_call: '#/components/schemas/ResponseOutputFunctionCall' reasoning: '#/components/schemas/ResponseOutputReasoning' ResponseOutputMessage: type: object title: Output message description: An output message from the model. properties: type: type: string description: The type of the output message. Always `message`. enum: - message x-stainless-const: true id: type: string description: The unique ID of the output message. status: type: string description: The status of the message. enum: - in_progress - completed - incomplete role: type: string description: The role of the output message. Always `assistant`. enum: - assistant x-stainless-const: true content: type: array description: The content of the output message. items: $ref: '#/components/schemas/ResponseOutputContent' required: - type - id - role - content ResponseOutputContent: oneOf: - $ref: '#/components/schemas/ResponseOutputTextContent' discriminator: propertyName: type mapping: output_text: '#/components/schemas/ResponseOutputTextContent' ResponseOutputTextContent: type: object title: Output text description: A text output from the model. properties: type: type: string enum: - output_text description: The type of the output text. Always `output_text`. 
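# Illustrative, non-normative sketch of a completed response object matching the
# CreateResponseResponse, ResponseOutputMessage, and ResponseOutputTextContent schemas
# above. IDs, the timestamp, the model name, and sampling values are placeholders:
# {
#   "id": "resp_abc123",
#   "object": "response",
#   "status": "completed",
#   "created_at": 1727280000,
#   "model": "<model-id>",
#   "output": [
#     { "type": "message", "id": "msg_abc123", "status": "completed", "role": "assistant",
#       "content": [ { "type": "output_text", "text": "Hello!", "annotations": [] } ] }
#   ],
#   "previous_response_id": null,
#   "tools": [],
#   "tool_choice": "auto",
#   "truncation": "disabled",
#   "metadata": null,
#   "temperature": 1,
#   "top_p": 1,
#   "service_tier": "default",
#   "error": null,
#   "incomplete_details": null,
#   "parallel_tool_calls": true,
#   "store": false
# }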
x-stainless-const: true text: type: string description: The text output from the model. annotations: type: array description: The annotations of the text output. items: $ref: '#/components/schemas/ResponseAnnotation' logprobs: type: array nullable: true description: Log probability information for the output. items: type: string required: - type - text - annotations ResponseAnnotation: oneOf: - $ref: '#/components/schemas/ResponseFileCitation' - $ref: '#/components/schemas/ResponseUrlCitation' discriminator: propertyName: type mapping: file_citation: '#/components/schemas/ResponseFileCitation' url_citation: '#/components/schemas/ResponseUrlCitation' ResponseFileCitation: type: object title: File citation description: A citation to a file. properties: type: type: string enum: - file_citation description: The type of the annotation. Always `file_citation`. x-stainless-const: true file_id: type: string description: The ID of the file. index: type: integer description: The index of the citation in the text. required: - type - file_id ResponseUrlCitation: type: object title: URL citation description: A citation for a web resource. properties: type: type: string enum: - url_citation description: The type of the annotation. Always `url_citation`. x-stainless-const: true url: type: string description: The URL of the web resource. start_index: type: integer description: The index of the first character of the URL citation in the message. end_index: type: integer description: The index of the last character of the URL citation in the message. title: type: string description: The title of the web resource. required: - type - url ResponseOutputFunctionCall: type: object title: Function call description: A function call generated by the model. properties: type: type: string enum: - function_call description: The type of the function call. Always `function_call`. x-stainless-const: true id: type: string description: The unique ID of the function tool call. status: type: string description: The status of the function call. enum: - in_progress - completed - incomplete call_id: type: string description: The unique ID of the function tool call generated by the model. name: type: string description: The name of the function to call. arguments: type: string description: A JSON string of the arguments to pass to the function. required: - type - id - call_id - name - arguments ResponseOutputReasoning: type: object title: Reasoning description: >- A reasoning output from the model. Available for [models that support reasoning](https://console.groq.com/docs/reasoning). properties: type: type: string enum: - reasoning description: The type of the reasoning output. Always `reasoning`. x-stainless-const: true id: type: string description: The unique ID of the reasoning output. summary: type: array description: Summary items (currently empty). items: type: object required: - type - id - summary ResponseUsage: type: object description: Usage statistics for the response request. properties: input_tokens: type: integer description: Number of tokens in the input. input_tokens_details: type: object description: Breakdown of input tokens. properties: cached_tokens: type: integer description: Number of cached tokens. reasoning_tokens: type: integer description: Number of reasoning tokens. required: - cached_tokens output_tokens: type: integer description: Number of tokens in the generated output. output_tokens_details: type: object description: Breakdown of output tokens. 
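# Illustrative, non-normative fragment of a response `output` array containing the
# reasoning and function_call output items defined above (ResponseOutputReasoning and
# ResponseOutputFunctionCall). IDs, the function name, and arguments are placeholders:
# "output": [
#   { "type": "reasoning", "id": "rs_abc123", "summary": [] },
#   { "type": "function_call", "id": "fc_abc123", "status": "completed",
#     "call_id": "call_123", "name": "get_weather",
#     "arguments": "{\"city\": \"Berlin\"}" }
# ]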
properties: cached_tokens: type: integer description: Number of cached tokens. reasoning_tokens: type: integer description: Number of reasoning tokens. required: - cached_tokens - reasoning_tokens total_tokens: type: integer description: Total number of tokens used in the request (input + output). required: - input_tokens - input_tokens_details - output_tokens - output_tokens_details - total_tokens security: - api_key: [] x-groq-metadata: groups: - id: chat type: endpoints title: Chat description: '' sections: - type: endpoint key: createChatCompletion path: create - id: responses type: endpoints title: Responses (beta) description: '' sections: - type: endpoint key: createResponse path: create - id: audio type: endpoints title: Audio description: '' sections: - type: endpoint key: createTranscription path: transcription - type: endpoint key: createTranslation path: translation - type: endpoint key: createSpeech path: speech - id: models type: endpoints title: Models description: '' sections: - type: endpoint key: listModels path: list - type: endpoint key: retrieveModel path: retrieve - id: batches type: endpoints title: Batches description: '' sections: - type: endpoint key: createBatch path: create - type: endpoint key: retrieveBatch path: retrieve - type: endpoint key: listBatches path: list - type: endpoint key: cancelBatch path: cancel - id: files type: endpoints title: Files description: '' sections: - type: endpoint key: uploadFile path: upload - type: endpoint key: listFiles path: list - type: endpoint key: deleteFile path: delete - type: endpoint key: retrieveFile path: retrieve - type: endpoint key: downloadFile path: download - id: fine-tuning type: endpoints title: Fine Tuning description: '' sections: - type: endpoint key: listFineTunings path: list - type: endpoint key: createFineTuning path: create - type: endpoint key: getFineTuning path: get - type: endpoint key: deleteFineTuning path: delete
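# Illustrative, non-normative fragment of the `usage` field as described by the
# ResponseUsage schema defined above. Token counts are placeholders (36 input + 87 output
# = 123 total):
# "usage": {
#   "input_tokens": 36,
#   "input_tokens_details": { "cached_tokens": 0 },
#   "output_tokens": 87,
#   "output_tokens_details": { "cached_tokens": 0, "reasoning_tokens": 64 },
#   "total_tokens": 123
# }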