{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://api.cohere.com/schemas/cohere/dataset.json",
  "title": "Cohere Dataset",
  "description": "Represents a dataset managed through the Cohere Datasets API, used for embed jobs, fine-tuning, and other batch processing tasks.",
  "type": "object",
  "required": ["id", "name", "dataset_type"],
  "properties": {
    "id": {
      "type": "string",
      "description": "The unique identifier of the dataset."
    },
    "name": {
      "type": "string",
      "description": "The human-readable name of the dataset."
    },
    "dataset_type": {
      "type": "string",
      "description": "The type of dataset, which determines its schema and compatible operations.",
      "enum": [
        "embed-input",
        "embed-output",
        "reranker-finetune-input",
        "prompt-completion-finetune-input",
        "single-label-classification-finetune-input",
        "chat-finetune-input"
      ]
    },
    "validation_status": {
      "type": "string",
      "description": "The current validation status of the dataset after upload.",
      "enum": [
        "Unknown",
        "Queued",
        "Processing",
        "Validated",
        "Skipped",
        "Failed"
      ]
    },
    "created_at": {
      "type": "string",
      "format": "date-time",
      "description": "The timestamp when the dataset was created, as an RFC 3339 date-time string (a profile of ISO 8601), e.g. 2023-11-10T00:00:00Z."
    },
    "updated_at": {
      "type": "string",
      "format": "date-time",
      "description": "The timestamp when the dataset was last updated, as an RFC 3339 date-time string (a profile of ISO 8601), e.g. 2023-11-10T00:00:00Z."
    },
    "schema": {
      "type": "string",
      "description": "The expected schema definition for the dataset records."
    },
    "required_fields": {
      "type": "array",
      "description": "The field names required in each record of the dataset.",
      "items": {
        "type": "string"
      }
    },
    "preserve_fields": {
      "type": "array",
      "description": "The field names that are preserved through processing.",
      "items": {
        "type": "string"
      }
    },
    "validation_error": {
      "type": "string",
      "description": "The error message if dataset validation failed."
    },
    "validation_warnings": {
      "type": "array",
      "description": "Warning messages for rows that were dropped during validation.",
      "items": {
        "type": "string"
      }
    }
  }
}