{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://trychroma.com/schemas/chroma/collection.json",
  "title": "Chroma Collection",
  "description": "A Chroma collection stores embeddings, documents, and associated metadata. Collections are the primary unit for organizing and searching vector data within a database.",
  "type": "object",
  "required": ["id", "name"],
  "properties": {
    "id": {
      "type": "string",
      "format": "uuid",
      "description": "The unique identifier of the collection, assigned by the server on creation"
    },
    "name": {
      "type": "string",
      "minLength": 1,
      "maxLength": 512,
      "pattern": "^[a-zA-Z0-9_-]+$",
      "description": "The name of the collection, used as a human-readable identifier within a database"
    },
    "metadata": {
      "type": ["object", "null"],
      "additionalProperties": true,
      "description": "Arbitrary key-value metadata associated with the collection, used for organizing and describing the collection's purpose and configuration"
    },
    "tenant": {
      "type": "string",
      "description": "The name of the tenant this collection belongs to"
    },
    "database": {
      "type": "string",
      "description": "The name of the database this collection belongs to"
    }
  },
  "$defs": {
    "CollectionConfiguration": {
      "type": "object",
      "description": "Configuration options that can be set in collection metadata to control embedding and indexing behavior",
      "properties": {
        "hnsw:space": {
          "type": "string",
          "enum": ["l2", "ip", "cosine"],
          "default": "l2",
          "description": "The distance function used for nearest neighbor search. l2 is Euclidean distance, ip is inner product, cosine is cosine similarity."
        },
        "hnsw:construction_ef": {
          "type": "integer",
          "minimum": 1,
          "default": 100,
          "description": "The size of the dynamic candidate list during HNSW index construction. Higher values improve recall at the cost of indexing speed."
        },
        "hnsw:search_ef": {
          "type": "integer",
          "minimum": 1,
          "default": 10,
          "description": "The size of the dynamic candidate list during search. Higher values improve recall at the cost of search speed."
        },
        "hnsw:M": {
          "type": "integer",
          "minimum": 2,
          "default": 16,
          "description": "The maximum number of bi-directional links per element in the HNSW graph. Higher values improve recall at the cost of memory."
        },
        "hnsw:num_threads": {
          "type": "integer",
          "minimum": 1,
          "default": 4,
          "description": "Number of threads to use during HNSW index construction"
        }
      }
    }
  }
}