{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
  "properties": {
    "$schema": {
      "description": "JSON Schema URI for editor validation. Typically set automatically by SchemaStore; can be added explicitly.",
      "type": "string"
    },
    "version": {
      "type": "string",
      "const": "1",
      "description": "Schema version. Must be '1'.",
      "examples": [
        "1"
      ]
    },
    "strategy": {
      "description": "Default chunking strategy applied to all files in this directory tree unless overridden by a more specific rule.",
      "type": "object",
      "properties": {
        "chunk_by": {
          "type": "string",
          "enum": [
            "h1",
            "h2",
            "h3",
            "file"
          ],
          "description": "The heading level at which to split markdown into chunks. 'h1' splits at top-level headings, 'h2'/'h3' at progressively finer granularity, and 'file' treats the entire file as one chunk.",
          "examples": [
            "h2"
          ]
        },
        "max_chunk_size": {
          "description": "Maximum chunk size in characters (default: 20000). Oversized chunks are first split recursively at finer heading levels (e.g. h2→h3→h4→…→h6), preserving semantic structure and breadcrumbs. Only when no further sub-headings exist does it fall back to AST node boundary splitting.",
          "examples": [
            8000
          ],
          "type": "integer",
          "exclusiveMinimum": 0,
          "maximum": 9007199254740991
        },
        "min_chunk_size": {
          "description": "Minimum chunk size in characters. Trailing chunks smaller than this are merged into the preceding chunk to avoid fragments.",
          "examples": [
            200
          ],
          "type": "integer",
          "exclusiveMinimum": 0,
          "maximum": 9007199254740991
        }
      },
      "required": [
        "chunk_by"
      ],
      "additionalProperties": false
    },
    "metadata": {
      "description": "Key-value pairs attached to every chunk produced from this directory tree. Each key becomes a filterable taxonomy dimension exposed as an enum parameter on the search tool.",
      "examples": [
        {
          "language": "typescript",
          "scope": "sdk-specific"
        }
      ],
      "type": "object",
      "propertyNames": {
        "type": "string"
      },
      "additionalProperties": {
        "type": "string"
      }
    },
    "taxonomy": {
      "examples": [
        {
          "language": {
            "vector_collapse": true
          }
        }
      ],
      "type": "object",
      "propertyNames": {
        "type": "string"
      },
      "additionalProperties": {
        "type": "object",
        "properties": {
          "vector_collapse": {
            "default": false,
            "description": "When true, this taxonomy dimension identifies content variants that are near-identical in vector space (e.g. the same API operation documented in multiple SDK languages). At search time, results sharing the same content identity — determined by normalizing this field's value out of the filepath — are collapsed to the highest-scoring result. Has no effect when a filter for this field is active, since the filter already restricts to a single value.",
            "type": "boolean"
          },
          "properties": {
            "description": "Per-value configuration for this taxonomy dimension. Keys are taxonomy values (e.g. 'typescript', 'python').",
            "type": "object",
            "propertyNames": {
              "type": "string"
            },
            "additionalProperties": {
              "type": "object",
              "properties": {
                "mcp_resource": {
                  "default": false,
                  "description": "When true, documents with this taxonomy dimension value are exposed as first-class MCP resources.",
                  "type": "boolean"
                }
              },
              "required": [
                "mcp_resource"
              ],
              "additionalProperties": false,
              "description": "Per-value configuration for a taxonomy dimension value."
            }
          }
        },
        "required": [
          "vector_collapse"
        ],
        "additionalProperties": false,
        "description": "Configuration for a taxonomy field's search-time behavior."
      },
      "description": "Per-field configuration for taxonomy dimensions. Controls search-time behavior such as cross-language result collapsing."
    },
    "mcpServerInstructions": {
      "description": "Custom MCP server instructions sent to clients during initialization. Helps coding agents understand what this server provides and how to use it effectively.",
      "type": "string"
    },
    "overrides": {
      "description": "Per-file-pattern overrides for chunking strategy and metadata. Evaluated top-to-bottom; last match wins.",
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "pattern": {
            "type": "string",
            "minLength": 1,
            "description": "A glob pattern matched against file paths relative to the directory containing the manifest.",
            "examples": [
              "guides/advanced/*.md"
            ]
          },
          "strategy": {
            "description": "Chunking strategy override for files matching this pattern. Replaces the root strategy entirely.",
            "type": "object",
            "properties": {
              "chunk_by": {
                "type": "string",
                "enum": [
                  "h1",
                  "h2",
                  "h3",
                  "file"
                ],
                "description": "The heading level at which to split markdown into chunks. 'h1' splits at top-level headings, 'h2'/'h3' at progressively finer granularity, and 'file' treats the entire file as one chunk.",
                "examples": [
                  "h2"
                ]
              },
              "max_chunk_size": {
                "description": "Maximum chunk size in characters (default: 20000). Oversized chunks are first split recursively at finer heading levels (e.g. h2→h3→h4→…→h6), preserving semantic structure and breadcrumbs. Only when no further sub-headings exist does it fall back to AST node boundary splitting.",
                "examples": [
                  8000
                ],
                "type": "integer",
                "exclusiveMinimum": 0,
                "maximum": 9007199254740991
              },
              "min_chunk_size": {
                "description": "Minimum chunk size in characters. Trailing chunks smaller than this are merged into the preceding chunk to avoid fragments.",
                "examples": [
                  200
                ],
                "type": "integer",
                "exclusiveMinimum": 0,
                "maximum": 9007199254740991
              }
            },
            "required": [
              "chunk_by"
            ],
            "additionalProperties": false
          },
          "metadata": {
            "description": "Metadata key-value pairs merged with root metadata for matching files (override keys win). Each key becomes a filterable taxonomy dimension in the search API.",
            "examples": [
              {
                "scope": "global-guide"
              }
            ],
            "type": "object",
            "propertyNames": {
              "type": "string"
            },
            "additionalProperties": {
              "type": "string"
            }
          }
        },
        "required": [
          "pattern"
        ],
        "additionalProperties": false,
        "description": "Overrides the default chunking strategy and/or metadata for files matching a glob pattern. Within the overrides array, later matches take precedence."
      }
    }
  },
  "required": [
    "version"
  ],
  "additionalProperties": false,
  "description": "Docs MCP configuration file (.docs-mcp.json) that controls how documentation is chunked, tagged, and indexed for search."
}