{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/TokenizeRequest",
  "title": "TokenizeRequest",
  "type": "object",
  "description": "Request body for the generic tokenize endpoint.",
  "required": [
    "text",
    "tokenization"
  ],
  "properties": {
    "text": {
      "type": "string",
      "description": "The text to tokenize."
    },
    "tokenization": {
      "type": "string",
      "description": "The tokenization method to apply.",
      "enum": [
        "word",
        "lowercase",
        "whitespace",
        "field",
        "trigram",
        "gse",
        "kagome_kr",
        "kagome_ja",
        "gse_ch"
      ]
    },
    "analyzerConfig": {
      "$ref": "#/components/schemas/TextAnalyzerConfig"
    },
    "stopwords": {
      "$ref": "#/components/schemas/StopwordConfig"
    },
    "stopwordPresets": {
      "type": "object",
      "description": "Optional user-defined named stopword presets. Shape matches InvertedIndexConfig.stopwordPresets on a collection: each key is a preset name, each value is a plain list of stopwords. A preset name that matches a built-in ('en', 'none') fully replaces the built-in. Preset names must not be empty or whitespace-only; each word list must contain at least one word; individual words must not be empty or whitespace-only. Mutually exclusive with stopwords \u2014 pass one or the other, not both.",
      "propertyNames": {
        "minLength": 1
      },
      "additionalProperties": {
        "type": "array",
        "description": "The stopwords belonging to one preset; must contain at least one word.",
        "minItems": 1,
        "items": {
          "type": "string",
          "description": "A single stopword; must not be empty.",
          "minLength": 1
        }
      }
    }
  }
}