{ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/api-evangelist/reducto/refs/heads/main/json-schema/reducto-syncsplitconfig.json", "title": "SyncSplitConfig", "description": "Reducto SyncSplitConfig schema", "properties": { "input": { "anyOf": [ { "type": "string" }, { "items": { "type": "string" }, "type": "array" }, { "$ref": "#/components/schemas/UploadResponse" } ], "title": "Input", "description": "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following:\n 1. A publicly available URL\n 2. A presigned S3 URL\n 3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n 4. A jobid:// prefixed URL obtained from a previous /parse invocation\n 5. A list of URLs (for multi-document pipelines, V3 API only)\n\n For edit pipelines, this should be a string containing the edit instructions." }, "parsing": { "$ref": "#/components/schemas/ParseOptions", "description": "The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored.", "default": { "enhance": { "agentic": [], "intelligent_ordering": false, "summarize_figures": true }, "retrieval": { "chunking": { "chunk_mode": "disabled", "chunk_overlap": 0 }, "embedding_optimized": false, "filter_blocks": [] }, "formatting": { "add_page_markers": false, "include": [], "merge_tables": false, "table_output_format": "dynamic" }, "spreadsheet": { "clustering": "accurate", "exclude": [], "include": [], "split_large_tables": { "enabled": true, "size": 50 } }, "settings": { "embed_pdf_metadata": false, "embed_pdf_metadata_dpi": 100, "extraction_mode": "hybrid", "force_url_result": false, "hybrid_vpc": {}, "ocr_system": "standard", "persist_results": false, "return_images": [], "return_ocr_data": false } } }, "split_description": { "items": { "$ref": "#/components/schemas/SplitCategory" }, "type": "array", "title": "Split Description", "description": "The configuration options for processing the document." }, "split_rules": { "type": "string", "title": "Split Rules", "description": "The prompt that describes rules for splitting the document.", "default": "Split the document into the applicable sections. Sections may only overlap at their first and last page if at all." }, "settings": { "$ref": "#/components/schemas/SplitSettings", "description": "The settings for split processing.", "default": { "table_cutoff": "truncate", "allow_page_overlap": true, "deep_split": false } } }, "type": "object", "required": [ "input", "split_description" ] }