---
# Reducto API - OpenAPI 3.0.3 description.
#
# Layout of this document:
#   info / servers / security  - API metadata and the global bearer-token requirement
#   paths                      - one POST per operation, most with an "*_async" twin,
#                                plus job retrieval/cancel/listing, upload, webhook and version
#   components.securitySchemes - the BearerAuth scheme referenced by `security`
#   components.schemas         - request/response models, in alphabetical order
#
# Convention used throughout components.schemas: in OpenAPI 3.0 any keyword placed next to
# a `$ref` is ignored, so properties that need their own `default`, `description`
# or `nullable` wrap the reference in a single-item `allOf`.
openapi: 3.0.3
info:
  title: Reducto API
  description: >-
    REST API for parsing, extracting, splitting, classifying, and editing complex documents.
    Supports PDFs, Word files, spreadsheets, presentations, and scanned images using
    layout-aware OCR and vision language models.
  version: v1.11.80-78-gc5c4ff11c
  contact:
    url: https://reducto.ai/
    email: support@reducto.ai
  license:
    name: Proprietary
servers:
  - url: https://platform.reducto.ai
# Every operation requires the bearer token unless it overrides `security` itself.
security:
  - BearerAuth: []
paths:
  /parse:
    post:
      summary: Parse
      operationId: parse_parse_post
      requestBody:
        content:
          application/json:
            schema:
              oneOf:
                - $ref: '#/components/schemas/SyncParseConfig'
                - $ref: '#/components/schemas/AsyncParseConfig'
        required: true
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                anyOf:
                  - $ref: '#/components/schemas/ParseResponse'
                  - $ref: '#/components/schemas/AsyncParseResponse'
                title: Response Parse Parse Post
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /parse_async:
    post:
      summary: Async Parse
      operationId: async_parse_parse_async_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AsyncParseConfig'
        required: true
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AsyncParseResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /extract:
    post:
      summary: Extract
      operationId: extract_extract_post
      requestBody:
        content:
          application/json:
            schema:
              oneOf:
                - $ref: '#/components/schemas/SyncExtractConfig'
                - $ref: '#/components/schemas/AsyncExtractConfig'
        required: true
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                oneOf:
                  - $ref: '#/components/schemas/V3ExtractResponse'
                  - $ref: '#/components/schemas/AsyncExtractResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /extract_async:
    post:
      summary: Extract Async
      operationId: extract_async_extract_async_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AsyncExtractConfig'
        required: true
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AsyncExtractResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /split:
    post:
      summary: Split
      operationId: split_split_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SyncSplitConfig'
        required: true
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SplitResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /split_async:
    post:
      summary: Split Async
      operationId: split_async_split_async_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/config__v3__AsyncSplitConfig'
        required: true
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AsyncSplitResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /edit:
    post:
      summary: Edit
      operationId: edit_edit_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EditConfig'
        required: true
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EditResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /edit_async:
    post:
      summary: Edit Async
      operationId: edit_async_edit_async_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AsyncEditConfig'
        required: true
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AsyncEditResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /pipeline:
    post:
      summary: Pipeline
      operationId: pipeline_pipeline_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/V3PipelineConfig'
        required: true
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PipelineResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /pipeline_async:
    post:
      summary: Pipeline Async
      operationId: pipeline_async_pipeline_async_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/V3AsyncPipelineConfig'
        required: true
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AsyncPipelineResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /classify:
    post:
      summary: Classify
      operationId: classify_classify_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ClassifyConfig'
        required: true
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ClassifyResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /job/{job_id}:
    get:
      summary: Retrieve Parse
      operationId: retrieve_parse_job__job_id__get
      parameters:
        - name: job_id
          in: path
          required: true
          schema:
            type: string
            title: Job Id
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                anyOf:
                  - $ref: '#/components/schemas/AsyncJobResponse'
                  - $ref: '#/components/schemas/EnhancedAsyncJobResponse'
                title: Response Retrieve Parse Job  Job Id  Get
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /cancel/{job_id}:
    post:
      summary: Cancel Job
      operationId: cancel_job_cancel__job_id__post
      parameters:
        - name: job_id
          in: path
          required: true
          schema:
            type: string
            title: Job Id
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              # Empty schema: the response body is left unconstrained.
              schema: {}
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /jobs:
    get:
      summary: Get Jobs
      operationId: get_jobs_jobs_get
      parameters:
        - name: exclude_configs
          in: query
          required: false
          schema:
            type: boolean
            description: Exclude raw_config from response to reduce size
            default: false
            title: Exclude Configs
          description: Exclude raw_config from response to reduce size
        - name: cursor
          in: query
          required: false
          schema:
            type: string
            nullable: true
            description: >-
              Cursor for pagination. Use the next_cursor from the previous response to fetch
              the next page.
            title: Cursor
          description: >-
            Cursor for pagination. Use the next_cursor from the previous response to fetch
            the next page.
        - name: limit
          in: query
          required: false
          schema:
            type: integer
            maximum: 500
            minimum: 1
            description: Maximum number of jobs to return per page. Defaults to 100, max 500.
            default: 100
            title: Limit
          description: Maximum number of jobs to return per page. Defaults to 100, max 500.
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JobsResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /upload:
    post:
      summary: Upload
      operationId: upload_upload_post
      parameters:
        - name: extension
          in: query
          required: false
          schema:
            type: string
            nullable: true
            title: Extension
      requestBody:
        content:
          multipart/form-data:
            schema:
              $ref: '#/components/schemas/Body_upload_upload_post'
          application/json:
            schema:
              $ref: '#/components/schemas/Body_upload_upload_post'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/UploadResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /configure_webhook:
    post:
      summary: Webhook Portal
      operationId: webhook_portal_configure_webhook_post
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                type: string
                title: Response Webhook Portal Configure Webhook Post
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /version:
    get:
      summary: Get Version
      operationId: get_version_version_get
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                type: string
                title: Response Get Version Version Get
components:
  securitySchemes:
    BearerAuth:
      type: http
      scheme: bearer
      description: API key passed as a Bearer token in the Authorization header.
  schemas:
    AsyncEditConfig:
      properties:
        document_url:
          anyOf:
            - type: string
            - $ref: '#/components/schemas/UploadResponse'
          title: Document Url
          description: |-
            The URL of the document to be processed. You can provide one of the following:
            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
        edit_instructions:
          type: string
          title: Edit Instructions
          description: The instructions for the edit.
        edit_options:
          allOf:
            - $ref: '#/components/schemas/EditOptions'
          default:
            color: '#FF0000'
            enable_overflow_pages: false
            flatten: false
        form_schema:
          items:
            $ref: '#/components/schemas/EditWidget'
          type: array
          nullable: true
          title: Form Schema
          description: >-
            Form schema for PDF forms. List of widgets with their types, descriptions, and
            bounding boxes. Only works for PDFs.
        priority:
          type: boolean
          title: Priority
          description: >-
            If True, attempts to process the job with priority if the user has priority
            processing budget available; by default, sync jobs are prioritized above async jobs.
          default: false
        webhook:
          allOf:
            - $ref: '#/components/schemas/WebhookConfigNew'
          default:
            mode: disabled
            channels: []
      type: object
      required:
        - document_url
        - edit_instructions
      title: AsyncEditConfig
    AsyncEditResponse:
      properties:
        job_id:
          type: string
          title: Job Id
      type: object
      required:
        - job_id
      title: AsyncEditResponse
    AsyncExtractConfig:
      properties:
        async:
          allOf:
            - $ref: '#/components/schemas/config__v3__AsyncConfig'
          description: >-
            The configuration options for asynchronous processing (default synchronous).
          default:
            priority: false
        input:
          anyOf:
            - type: string
            - items:
                type: string
              type: array
            - $ref: '#/components/schemas/UploadResponse'
          title: Input
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following:
            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions
        parsing:
          allOf:
            - $ref: '#/components/schemas/ParseOptions'
          description: >-
            The configuration options for parsing the document. If you are passing in a
            jobid:// URL for the file, then this configuration will be ignored.
          default:
            enhance:
              agentic: []
              intelligent_ordering: false
              summarize_figures: true
            retrieval:
              chunking:
                chunk_mode: disabled
                chunk_overlap: 0
              embedding_optimized: false
              filter_blocks: []
            formatting:
              add_page_markers: false
              include: []
              merge_tables: false
              table_output_format: dynamic
            spreadsheet:
              clustering: accurate
              exclude: []
              include: []
              split_large_tables:
                enabled: true
                size: 50
            settings:
              embed_pdf_metadata: false
              embed_pdf_metadata_dpi: 100
              extraction_mode: hybrid
              force_url_result: false
              hybrid_vpc: {}
              ocr_system: standard
              persist_results: false
              return_images: []
              return_ocr_data: false
        instructions:
          allOf:
            - $ref: '#/components/schemas/Instructions'
          description: The instructions to use for the extraction.
          default:
            schema: {}
            system_prompt: Be precise and thorough.
        settings:
          allOf:
            - $ref: '#/components/schemas/ExtractSettings'
          description: The settings to use for the extraction.
          default:
            include_images: false
            optimize_for_latency: false
            array_extract: false
            deep_extract: false
            citations:
              enabled: false
              numerical_confidence: true
      type: object
      required:
        - input
      title: AsyncExtractConfig
    AsyncExtractResponse:
      properties:
        job_id:
          type: string
          title: Job Id
      type: object
      required:
        - job_id
      title: AsyncExtractResponse
    AsyncJobResponse:
      properties:
        status:
          type: string
          enum:
            - Pending
            - Completed
            - Failed
            - Idle
          title: Status
        result:
          # The concrete result schema is selected by the `response_type` property.
          oneOf:
            - $ref: '#/components/schemas/ParseResponse'
            - $ref: '#/components/schemas/ExtractResponse'
            - $ref: '#/components/schemas/SplitResponse'
            - $ref: '#/components/schemas/EditResponse'
            - $ref: '#/components/schemas/PipelineResponse'
            - $ref: '#/components/schemas/V3ExtractResponse'
            - $ref: '#/components/schemas/ClassifyResponse'
          discriminator:
            propertyName: response_type
            mapping:
              classify: '#/components/schemas/ClassifyResponse'
              edit: '#/components/schemas/EditResponse'
              extract: '#/components/schemas/ExtractResponse'
              parse: '#/components/schemas/ParseResponse'
              pipeline: '#/components/schemas/PipelineResponse'
              split: '#/components/schemas/SplitResponse'
              v3_extract: '#/components/schemas/V3ExtractResponse'
          nullable: true
          title: Result
        progress:
          type: number
          nullable: true
          title: Progress
        reason:
          type: string
          nullable: true
          title: Reason
      type: object
      required:
        - status
      title: AsyncJobResponse
    AsyncParseConfig:
      properties:
        async:
          allOf:
            - $ref: '#/components/schemas/config__v3__AsyncConfig'
          description: >-
            The configuration options for asynchronous processing (default synchronous).
          default:
            priority: false
        input:
          anyOf:
            - type: string
            - items:
                type: string
              type: array
            - $ref: '#/components/schemas/UploadResponse'
          title: Input
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following:
            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions
        enhance:
          allOf:
            - $ref: '#/components/schemas/Enhance'
          default:
            agentic: []
            summarize_figures: true
            intelligent_ordering: false
        retrieval:
          allOf:
            - $ref: '#/components/schemas/Retrieval'
          default:
            chunking:
              chunk_mode: disabled
              chunk_overlap: 0
            filter_blocks: []
            embedding_optimized: false
        formatting:
          allOf:
            - $ref: '#/components/schemas/Formatting'
          default:
            add_page_markers: false
            table_output_format: dynamic
            merge_tables: false
            include: []
        spreadsheet:
          allOf:
            - $ref: '#/components/schemas/Spreadsheet'
          default:
            split_large_tables:
              enabled: true
              size: 50
            include: []
            clustering: accurate
            exclude: []
        settings:
          allOf:
            - $ref: '#/components/schemas/Settings'
          default:
            ocr_system: standard
            extraction_mode: hybrid
            force_url_result: false
            return_ocr_data: false
            return_images: []
            embed_pdf_metadata: false
            embed_pdf_metadata_dpi: 100
            persist_results: false
            hybrid_vpc: {}
        queue_priority:
          allOf:
            - $ref: '#/components/schemas/QueuePriority'
          description: >-
            Queue priority. 'batch' for non-urgent work that processes when spare GPU capacity
            is available. 'auto' (alias: 'standard') uses the default queue.
          default: auto
      type: object
      required:
        - input
      title: AsyncParseConfig
    AsyncParseResponse:
      properties:
        job_id:
          type: string
          title: Job Id
      type: object
      required:
        - job_id
      title: AsyncParseResponse
    AsyncPipelineResponse:
      properties:
        job_id:
          type: string
          title: Job Id
      type: object
      required:
        - job_id
      title: AsyncPipelineResponse
    AsyncSplitResponse:
      properties:
        job_id:
          type: string
          title: Job Id
      type: object
      required:
        - job_id
      title: AsyncSplitResponse
    Body_upload_upload_post:
      properties:
        file:
          anyOf:
            - type: string
              format: binary
            - type: string
          title: File
          nullable: true
      type: object
      title: Body_upload_upload_post
    BoundingBox:
      properties:
        left:
          type: number
          title: Left
        top:
          type: number
          title: Top
        width:
          type: number
          title: Width
        height:
          type: number
          title: Height
        page:
          type: integer
          title: Page
          description: The page number of the bounding box (1-indexed).
        original_page:
          type: integer
          title: Original Page
          description: >-
            The page number in the original document of the bounding box (1-indexed).
      type: object
      required:
        - left
        - top
        - width
        - height
        - page
      title: BoundingBox
    CategoryConfidence:
      properties:
        category:
          type: string
          title: Category
        confidence:
          type: number
          title: Confidence
        criteria_confidence:
          items:
            $ref: '#/components/schemas/CriteriaConfidence'
          type: array
          title: Criteria Confidence
      type: object
      required:
        - category
        - confidence
        - criteria_confidence
      title: CategoryConfidence
      description: Confidence result for a category.
    Chunking:
      properties:
        chunk_mode:
          type: string
          enum:
            - variable
            - section
            - page
            - disabled
            - block
            - page_sections
          title: Chunk Mode
          description: >-
            Choose how to partition chunks. Variable mode chunks by character length and
            visual context. Section mode chunks by section headers. Page mode chunks according
            to pages. Page sections mode chunks first by page, then by sections within each
            page. Disabled returns one single chunk.
          default: disabled
        chunk_size:
          type: integer
          nullable: true
          title: Chunk Size
          description: >-
            The approximate size of chunks (in characters) that the document will be split
            into. Defaults to null, in which case the chunk size is variable between 250 -
            1500 characters.
        chunk_overlap:
          type: integer
          title: Chunk Overlap
          description: >-
            Number of characters of overlap to include from adjacent chunks. Defaults to 0.
          default: 0
      type: object
      title: Chunking
    Citations:
      properties:
        enabled:
          type: boolean
          title: Enabled
          description: If True, include citations in the extraction.
          default: false
        numerical_confidence:
          type: boolean
          title: Numerical Confidence
          description: If True, enable numeric citation confidence scores. Defaults to True.
          default: true
      type: object
      title: Citations
    ClassificationCategory:
      properties:
        category:
          type: string
          title: Category
          description: >-
            The category name/label that documents will be classified into (e.g., 'invoice',
            'contract', 'receipt').
        criteria:
          items:
            type: string
          type: array
          title: Criteria
          description: >-
            A list of criteria, keywords, or descriptions that define what characteristics a
            document must have to be classified into this category (e.g., ['contains billing
            information', 'has itemized charges']).
      type: object
      required:
        - category
        - criteria
      title: ClassificationCategory
      description: A single classification category with its matching criteria.
    ClassifyConfig:
      properties:
        persist_results:
          type: boolean
          title: Persist Results
          description: If True, persist the results indefinitely. Defaults to False.
          default: false
        input:
          anyOf:
            - type: string
            - items:
                type: string
              type: array
            - $ref: '#/components/schemas/UploadResponse'
          title: Input
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of the following:
            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions
        classification_schema:
          items:
            $ref: '#/components/schemas/ClassificationCategory'
          type: array
          title: Classification Schema
          description: A list of classification categories and their matching criteria.
          default: []
        page_range:
          anyOf:
            - $ref: '#/components/schemas/PageRange'
            - items:
                $ref: '#/components/schemas/PageRange'
              type: array
            - items:
                type: integer
              type: array
          title: Page Range
          description: >-
            The page range to process (1-indexed). By default, the first 5 pages are used. If
            more than 25 pages are selected, only the first 25 (after sorting) are used. Only
            applies to PDFs; ignored for other document types.
          nullable: true
        document_metadata:
          type: string
          nullable: true
          title: Document Metadata
          description: Optional document-level metadata to include in classification prompts.
      type: object
      required:
        - input
      title: ClassifyConfig
    ClassifyResponse:
      properties:
        response_type:
          type: string
          title: Response Type
          default: classify
          enum:
            - classify
        job_id:
          type: string
          title: Job Id
        result:
          $ref: '#/components/schemas/ClassifyResponseCategory'
        response_confidence:
          allOf:
            - $ref: '#/components/schemas/ResponseConfidence'
          nullable: true
        usage:
          allOf:
            - $ref: '#/components/schemas/ClassifyUsage'
          nullable: true
        duration:
          type: number
          nullable: true
          title: Duration
          description: The duration of the classify request in seconds.
      type: object
      required:
        - job_id
        - result
      title: ClassifyResponse
      description: Response from classify job - returned when polling /job/{job_id}
    ClassifyResponseCategory:
      properties:
        category:
          type: string
          title: Category
      type: object
      required:
        - category
      title: ClassifyResponseCategory
    ClassifyUsage:
      properties:
        num_pages:
          type: integer
          title: Num Pages
        num_categories:
          type: integer
          title: Num Categories
        credits:
          type: number
          nullable: true
          title: Credits
      type: object
      required:
        - num_pages
        - num_categories
      title: ClassifyUsage
    CriteriaConfidence:
      properties:
        criterion:
          type: string
          title: Criterion
        confidence:
          type: string
          enum:
            - high
            - low
          title: Confidence
      type: object
      required:
        - criterion
        - confidence
      title: CriteriaConfidence
      description: Confidence result for a single criterion.
    DeepSplit:
      properties:
        name:
          type: string
          title: Name
        pages:
          items:
            $ref: '#/components/schemas/DeepSplitPageEvidence'
          type: array
          title: Pages
        partitions:
          items:
            $ref: '#/components/schemas/DeepSplitPartition'
          type: array
          nullable: true
          title: Partitions
      type: object
      required:
        - name
        - pages
      title: DeepSplit
    DeepSplitPageEvidence:
      properties:
        page_number:
          type: integer
          title: Page Number
        evidence:
          type: string
          title: Evidence
        confidence:
          type: string
          enum:
            - high
            - medium
            - low
          nullable: true
          title: Confidence
      type: object
      required:
        - page_number
        - evidence
      title: DeepSplitPageEvidence
    DeepSplitPartition:
      properties:
        name:
          type: string
          title: Name
        pages:
          items:
            $ref: '#/components/schemas/DeepSplitPageEvidence'
          type: array
          title: Pages
      type: object
      required:
        - name
        - pages
      title: DeepSplitPartition
    DeepSplitResult:
      properties:
        splits:
          items:
            $ref: '#/components/schemas/DeepSplit'
          type: array
          title: Splits
      type: object
      required:
        - splits
      title: DeepSplitResult
    DirectWebhookConfig:
      properties:
        mode:
          type: string
          title: Mode
          default: direct
          enum:
            - direct
        url:
          type: string
          title: Url
      type: object
      required:
        - url
      title: DirectWebhookConfig
    EditConfig:
      properties:
        document_url:
          anyOf:
            - type: string
            - $ref: '#/components/schemas/UploadResponse'
          title: Document Url
          description: |-
            The URL of the document to be processed. You can provide one of the following:
            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
        edit_instructions:
          type: string
          title: Edit Instructions
          description: The instructions for the edit.
        edit_options:
          allOf:
            - $ref: '#/components/schemas/EditOptions'
          default:
            color: '#FF0000'
            enable_overflow_pages: false
            flatten: false
        form_schema:
          items:
            $ref: '#/components/schemas/EditWidget'
          type: array
          nullable: true
          title: Form Schema
          description: >-
            Form schema for PDF forms. List of widgets with their types, descriptions, and
            bounding boxes. Only works for PDFs.
        priority:
          type: boolean
          title: Priority
          description: >-
            If True, attempts to process the job with priority if the user has priority
            processing budget available; by default, sync jobs are prioritized above async jobs.
          # Differs from AsyncEditConfig.priority, whose default is false.
          default: true
      type: object
      required:
        - document_url
        - edit_instructions
      title: EditConfig
    EditOptions:
      properties:
        color:
          type: string
          pattern: '^#[0-9A-Fa-f]{6}$'
          title: Color
          description: The color to use for edits, in hex format.
          default: '#FF0000'
        font_size:
          type: number
          maximum: 72
          minimum: 1
          nullable: true
          title: Font Size
          description: >-
            The font size (in points) to use for filled text fields. If not specified, font
            size is automatically calculated based on field dimensions.
        llm_provider_preference:
          type: string
          enum:
            - openai
            - anthropic
            - google
          nullable: true
          title: Llm Provider Preference
          description: >-
            The LLM provider to use for edit processing. If not specified, defaults to 'google'
        enable_overflow_pages:
          type: boolean
          title: Enable Overflow Pages
          description: >-
            If True, creates overflow pages for text that doesn't fit in form fields. Defaults
            to False.
          default: false
        flatten:
          type: boolean
          title: Flatten
          description: >-
            If True, flattens form fields after filling, converting them to static content.
            Defaults to False.
          default: false
      type: object
      title: EditOptions
    EditResponse:
      properties:
        response_type:
          type: string
          title: Response Type
          default: edit
          enum:
            - edit
        document_url:
          type: string
          title: Document Url
          description: Presigned URL to download the edited document.
        form_schema:
          items:
            $ref: '#/components/schemas/EditWidget'
          type: array
          nullable: true
          title: Form Schema
          description: >-
            Form schema for PDF forms. List of widgets with their types, descriptions, and
            bounding boxes.
        usage:
          allOf:
            - $ref: '#/components/schemas/ParseUsage'
          nullable: true
          description: >-
            Usage information for the edit operation, including number of pages and credits
            charged.
      type: object
      required:
        - document_url
      title: EditResponse
    EditWidget:
      # Note: `description` and `type` below are property names of the widget model,
      # not schema keywords.
      properties:
        bbox:
          allOf:
            - $ref: '#/components/schemas/BoundingBox'
          description: Bounding box coordinates of the widget
        description:
          type: string
          title: Description
          description: Description of the widget extracted from the document
        type:
          type: string
          enum:
            - text
            - checkbox
            - radio
            - dropdown
            - barcode
          title: Type
          description: Type of the form widget
        fill:
          type: boolean
          title: Fill
          description: >-
            If True (default), the system will attempt to fill this widget. If False, the
            widget will be created but intentionally left unfilled.
          default: true
        value:
          type: string
          nullable: true
          title: Value
          description: >-
            If provided, this value will be used directly instead of attempting to
            intelligently determine the field value.
        font_size:
          type: number
          maximum: 72
          minimum: 1
          nullable: true
          title: Font Size
          description: >-
            Font size in points for this specific field. Takes priority over the global
            font_size in EditOptions. If not set, falls back to the global font_size, then to
            auto-calculated sizing.
      type: object
      required:
        - bbox
        - description
        - type
      title: EditWidget
    Enhance:
      properties:
        agentic:
          items:
            anyOf:
              - $ref: '#/components/schemas/TableAgentic'
              - $ref: '#/components/schemas/FigureAgentic'
              - $ref: '#/components/schemas/TextAgentic'
          type: array
          title: Agentic
          description: >-
            Agentic uses vision language models to enhance the accuracy of the output of
            different types of extraction. This will incur a cost and latency increase.
          default: []
        summarize_figures:
          type: boolean
          title: Summarize Figures
          description: >-
            If True, summarize figures using a small vision language model. Defaults to True.
          default: true
        intelligent_ordering:
          type: boolean
          title: Intelligent Ordering
          description: >-
            If True, use an advanced vision language model to improve reading order accuracy,
            with a small increase in latency. Defaults to False.
          default: false
      type: object
      title: Enhance
    EnhancedAsyncJobResponse:
      properties:
        status:
          type: string
          enum:
            - Pending
            - Completed
            - Failed
            - Idle
          title: Status
        result:
          # The concrete result schema is selected by the `response_type` property.
          oneOf:
            - $ref: '#/components/schemas/ParseResponse'
            - $ref: '#/components/schemas/ExtractResponse'
            - $ref: '#/components/schemas/SplitResponse'
            - $ref: '#/components/schemas/EditResponse'
            - $ref: '#/components/schemas/PipelineResponse'
            - $ref: '#/components/schemas/V3ExtractResponse'
            - $ref: '#/components/schemas/ClassifyResponse'
          discriminator:
            propertyName: response_type
            mapping:
              classify: '#/components/schemas/ClassifyResponse'
              edit: '#/components/schemas/EditResponse'
              extract: '#/components/schemas/ExtractResponse'
              parse: '#/components/schemas/ParseResponse'
              pipeline: '#/components/schemas/PipelineResponse'
              split: '#/components/schemas/SplitResponse'
              v3_extract: '#/components/schemas/V3ExtractResponse'
          nullable: true
          title: Result
        progress:
          type: number
          nullable: true
          title: Progress
        reason:
          type: string
          nullable: true
          title: Reason
        type:
          type: string
          enum:
            - Parse
            - Extract
            - Split
            - Edit
            - Pipeline
            - Classify
          nullable: true
          title: Type
        num_pages:
          type: integer
          nullable: true
          title: Num Pages
        total_pages:
          type: integer
          nullable: true
          title: Total Pages
        source:
          nullable: true
          title: Source
        duration:
          type: number
          nullable: true
          title: Duration
        created_at:
          type: string
          format: date-time
          nullable: true
          title: Created At
        raw_config:
          type: string
          nullable: true
          title: Raw Config
        bucket:
          nullable: true
          title: Bucket
      type: object
      required:
        - status
      title: EnhancedAsyncJobResponse
    ExtractResponse:
      additionalProperties: true
      type: object
    ExtractSettings:
      properties:
        include_images:
          type: boolean
          title: Include Images
          description: If True, include images in the extraction.
          default: false
        optimize_for_latency:
          type: boolean
          title: Optimize For Latency
          description: >-
            If True, jobs will be processed with a higher throughput and priority at a higher
            cost. Defaults to False.
          default: false
        array_extract:
          type: boolean
          title: Array Extract
          description: If True, use array extraction.
          default: false
        deep_extract:
          type: boolean
          title: Deep Extract
          description: >-
            If True, use Deep Extract, an agentic extraction mode that iteratively refines its
            output to achieve near-perfect accuracy. Best for complex documents where accuracy
            is critical.
          default: false
        citations:
          allOf:
            - $ref: '#/components/schemas/Citations'
          description: The citations to use for the extraction.
          default:
            enabled: false
            numerical_confidence: true
      type: object
      title: ExtractSettings
    ExtractSplitResponse:
      properties:
        split_name:
          type: string
          title: Split Name
        page_range:
          items:
            type: integer
          type: array
          title: Page Range
        partition:
          type: string
          nullable: true
          title: Partition
        result:
          anyOf:
            - $ref: '#/components/schemas/ExtractResponse'
            - $ref: '#/components/schemas/V3ExtractResponse'
          title: Result
      type: object
      required:
        - split_name
        - page_range
        - result
      title: ExtractSplitResponse
      description: This is the response format for Extract -> Split Pipelines
    FigureAgentic:
      properties:
        scope:
          type: string
          title: Scope
          enum:
            - figure
        prompt:
          type: string
          nullable: true
          title: Prompt
          description: Custom prompt for figure agentic.
        advanced_chart_agent:
          type: boolean
          title: Advanced Chart Agent
          description: If True, use the advanced chart agent. Defaults to False.
          default: false
        return_overlays:
          type: boolean
          title: Return Overlays
          description: >-
            If True, return overlays for the figure. This is so you can use the overlays to
            double check the quality of the extraction
          default: false
      type: object
      required:
        - scope
      title: FigureAgentic
    Formatting:
      properties:
        add_page_markers:
          type: boolean
          title: Add Page Markers
          description: >-
            If True, add page markers to the output. Defaults to False. Useful for extracting
            data with page specific information.
          default: false
        table_output_format:
          type: string
          enum:
            - html
            - json
            - md
            - jsonbbox
            - dynamic
            - csv
          title: Table Output Format
          description: >-
            The mode to use for table output. Defaults to dynamic, which returns md for
            simpler tables and html for more complex tables.
          default: dynamic
        merge_tables:
          type: boolean
          title: Merge Tables
          description: >-
            A flag to indicate if consecutive tables with the same number of columns should be
            merged. Defaults to False.
          default: false
        include:
          items:
            type: string
            enum:
              - change_tracking
              - highlight
              - comments
              - hyperlinks
              - signatures
              - ignore_watermarks
          type: array
          title: Include
          description: A list of formatting to include in the output.
          default: []
      type: object
      title: Formatting
    FullResult:
      properties:
        type:
          type: string
          title: Type
          description: type = 'full'
          enum:
            - full
        chunks:
          items:
            $ref: '#/components/schemas/ParseChunk'
          type: array
          title: Chunks
        ocr:
          allOf:
            - $ref: '#/components/schemas/OCRResult'
          nullable: true
        custom:
          nullable: true
          title: Custom
      type: object
      required:
        - type
        - chunks
      title: FullResult
    GranularConfidence:
      properties:
        extract_confidence:
          type: number
          nullable: true
          title: Extract Confidence
        parse_confidence:
          type: number
          nullable: true
          title: Parse Confidence
      type: object
      title: GranularConfidence
    HTTPValidationError:
      properties:
        detail:
          items:
            $ref: '#/components/schemas/ValidationError'
          type: array
          title: Detail
      type: object
      title: HTTPValidationError
    HybridVpcSettings:
      properties:
        environment:
          type: string
          nullable: true
          title: Environment
          description: >-
            Named Hybrid VPC environment to use for this request. Only applies when your
            organization has Hybrid VPC environments configured.
      type: object
      title: HybridVpcSettings
    Instructions:
      properties:
        schema:
          title: Schema
          description: The JSON schema to use for the extraction.
          default: {}
        system_prompt:
          type: string
          title: System Prompt
          description: The system prompt to use for the extraction.
          default: Be precise and thorough.
      type: object
      title: Instructions
    JobsResponse:
      properties:
        jobs:
          items:
            $ref: '#/components/schemas/SingleJob'
          type: array
          title: Jobs
          description: >-
            List of jobs with their job_id, status, type, raw_config, created_at, num_pages
            and duration
        next_cursor:
          type: string
          nullable: true
          title: Next Cursor
          description: >-
            Cursor to fetch the next page of results. If null, there are no more results.
      type: object
      required:
        - jobs
      title: JobsResponse
    OCRLine:
      properties:
        text:
          type: string
          title: Text
        bbox:
          $ref: '#/components/schemas/BoundingBox'
        confidence:
          type: number
          nullable: true
          title: Confidence
          description: >-
            OCR confidence score between 0 and 1, where 1 indicates highest confidence
        chunk_index:
          type: integer
          nullable: true
          title: Chunk Index
          description: The index of the chunk that the line belongs to.
        rotation:
          type: integer
          nullable: true
          title: Rotation
          description: The rotation angle in degrees, from 0 to 360, counterclockwise.
      type: object
      required:
        - text
        - bbox
      title: OCRLine
    OCRResult:
      properties:
        words:
          items:
            $ref: '#/components/schemas/OCRWord'
          type: array
          title: Words
        lines:
          items:
            $ref: '#/components/schemas/OCRLine'
          type: array
          title: Lines
      type: object
      required:
        - words
        - lines
      title: OCRResult
    OCRWord:
      properties:
        text:
          type: string
          title: Text
        bbox:
          $ref: '#/components/schemas/BoundingBox'
        confidence:
          type: number
          nullable: true
          title: Confidence
          description: >-
            OCR confidence score between 0 and 1, where 1 indicates highest confidence
        chunk_index:
          type: integer
          nullable: true
          title: Chunk Index
          description: The index of the chunk that the word belongs to.
        rotation:
          type: integer
          nullable: true
          title: Rotation
          description: The rotation angle in degrees, from 0 to 360, counterclockwise.
      type: object
      required:
        - text
        - bbox
      title: OCRWord
    PageRange:
      properties:
        start:
          type: integer
          nullable: true
          title: Start
          description: The page number to start processing from (1-indexed).
        end:
          type: integer
          nullable: true
          title: End
          description: The page number to stop processing at (1-indexed).
      type: object
      title: PageRange
    ParseBlock:
      properties:
        type:
          type: string
          enum:
            - Header
            - Footer
            - Title
            - Section Header
            - Page Number
            - List Item
            - Figure
            - Table
            - Key Value
            - Text
            - Comment
            - Signature
          title: Type
          description: The type of block extracted from the document.
        bbox:
          allOf:
            - $ref: '#/components/schemas/BoundingBox'
          description: The bounding box of the block extracted from the document.
        content:
          type: string
          title: Content
          description: The content of the block extracted from the document.
        image_url:
          type: string
          nullable: true
          title: Image Url
          description: (Experimental) The URL of the image associated with the block.
        chart_data:
          items:
            type: string
          type: array
          nullable: true
          title: Chart Data
          description: >-
            (Experimental) The URL/link to chart data JSON for figure blocks processed by
            chart agent.
        confidence:
          type: string
          nullable: true
          title: Confidence
          description: >-
            The confidence for the block. It is either low or high and takes into account
            factors like OCR and table structure
          default: low
        granular_confidence:
          allOf:
            - $ref: '#/components/schemas/GranularConfidence'
          nullable: true
          description: >-
            Granular confidence scores for the block. It is a dictionary of confidence scores
            for the block. The confidence scores will not be None if the user has enabled
            numeric confidence scores.
        extra:
          additionalProperties: true
          type: object
          nullable: true
          title: Extra
          description: >-
            Extra metadata fields for the block. Fields like 'is_chart' will only appear when
            set to True.
      type: object
      required:
        - type
        - bbox
        - content
      title: ParseBlock
    ParseChunk:
      properties:
        content:
          type: string
          title: Content
          description: The content of the chunk extracted from the document.
        embed:
          type: string
          title: Embed
          description: Chunk content optimized for embedding and retrieval.
        enriched:
          type: string
          nullable: true
          title: Enriched
          description: The enriched content of the chunk extracted from the document.
        enrichment_success:
          type: boolean
          title: Enrichment Success
          description: Whether the enrichment was successful.
default: false blocks: items: $ref: '#/components/schemas/ParseBlock' type: array title: Blocks type: object required: - content - embed - enriched - blocks title: ParseChunk ParseOptions: properties: enhance: $ref: '#/components/schemas/Enhance' default: agentic: [] summarize_figures: true intelligent_ordering: false retrieval: $ref: '#/components/schemas/Retrieval' default: chunking: chunk_mode: disabled chunk_overlap: 0 filter_blocks: [] embedding_optimized: false formatting: $ref: '#/components/schemas/Formatting' default: add_page_markers: false table_output_format: dynamic merge_tables: false include: [] spreadsheet: $ref: '#/components/schemas/Spreadsheet' default: split_large_tables: enabled: true size: 50 include: [] clustering: accurate exclude: [] settings: $ref: '#/components/schemas/Settings' default: ocr_system: standard extraction_mode: hybrid force_url_result: false return_ocr_data: false return_images: [] embed_pdf_metadata: false embed_pdf_metadata_dpi: 100 persist_results: false hybrid_vpc: {} type: object title: ParseOptions ParseResponse: properties: response_type: type: string title: Response Type default: parse enum: - parse job_id: type: string title: Job Id duration: type: number title: Duration description: The duration of the parse request in seconds. pdf_url: type: string nullable: true title: Pdf Url description: The storage URL of the converted PDF file. studio_link: type: string nullable: true title: Studio Link description: The link to the studio pipeline for the document. usage: $ref: '#/components/schemas/ParseUsage' result: anyOf: - $ref: '#/components/schemas/FullResult' - $ref: '#/components/schemas/UrlResult' title: Result description: The response from the document processing service. Note that there can be two types of responses, Full Result and URL Result. This is due to limitations on the max return size on HTTPS. If the response is too large, it will be returned as a presigned URL in the URL response. 
You should handle this in your application. parse_mode: type: string enum: - base - lite nullable: true title: Parse Mode description: Which pipeline produced this response. ``lite`` means Reducto Flash Lite served the request; ``base`` is the standard pipeline. Optional / nullable for forward compatibility — older API instances or persisted responses written before this field existed will leave it ``None``; treat ``None`` as ``base``. type: object required: - job_id - duration - usage - result title: ParseResponse ParseUsage: properties: num_pages: type: integer title: Num Pages credits: type: number nullable: true title: Credits credit_breakdown: additionalProperties: type: number propertyNames: enum: - page - html_page - docx_native_page - chart_agent - spreadsheet_cells - billable_spreadsheet_pages - agentic - complex - enrich_table - figure_summary - table_summary - key_value - agentic_text - promptable_agentic_text - reducto_lite_page type: object nullable: true title: Credit Breakdown page_billing_breakdown: additionalProperties: items: type: string enum: - page - html_page - docx_native_page - agentic - complex - chart_agent - spreadsheet_cells - billable_spreadsheet_pages - enrich_table - figure_summary - table_summary - key_value - agentic_text - promptable_agentic_text - reducto_lite_page type: array type: object nullable: true title: Page Billing Breakdown description: Per-page breakdown of features used. Maps 1-indexed page numbers (as strings) to the list of billing features applied on that page (e.g. 'page', 'complex', 'chart_agent'). non_empty_cell_count: type: integer nullable: true title: Non Empty Cell Count description: Total non-empty cells across all sheets. Only set for spreadsheet inputs. 
type: object required: - num_pages title: ParseUsage PipelineResponse: properties: response_type: type: string title: Response Type default: pipeline enum: - pipeline job_id: type: string title: Job Id usage: $ref: '#/components/schemas/ParseUsage' result: $ref: '#/components/schemas/PipelineResult' type: object required: - job_id - usage - result title: PipelineResponse PipelineResult: properties: parse: anyOf: - $ref: '#/components/schemas/ParseResponse' - items: $ref: '#/components/schemas/ParseResponse' type: array title: Parse nullable: true extract: anyOf: - items: $ref: '#/components/schemas/ExtractSplitResponse' type: array - $ref: '#/components/schemas/ExtractResponse' - $ref: '#/components/schemas/V3ExtractResponse' title: Extract nullable: true split: $ref: '#/components/schemas/SplitResponse' nullable: true edit: $ref: '#/components/schemas/EditResponse' nullable: true type: object required: - parse - extract - split title: PipelineResult PipelineSettings: properties: document_password: type: string nullable: true title: Document Password description: Password to decrypt password-protected documents. additionalProperties: false type: object title: PipelineSettings description: Settings for pipeline execution that override pipeline defaults. QueuePriority: type: string enum: - auto - standard - batch title: QueuePriority description: "Customer-facing queue priority for parse jobs.\n\n``AUTO`` and ``STANDARD`` are wire-level synonyms —\ \ both mean \"default\nqueue.\" Downstream code only branches on ``BATCH``, so the difference\nis purely lexical.\ \ Both values are first-class enum members so a\nrolling deploy is safe in either direction: a pre-#6134 pod (enum\ \ =\n``{auto, batch}``) and a post-fix pod (enum = ``{auto, standard, batch}``)\ncan both deserialise payloads the\ \ other produces.\n\nMigration plan (three stages, each a separate PR fully deployed before\nthe next):\n\n1. 
**Widen**\ \ *(this PR)*: enum = ``{AUTO, STANDARD, BATCH}``,\n ``default=AUTO``. Compatible with the still-running pre-#6134\n\ \ prod pods after yesterday's rollback.\n2. **Flip default**: same enum, ``default=STANDARD``. Ship after\n stage\ \ 1 is everywhere.\n3. **Narrow** *(future, optional)*: drop ``AUTO`` once every Redis\n payload and SDK has migrated.\ \ Until then it stays as an\n accepted alias." ResponseConfidence: properties: categories: items: $ref: '#/components/schemas/CategoryConfidence' type: array title: Categories type: object required: - categories title: ResponseConfidence description: Overall confidence breakdown for classification response. Retrieval: properties: chunking: $ref: '#/components/schemas/Chunking' default: chunk_mode: disabled chunk_overlap: 0 filter_blocks: items: type: string enum: - Header - Footer - Title - Section Header - Page Number - List Item - Figure - Table - Key Value - Text - Comment - Signature type: array title: Filter Blocks description: A list of block types to filter out from 'content' and 'embed' fields. By default, no blocks are filtered. default: [] embedding_optimized: type: boolean title: Embedding Optimized description: If True, use embedding optimized mode. Defaults to False. default: false type: object title: Retrieval Settings: properties: ocr_system: type: string enum: - standard - legacy title: Ocr System description: Standard is our best multilingual OCR system. Legacy only supports Germanic languages and is available for backwards compatibility. default: standard extraction_mode: type: string enum: - ocr - hybrid title: Extraction Mode description: The mode to use for text extraction from PDFs. OCR mode uses optical character recognition only. Hybrid mode combines OCR with embedded PDF text for best accuracy (default). default: hybrid force_url_result: type: boolean title: Force Url Result description: Force the result to be returned in URL form.
default: false force_file_extension: type: string nullable: true title: Force File Extension description: Force the URL to be downloaded as a specific file extension (e.g. `.png`). return_ocr_data: type: boolean title: Return Ocr Data description: If True, return OCR data in the result. Defaults to False. default: false return_images: items: type: string enum: - figure - table - page type: array title: Return Images description: Whether to return images for the specified block types. 'page' returns full page images. By default, no images are returned. default: [] embed_pdf_metadata: type: boolean title: Embed Pdf Metadata description: If True, embed OCR metadata into the returned PDF. Defaults to False. default: false embed_pdf_metadata_dpi: type: integer maximum: 250 minimum: 50 title: Embed Pdf Metadata Dpi description: Render DPI used when rasterizing the source PDF before embedding the OCR text layer (only applies when ``embed_pdf_metadata`` is True). Lower values produce dramatically smaller output PDFs; higher values preserve more detail when zoomed past 200%. Defaults to 100 (good for on-screen viewing); raise toward the source scan DPI for crisper output. Min 50, max 250. default: 100 persist_results: type: boolean title: Persist Results description: If True, persist the results indefinitely. Defaults to False. default: false tenant_throttling: $ref: '#/components/schemas/TenantThrottling' nullable: true description: Per-tenant throttling for multi-tenant applications. Tag each request with your tenant's id to bound how much of your account's concurrency a single tenant can consume. Account-level throttles still apply. timeout: type: number nullable: true title: Timeout description: The timeout for the job in seconds. 
page_range: anyOf: - $ref: '#/components/schemas/PageRange' - items: $ref: '#/components/schemas/PageRange' type: array - items: type: integer type: array - items: type: string type: array title: Page Range description: The page range to process (1-indexed). By default, the entire document is processed. For spreadsheets, you can also provide a list of sheet names. nullable: true document_password: type: string nullable: true title: Document Password description: Password to decrypt password-protected documents. hybrid_vpc: $ref: '#/components/schemas/HybridVpcSettings' description: Hybrid VPC request-scoped settings. default: {} type: object title: Settings SingleJob: properties: job_id: type: string title: Job Id status: type: string enum: - Pending - Completed - Failed - Idle - InProgress - Completing - Cancelled title: Status type: type: string enum: - Parse - Extract - Split - Edit - Pipeline - Classify title: Type raw_config: type: string title: Raw Config created_at: type: string format: date-time title: Created At source: nullable: true title: Source num_pages: type: integer nullable: true title: Num Pages total_pages: type: integer nullable: true title: Total Pages duration: type: number nullable: true title: Duration bucket: nullable: true title: Bucket type: object required: - job_id - status - type - raw_config - created_at - num_pages - total_pages - duration title: SingleJob Split: properties: name: type: string title: Name pages: items: type: integer type: array title: Pages conf: type: string enum: - high - low title: Conf default: low partitions: items: $ref: '#/components/schemas/SplitPartition' type: array nullable: true title: Partitions type: object required: - name - pages title: Split SplitCategory: properties: name: type: string title: Name description: type: string title: Description partition_key: type: string nullable: true title: Partition Key type: object required: - name - description title: SplitCategory SplitLargeTableSizes: 
properties: row: type: integer nullable: true title: Row description: The number of rows to include in each chunk when splitting large tables. Does not chunk rows if set to None. column: type: integer nullable: true title: Column description: The number of columns to include in each chunk when splitting large tables. Does not chunk columns if set to None. type: object title: SplitLargeTableSizes SplitLargeTables: properties: enabled: type: boolean title: Enabled description: If True, split large tables into smaller tables. Defaults to True. default: true size: anyOf: - type: integer - $ref: '#/components/schemas/SplitLargeTableSizes' title: Size description: The size of the tables to split into. Defaults to 50. Use 'row' and 'column' to independently specify the number of rows and columns to include when splitting. If you only want to split by rows or columns, set the other value to None. default: 50 type: object title: SplitLargeTables SplitPartition: properties: name: type: string title: Name pages: items: type: integer type: array title: Pages conf: type: string enum: - high - low title: Conf default: low type: object required: - name - pages title: SplitPartition SplitResponse: properties: response_type: type: string title: Response Type default: split enum: - split usage: $ref: '#/components/schemas/ParseUsage' result: anyOf: - $ref: '#/components/schemas/SplitResult' - $ref: '#/components/schemas/DeepSplitResult' title: Result description: The split result. 
type: object required: - usage - result title: SplitResponse SplitResult: properties: section_mapping: additionalProperties: items: type: integer type: array type: object nullable: true title: Section Mapping splits: items: $ref: '#/components/schemas/Split' type: array title: Splits type: object required: - section_mapping - splits title: SplitResult SplitSettings: properties: table_cutoff: type: string enum: - truncate - preserve title: Table Cutoff description: Whether tables should be truncated to the first few rows or all content should be preserved. 'truncate' improves latency; 'preserve' is recommended for cases where partition_key is being used and the partition_key may be included within the table. Defaults to 'truncate'. default: truncate allow_page_overlap: type: boolean title: Allow Page Overlap description: If True, a page can belong to multiple categories/partitions. If False, each page must belong to exactly one category. Defaults to True. default: true deep_split: type: boolean title: Deep Split description: If True, uses the deep split agent for higher-quality document splitting. Off by default. default: false type: object title: SplitSettings Spreadsheet: properties: split_large_tables: $ref: '#/components/schemas/SplitLargeTables' default: enabled: true size: 50 include: items: type: string enum: - cell_colors - formula - dropdowns type: array title: Include description: Whether to include cell color, formula, and dropdown information in the output. default: [] clustering: type: string enum: - accurate - fast - disabled title: Clustering description: In a spreadsheet with different tables inside, we enable splitting up the tables by default. Accurate mode applies more powerful models for superior accuracy, at 5× the default per-cell rate. Disabling will register as one large table.
default: accurate exclude: items: type: string enum: - hidden_sheets - hidden_rows - hidden_cols - styling - spreadsheet_images type: array title: Exclude description: Whether to exclude hidden sheets, rows, or columns in the output. default: [] max_cell_count: type: integer minimum: 1 nullable: true title: Max Cell Count description: Maximum total non-empty cells allowed across all sheets. If exceeded, the request is rejected with a 422 error. Set to null to disable the limit. Defaults to null. type: object title: Spreadsheet SvixWebhookConfig: properties: mode: type: string title: Mode default: svix enum: - svix channels: items: type: string type: array title: Channels description: A list of Svix channels the message will be delivered down, omit to send to all channels. type: object title: SvixWebhookConfig SyncExtractConfig: properties: input: anyOf: - type: string - items: type: string type: array - $ref: '#/components/schemas/UploadResponse' title: Input description: "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of\ \ the following:\n 1. A publicly available URL\n 2. A presigned S3 URL\n 3. A\ \ reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n \ \ 4. A jobid:// prefixed URL obtained from a previous /parse invocation\n 5. A list of URLs (for multi-document\ \ pipelines, V3 API only)\n\n For edit pipelines, this should be a string containing the edit instructions " parsing: $ref: '#/components/schemas/ParseOptions' description: The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored. 
default: enhance: agentic: [] intelligent_ordering: false summarize_figures: true retrieval: chunking: chunk_mode: disabled chunk_overlap: 0 embedding_optimized: false filter_blocks: [] formatting: add_page_markers: false include: [] merge_tables: false table_output_format: dynamic spreadsheet: clustering: accurate exclude: [] include: [] split_large_tables: enabled: true size: 50 settings: embed_pdf_metadata: false embed_pdf_metadata_dpi: 100 extraction_mode: hybrid force_url_result: false hybrid_vpc: {} ocr_system: standard persist_results: false return_images: [] return_ocr_data: false instructions: $ref: '#/components/schemas/Instructions' description: The instructions to use for the extraction. default: schema: {} system_prompt: Be precise and thorough. settings: $ref: '#/components/schemas/ExtractSettings' description: The settings to use for the extraction. default: include_images: false optimize_for_latency: false array_extract: false deep_extract: false citations: enabled: false numerical_confidence: true type: object required: - input title: SyncExtractConfig SyncParseConfig: properties: input: anyOf: - type: string - items: type: string type: array - $ref: '#/components/schemas/UploadResponse' title: Input description: "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of\ \ the following:\n 1. A publicly available URL\n 2. A presigned S3 URL\n 3. A\ \ reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n \ \ 4. A jobid:// prefixed URL obtained from a previous /parse invocation\n 5. 
A list of URLs (for multi-document\ \ pipelines, V3 API only)\n\n For edit pipelines, this should be a string containing the edit instructions " enhance: $ref: '#/components/schemas/Enhance' default: agentic: [] summarize_figures: true intelligent_ordering: false retrieval: $ref: '#/components/schemas/Retrieval' default: chunking: chunk_mode: disabled chunk_overlap: 0 filter_blocks: [] embedding_optimized: false formatting: $ref: '#/components/schemas/Formatting' default: add_page_markers: false table_output_format: dynamic merge_tables: false include: [] spreadsheet: $ref: '#/components/schemas/Spreadsheet' default: split_large_tables: enabled: true size: 50 include: [] clustering: accurate exclude: [] settings: $ref: '#/components/schemas/Settings' default: ocr_system: standard extraction_mode: hybrid force_url_result: false return_ocr_data: false return_images: [] embed_pdf_metadata: false embed_pdf_metadata_dpi: 100 persist_results: false hybrid_vpc: {} type: object required: - input title: SyncParseConfig SyncSplitConfig: properties: input: anyOf: - type: string - items: type: string type: array - $ref: '#/components/schemas/UploadResponse' title: Input description: "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of\ \ the following:\n 1. A publicly available URL\n 2. A presigned S3 URL\n 3. A\ \ reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n \ \ 4. A jobid:// prefixed URL obtained from a previous /parse invocation\n 5. A list of URLs (for multi-document\ \ pipelines, V3 API only)\n\n For edit pipelines, this should be a string containing the edit instructions " parsing: $ref: '#/components/schemas/ParseOptions' description: The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored. 
default: enhance: agentic: [] intelligent_ordering: false summarize_figures: true retrieval: chunking: chunk_mode: disabled chunk_overlap: 0 embedding_optimized: false filter_blocks: [] formatting: add_page_markers: false include: [] merge_tables: false table_output_format: dynamic spreadsheet: clustering: accurate exclude: [] include: [] split_large_tables: enabled: true size: 50 settings: embed_pdf_metadata: false embed_pdf_metadata_dpi: 100 extraction_mode: hybrid force_url_result: false hybrid_vpc: {} ocr_system: standard persist_results: false return_images: [] return_ocr_data: false split_description: items: $ref: '#/components/schemas/SplitCategory' type: array title: Split Description description: The configuration options for processing the document. split_rules: type: string title: Split Rules description: The prompt that describes rules for splitting the document. default: Split the document into the applicable sections. Sections may only overlap at their first and last page if at all. settings: $ref: '#/components/schemas/SplitSettings' description: The settings for split processing. default: table_cutoff: truncate allow_page_overlap: true deep_split: false type: object required: - input - split_description title: SyncSplitConfig TableAgentic: properties: scope: type: string title: Scope enum: - table prompt: type: string nullable: true title: Prompt description: Custom prompt for table agentic. mode: type: string enum: - default - auto - max title: Mode description: 'Mode for table agentic: ''default'' selectively applies enrichment only to tables likely to benefit, and ''max'' runs enrichment on all tables.' default: default type: object required: - scope title: TableAgentic TenantThrottling: properties: tenant_id: type: string maxLength: 256 minLength: 1 title: Tenant Id description: Your identifier for the tenant (customer, workspace, organization) this request belongs to. Used only for noisy-neighbor throttling inside your account. 
max_share: type: number maximum: 1 title: Max Share description: Maximum fraction of your account's concurrency ceiling this tenant may use, between 0 (exclusive) and 1. Defaults to 0.5. default: 0.5 minimum: 0 exclusiveMinimum: true type: object required: - tenant_id title: TenantThrottling TextAgentic: properties: scope: type: string title: Scope enum: - text prompt: type: string nullable: true title: Prompt description: 'Custom instructions for agentic text. Note: This only applies to form regions (key-value).' type: object required: - scope title: TextAgentic UploadResponse: properties: file_id: type: string title: File Id presigned_url: type: string nullable: true title: Presigned Url type: object required: - file_id title: UploadResponse UrlResult: properties: type: type: string title: Type description: type = 'url' enum: - url url: type: string title: Url result_id: type: string title: Result Id type: object required: - type - url - result_id title: UrlResult V3AsyncPipelineConfig: properties: async: $ref: '#/components/schemas/config__v3__AsyncConfig' description: The configuration options for asynchronous processing (default synchronous). default: priority: false input: anyOf: - type: string - items: type: string type: array - $ref: '#/components/schemas/UploadResponse' title: Input description: "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of\ \ the following:\n 1. A publicly available URL\n 2. A presigned S3 URL\n 3. A\ \ reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n \ \ 4. A jobid:// prefixed URL obtained from a previous /parse invocation\n 5. A list of URLs (for multi-document\ \ pipelines, V3 API only)\n\n For edit pipelines, this should be a string containing the edit instructions " pipeline_id: type: string title: Pipeline Id description: The ID of the pipeline to use for the document. 
settings: $ref: '#/components/schemas/PipelineSettings' default: {} type: object required: - input - pipeline_id title: V3AsyncPipelineConfig V3ExtractResponse: additionalProperties: true type: object V3PipelineConfig: properties: input: anyOf: - type: string - items: type: string type: array - $ref: '#/components/schemas/UploadResponse' title: Input description: "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of\ \ the following:\n 1. A publicly available URL\n 2. A presigned S3 URL\n 3. A\ \ reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n \ \ 4. A jobid:// prefixed URL obtained from a previous /parse invocation\n 5. A list of URLs (for multi-document\ \ pipelines, V3 API only)\n\n For edit pipelines, this should be a string containing the edit instructions " pipeline_id: type: string title: Pipeline Id description: The ID of the pipeline to use for the document. settings: $ref: '#/components/schemas/PipelineSettings' default: {} type: object required: - input - pipeline_id title: V3PipelineConfig ValidationError: properties: loc: items: anyOf: - type: string - type: integer type: array title: Location msg: type: string title: Message type: type: string title: Error Type input: title: Input ctx: type: object title: Context type: object required: - loc - msg - type title: ValidationError WebhookConfigNew: properties: mode: type: string enum: - disabled - svix - direct title: Mode description: The mode to use for webhook delivery. Defaults to 'disabled'. We recommend using 'svix' for production environments. default: disabled url: type: string title: Url description: The URL to send the webhook to (if using direct webhook). metadata: title: Metadata description: JSON metadata included in webhook request body channels: items: type: string type: array title: Channels description: A list of Svix channels the message will be delivered down, omit to send to all channels.
type: object title: WebhookConfigNew config__v3__AsyncConfig: properties: metadata: title: Metadata description: JSON metadata included in webhook request body. Defaults to None. priority: type: boolean title: Priority description: If True, attempts to process the job with priority if the user has priority processing budget available; by default, sync jobs are prioritized above async jobs. default: false webhook: anyOf: - $ref: '#/components/schemas/SvixWebhookConfig' - $ref: '#/components/schemas/DirectWebhookConfig' title: Webhook description: The webhook configuration for the asynchronous processing. nullable: true type: object title: AsyncConfig config__v3__AsyncSplitConfig: properties: async: $ref: '#/components/schemas/config__v3__AsyncConfig' description: The configuration options for asynchronous processing (default synchronous). default: priority: false input: anyOf: - type: string - items: type: string type: array - $ref: '#/components/schemas/UploadResponse' title: Input description: "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of\ \ the following:\n 1. A publicly available URL\n 2. A presigned S3 URL\n 3. A\ \ reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n \ \ 4. A jobid:// prefixed URL obtained from a previous /parse invocation\n 5. A list of URLs (for multi-document\ \ pipelines, V3 API only)\n\n For edit pipelines, this should be a string containing the edit instructions " parsing: $ref: '#/components/schemas/ParseOptions' description: The configuration options for parsing the document. If you are passing in a jobid:// URL for the file, then this configuration will be ignored. 
default: enhance: agentic: [] intelligent_ordering: false summarize_figures: true retrieval: chunking: chunk_mode: disabled chunk_overlap: 0 embedding_optimized: false filter_blocks: [] formatting: add_page_markers: false include: [] merge_tables: false table_output_format: dynamic spreadsheet: clustering: accurate exclude: [] include: [] split_large_tables: enabled: true size: 50 settings: embed_pdf_metadata: false embed_pdf_metadata_dpi: 100 extraction_mode: hybrid force_url_result: false hybrid_vpc: {} ocr_system: standard persist_results: false return_images: [] return_ocr_data: false split_description: items: $ref: '#/components/schemas/SplitCategory' type: array title: Split Description description: The configuration options for processing the document. split_rules: type: string title: Split Rules description: The prompt that describes rules for splitting the document. default: Split the document into the applicable sections. Sections may only overlap at their first and last page if at all. settings: $ref: '#/components/schemas/SplitSettings' description: The settings for split processing. default: table_cutoff: truncate allow_page_overlap: true deep_split: false type: object required: - input - split_description title: AsyncSplitConfig