# OpenAPI 3.0.3 description of the Reducto REST API.
openapi: 3.0.3
info:
  title: Reducto API
  version: v1.11.80-78-gc5c4ff11c
  description: >-
    REST API for parsing, extracting, splitting, classifying, and editing complex documents.
    Supports PDFs, Word files, spreadsheets, presentations, and scanned images using
    layout-aware OCR and vision language models.
  license:
    name: Proprietary
  contact:
    email: support@reducto.ai
    url: https://reducto.ai/
# Only one server is declared; every path is resolved against this base URL.
servers:
- url: https://platform.reducto.ai
# Document-wide security requirement: the BearerAuth scheme from components.securitySchemes.
security:
- BearerAuth: []
paths:
  # POST /parse: the JSON body is either a SyncParseConfig or an AsyncParseConfig; a 200
  # carries a ParseResponse or an AsyncParseResponse.
  /parse:
    post:
      operationId: parse_parse_post
      summary: Parse
      requestBody:
        required: true
        content:
          application/json:
            schema:
              oneOf:
              - $ref: '#/components/schemas/SyncParseConfig'
              - $ref: '#/components/schemas/AsyncParseConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                title: Response Parse Parse Post
                anyOf:
                - $ref: '#/components/schemas/ParseResponse'
                - $ref: '#/components/schemas/AsyncParseResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /parse_async: takes an AsyncParseConfig body; a 200 carries an AsyncParseResponse.
  /parse_async:
    post:
      operationId: async_parse_parse_async_post
      summary: Async Parse
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AsyncParseConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AsyncParseResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /extract: the JSON body is either a SyncExtractConfig or an AsyncExtractConfig; a
  # 200 carries exactly one of V3ExtractResponse or AsyncExtractResponse.
  /extract:
    post:
      operationId: extract_extract_post
      summary: Extract
      requestBody:
        required: true
        content:
          application/json:
            schema:
              oneOf:
              - $ref: '#/components/schemas/SyncExtractConfig'
              - $ref: '#/components/schemas/AsyncExtractConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                oneOf:
                - $ref: '#/components/schemas/V3ExtractResponse'
                - $ref: '#/components/schemas/AsyncExtractResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /extract_async: takes an AsyncExtractConfig body; a 200 carries an AsyncExtractResponse.
  /extract_async:
    post:
      operationId: extract_async_extract_async_post
      summary: Extract Async
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AsyncExtractConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AsyncExtractResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /split: takes a SyncSplitConfig body; a 200 carries a SplitResponse.
  /split:
    post:
      operationId: split_split_post
      summary: Split
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SyncSplitConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SplitResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /split_async: takes a config__v3__AsyncSplitConfig body; a 200 carries an
  # AsyncSplitResponse.
  /split_async:
    post:
      operationId: split_async_split_async_post
      summary: Split Async
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/config__v3__AsyncSplitConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AsyncSplitResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /edit: takes an EditConfig body; a 200 carries an EditResponse.
  /edit:
    post:
      operationId: edit_edit_post
      summary: Edit
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EditConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EditResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /edit_async: takes an AsyncEditConfig body; a 200 carries an AsyncEditResponse.
  /edit_async:
    post:
      operationId: edit_async_edit_async_post
      summary: Edit Async
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AsyncEditConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AsyncEditResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /pipeline: takes a V3PipelineConfig body; a 200 carries a PipelineResponse.
  /pipeline:
    post:
      operationId: pipeline_pipeline_post
      summary: Pipeline
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/V3PipelineConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PipelineResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /pipeline_async: takes a V3AsyncPipelineConfig body; a 200 carries an
  # AsyncPipelineResponse.
  /pipeline_async:
    post:
      operationId: pipeline_async_pipeline_async_post
      summary: Pipeline Async
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/V3AsyncPipelineConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AsyncPipelineResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /classify: takes a ClassifyConfig body; a 200 carries a ClassifyResponse.
  /classify:
    post:
      operationId: classify_classify_post
      summary: Classify
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ClassifyConfig'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ClassifyResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # GET /job/{job_id}: look up a job by its ID. A 200 carries an AsyncJobResponse or an
  # EnhancedAsyncJobResponse.
  /job/{job_id}:
    get:
      # The summary and operationId are kept as published (tooling may derive names from
      # them) even though they say "parse"; the description states the actual scope.
      summary: Retrieve Parse
      description: >-
        Retrieve the status of a job and, when present, its result. This endpoint is not
        limited to parse jobs: in an `AsyncJobResponse`, the `result` field may hold a parse,
        extract, split, edit, pipeline, or classify response, selected by its `response_type`.
      operationId: retrieve_parse_job__job_id__get
      parameters:
      # Path parameter: the ID of the job to look up.
      - name: job_id
        in: path
        required: true
        schema:
          type: string
          title: Job Id
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                anyOf:
                - $ref: '#/components/schemas/AsyncJobResponse'
                - $ref: '#/components/schemas/EnhancedAsyncJobResponse'
                title: Response Retrieve Parse Job  Job Id  Get
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /cancel/{job_id}: no request body; the 200 payload schema is left unconstrained.
  /cancel/{job_id}:
    post:
      operationId: cancel_job_cancel__job_id__post
      summary: Cancel Job
      parameters:
      - in: path
        name: job_id
        required: true
        schema:
          title: Job Id
          type: string
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              # Empty schema: any JSON value is accepted.
              schema: {}
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # GET /jobs: three optional query parameters (exclude_configs, cursor, limit); a 200
  # carries a JobsResponse.
  /jobs:
    get:
      operationId: get_jobs_jobs_get
      summary: Get Jobs
      parameters:
      - in: query
        name: exclude_configs
        description: Exclude raw_config from response to reduce size
        required: false
        schema:
          title: Exclude Configs
          type: boolean
          default: false
          description: Exclude raw_config from response to reduce size
      - in: query
        name: cursor
        description: >-
          Cursor for pagination. Use the next_cursor from the previous response to fetch the
          next page.
        required: false
        schema:
          title: Cursor
          type: string
          nullable: true
          description: >-
            Cursor for pagination. Use the next_cursor from the previous response to fetch the
            next page.
      - in: query
        name: limit
        description: Maximum number of jobs to return per page. Defaults to 100, max 500.
        required: false
        schema:
          title: Limit
          type: integer
          # Bounds and default match the prose in the description.
          minimum: 1
          maximum: 500
          default: 100
          description: Maximum number of jobs to return per page. Defaults to 100, max 500.
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JobsResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /upload: optional `extension` query parameter. The request body is not marked
  # required and may be sent as multipart/form-data or application/json; both media types
  # share the Body_upload_upload_post schema. A 200 carries an UploadResponse.
  /upload:
    post:
      operationId: upload_upload_post
      summary: Upload
      parameters:
      - in: query
        name: extension
        required: false
        schema:
          title: Extension
          type: string
          nullable: true
      requestBody:
        content:
          multipart/form-data:
            schema:
              $ref: '#/components/schemas/Body_upload_upload_post'
          application/json:
            schema:
              $ref: '#/components/schemas/Body_upload_upload_post'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/UploadResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # POST /configure_webhook: no parameters or request body are declared; a 200 carries a
  # JSON string.
  /configure_webhook:
    post:
      operationId: webhook_portal_configure_webhook_post
      summary: Webhook Portal
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                title: Response Webhook Portal Configure Webhook Post
                type: string
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # GET /version: no parameters; a 200 carries a JSON string. Only the 200 response is declared.
  /version:
    get:
      operationId: get_version_version_get
      summary: Get Version
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                title: Response Get Version Version Get
                type: string
components:
  # Defines the BearerAuth scheme named by the document-level `security` requirement.
  securitySchemes:
    BearerAuth:
      description: API key passed as a Bearer token in the Authorization header.
      scheme: bearer
      type: http
  schemas:
    # Request body of POST /edit_async: the document to edit and the edit instructions
    # (both required), plus optional edit options, form schema, priority flag, and webhook.
    AsyncEditConfig:
      properties:
        document_url:
          # Either a URL string or an UploadResponse object.
          anyOf:
          - type: string
          - $ref: '#/components/schemas/UploadResponse'
          title: Document Url
          description: |
            The URL of the document to be processed. You can provide one of the following:
            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
        edit_instructions:
          type: string
          title: Edit Instructions
          description: The instructions for the edit.
        edit_options:
          # OpenAPI 3.0 ignores keywords placed next to a bare `$ref`, so the reference is
          # wrapped in `allOf` to keep the default attached to this property.
          allOf:
          - $ref: '#/components/schemas/EditOptions'
          default:
            color: '#FF0000'
            enable_overflow_pages: false
            flatten: false
        form_schema:
          items:
            $ref: '#/components/schemas/EditWidget'
          type: array
          nullable: true
          title: Form Schema
          description: Form schema for PDF forms. List of widgets with their types, descriptions, and bounding boxes. Only
            works for PDFs.
        priority:
          type: boolean
          title: Priority
          description: If True, attempts to process the job with priority if the user has priority processing budget available;
            by default, sync jobs are prioritized above async jobs.
          default: false
        webhook:
          # Wrapped in `allOf` for the same reason as `edit_options`: siblings of a bare
          # `$ref` are ignored in OpenAPI 3.0.
          allOf:
          - $ref: '#/components/schemas/WebhookConfigNew'
          default:
            mode: disabled
            channels: []
      type: object
      required:
      - document_url
      - edit_instructions
      title: AsyncEditConfig
    # 200 body of POST /edit_async: carries only the job ID.
    AsyncEditResponse:
      type: object
      title: AsyncEditResponse
      required:
      - job_id
      properties:
        job_id:
          title: Job Id
          type: string
    # Request body of POST /extract_async, and one of the two bodies accepted by POST /extract.
    # Only `input` is required; every other property carries a default.
    AsyncExtractConfig:
      properties:
        async:
          # OpenAPI 3.0 ignores keywords placed next to a bare `$ref`, so each reference in this
          # schema is wrapped in `allOf` to keep its description and default attached.
          allOf:
          - $ref: '#/components/schemas/config__v3__AsyncConfig'
          description: The configuration options for asynchronous processing (default synchronous).
          default:
            priority: false
        input:
          # A single URL string, a list of URL strings, or an UploadResponse object.
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          - $ref: '#/components/schemas/UploadResponse'
          title: Input
          # Literal block without the stray docstring indentation, so the numbered list and the
          # closing sentence render as Markdown instead of a run-on paragraph and a code block.
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed.
            You can provide one of the following:

            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions.
        parsing:
          allOf:
          - $ref: '#/components/schemas/ParseOptions'
          description: The configuration options for parsing the document. If you are passing in a jobid:// URL for the file,
            then this configuration will be ignored.
          default:
            enhance:
              agentic: []
              intelligent_ordering: false
              summarize_figures: true
            retrieval:
              chunking:
                chunk_mode: disabled
                chunk_overlap: 0
              embedding_optimized: false
              filter_blocks: []
            formatting:
              add_page_markers: false
              include: []
              merge_tables: false
              table_output_format: dynamic
            spreadsheet:
              clustering: accurate
              exclude: []
              include: []
              split_large_tables:
                enabled: true
                size: 50
            settings:
              embed_pdf_metadata: false
              embed_pdf_metadata_dpi: 100
              extraction_mode: hybrid
              force_url_result: false
              hybrid_vpc: {}
              ocr_system: standard
              persist_results: false
              return_images: []
              return_ocr_data: false
        instructions:
          allOf:
          - $ref: '#/components/schemas/Instructions'
          description: The instructions to use for the extraction.
          default:
            schema: {}
            system_prompt: Be precise and thorough.
        settings:
          allOf:
          - $ref: '#/components/schemas/ExtractSettings'
          description: The settings to use for the extraction.
          default:
            include_images: false
            optimize_for_latency: false
            array_extract: false
            deep_extract: false
            citations:
              enabled: false
              numerical_confidence: true
      type: object
      required:
      - input
      title: AsyncExtractConfig
    # 200 body of POST /extract_async (and one alternative for POST /extract): only the job ID.
    AsyncExtractResponse:
      type: object
      title: AsyncExtractResponse
      required:
      - job_id
      properties:
        job_id:
          title: Job Id
          type: string
    # Job envelope used by the GET /job/{job_id} 200 response: a required `status` plus
    # optional `result`, `progress`, and `reason`.
    AsyncJobResponse:
      type: object
      title: AsyncJobResponse
      required:
      - status
      properties:
        status:
          title: Status
          type: string
          enum:
          - Pending
          - Completed
          - Failed
          - Idle
        result:
          title: Result
          nullable: true
          # The payload's `response_type` value picks the matching schema from `oneOf`.
          discriminator:
            propertyName: response_type
            mapping:
              parse: '#/components/schemas/ParseResponse'
              extract: '#/components/schemas/ExtractResponse'
              split: '#/components/schemas/SplitResponse'
              edit: '#/components/schemas/EditResponse'
              pipeline: '#/components/schemas/PipelineResponse'
              v3_extract: '#/components/schemas/V3ExtractResponse'
              classify: '#/components/schemas/ClassifyResponse'
          oneOf:
          - $ref: '#/components/schemas/ParseResponse'
          - $ref: '#/components/schemas/ExtractResponse'
          - $ref: '#/components/schemas/SplitResponse'
          - $ref: '#/components/schemas/EditResponse'
          - $ref: '#/components/schemas/PipelineResponse'
          - $ref: '#/components/schemas/V3ExtractResponse'
          - $ref: '#/components/schemas/ClassifyResponse'
        progress:
          title: Progress
          type: number
          nullable: true
        reason:
          title: Reason
          type: string
          nullable: true
    # Request body of POST /parse_async, and one of the two bodies accepted by POST /parse.
    # Only `input` is required; every other property carries a default.
    AsyncParseConfig:
      properties:
        async:
          # OpenAPI 3.0 ignores keywords placed next to a bare `$ref`, so each reference in this
          # schema is wrapped in `allOf` to keep its description and default attached.
          allOf:
          - $ref: '#/components/schemas/config__v3__AsyncConfig'
          description: The configuration options for asynchronous processing (default synchronous).
          default:
            priority: false
        input:
          # A single URL string, a list of URL strings, or an UploadResponse object.
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          - $ref: '#/components/schemas/UploadResponse'
          title: Input
          # Literal block without the stray docstring indentation, so the numbered list and the
          # closing sentence render as Markdown instead of a run-on paragraph and a code block.
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed.
            You can provide one of the following:

            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions.
        enhance:
          allOf:
          - $ref: '#/components/schemas/Enhance'
          default:
            agentic: []
            summarize_figures: true
            intelligent_ordering: false
        retrieval:
          allOf:
          - $ref: '#/components/schemas/Retrieval'
          default:
            chunking:
              chunk_mode: disabled
              chunk_overlap: 0
            filter_blocks: []
            embedding_optimized: false
        formatting:
          allOf:
          - $ref: '#/components/schemas/Formatting'
          default:
            add_page_markers: false
            table_output_format: dynamic
            merge_tables: false
            include: []
        spreadsheet:
          allOf:
          - $ref: '#/components/schemas/Spreadsheet'
          default:
            split_large_tables:
              enabled: true
              size: 50
            include: []
            clustering: accurate
            exclude: []
        settings:
          allOf:
          - $ref: '#/components/schemas/Settings'
          default:
            ocr_system: standard
            extraction_mode: hybrid
            force_url_result: false
            return_ocr_data: false
            return_images: []
            embed_pdf_metadata: false
            embed_pdf_metadata_dpi: 100
            persist_results: false
            hybrid_vpc: {}
        queue_priority:
          allOf:
          - $ref: '#/components/schemas/QueuePriority'
          description: 'Queue priority. ''batch'' for non-urgent work that processes when spare GPU capacity is available.
            ''auto'' (alias: ''standard'') uses the default queue.'
          default: auto
      type: object
      required:
      - input
      title: AsyncParseConfig
    # 200 body of POST /parse_async (and one alternative for POST /parse): only the job ID.
    AsyncParseResponse:
      type: object
      title: AsyncParseResponse
      required:
      - job_id
      properties:
        job_id:
          title: Job Id
          type: string
    # 200 body of POST /pipeline_async: carries only the job ID.
    AsyncPipelineResponse:
      type: object
      title: AsyncPipelineResponse
      required:
      - job_id
      properties:
        job_id:
          title: Job Id
          type: string
    # 200 body of POST /split_async: carries only the job ID.
    AsyncSplitResponse:
      type: object
      title: AsyncSplitResponse
      required:
      - job_id
      properties:
        job_id:
          title: Job Id
          type: string
    # Body of POST /upload, shared by its multipart/form-data and application/json media
    # types. `file` is optional and may be binary content or a plain string.
    Body_upload_upload_post:
      type: object
      title: Body_upload_upload_post
      properties:
        file:
          title: File
          nullable: true
          anyOf:
          - format: binary
            type: string
          - type: string
    # A box given by left/top/width/height on a 1-indexed page. `original_page` is the only
    # optional field.
    BoundingBox:
      type: object
      title: BoundingBox
      required:
      - left
      - top
      - width
      - height
      - page
      properties:
        left:
          title: Left
          type: number
        top:
          title: Top
          type: number
        width:
          title: Width
          type: number
        height:
          title: Height
          type: number
        page:
          title: Page
          type: integer
          description: The page number of the bounding box (1-indexed).
        original_page:
          title: Original Page
          type: integer
          description: The page number in the original document of the bounding box (1-indexed).
    # Numeric confidence for one category, with a per-criterion breakdown. All fields required.
    CategoryConfidence:
      type: object
      title: CategoryConfidence
      description: Confidence result for a category.
      required:
      - category
      - confidence
      - criteria_confidence
      properties:
        category:
          title: Category
          type: string
        confidence:
          title: Confidence
          type: number
        criteria_confidence:
          title: Criteria Confidence
          type: array
          items:
            $ref: '#/components/schemas/CriteriaConfidence'
    # Chunking options: mode, optional target size, and overlap. No field is required.
    Chunking:
      type: object
      title: Chunking
      properties:
        chunk_mode:
          title: Chunk Mode
          type: string
          default: disabled
          enum:
          - variable
          - section
          - page
          - disabled
          - block
          - page_sections
          description: >-
            Choose how to partition chunks. Variable mode chunks by character length and visual
            context. Section mode chunks by section headers. Page mode chunks according to pages.
            Page sections mode chunks first by page, then by sections within each page. Disabled
            returns one single chunk.
        chunk_size:
          title: Chunk Size
          type: integer
          nullable: true
          description: >-
            The approximate size of chunks (in characters) that the document will be split into.
            Defaults to null, in which case the chunk size is variable between 250 - 1500
            characters.
        chunk_overlap:
          title: Chunk Overlap
          type: integer
          default: 0
          description: Number of characters of overlap to include from adjacent chunks. Defaults to 0.
    # Two boolean switches for extraction citations; both are optional and have defaults.
    Citations:
      type: object
      title: Citations
      properties:
        enabled:
          title: Enabled
          type: boolean
          default: false
          description: If True, include citations in the extraction.
        numerical_confidence:
          title: Numerical Confidence
          type: boolean
          default: true
          description: If True, enable numeric citation confidence scores. Defaults to True.
    # One entry of ClassifyConfig.classification_schema: a category label and its criteria.
    ClassificationCategory:
      type: object
      title: ClassificationCategory
      description: A single classification category with its matching criteria.
      required:
      - category
      - criteria
      properties:
        category:
          title: Category
          type: string
          description: >-
            The category name/label that documents will be classified into (e.g., 'invoice',
            'contract', 'receipt').
        criteria:
          title: Criteria
          type: array
          items:
            type: string
          description: >-
            A list of criteria, keywords, or descriptions that define what characteristics a
            document must have to be classified into this category (e.g., ['contains billing
            information', 'has itemized charges']).
    # Request body of POST /classify. Only `input` is required.
    ClassifyConfig:
      properties:
        persist_results:
          type: boolean
          title: Persist Results
          description: If True, persist the results indefinitely. Defaults to False.
          default: false
        input:
          # A single URL string, a list of URL strings, or an UploadResponse object.
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          - $ref: '#/components/schemas/UploadResponse'
          title: Input
          # Literal block without the stray docstring indentation, so the numbered list and the
          # closing sentence render as Markdown instead of a run-on paragraph and a code block.
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed.
            You can provide one of the following:

            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions.
        classification_schema:
          items:
            $ref: '#/components/schemas/ClassificationCategory'
          type: array
          title: Classification Schema
          description: A list of classification categories and their matching criteria.
          default: []
        page_range:
          # A single PageRange, a list of PageRange objects, or a list of page numbers.
          anyOf:
          - $ref: '#/components/schemas/PageRange'
          - items:
              $ref: '#/components/schemas/PageRange'
            type: array
          - items:
              type: integer
            type: array
          title: Page Range
          description: The page range to process (1-indexed). By default, the first 5 pages are used. If more than 25 pages
            are selected, only the first 25 (after sorting) are used. Only applies to PDFs; ignored for other document types.
          nullable: true
        document_metadata:
          type: string
          nullable: true
          title: Document Metadata
          description: Optional document-level metadata to include in classification prompts.
      type: object
      required:
      - input
      title: ClassifyConfig
    # 200 body of POST /classify, and the `classify` member of the AsyncJobResponse.result union.
    ClassifyResponse:
      properties:
        response_type:
          # Constant tag: AsyncJobResponse.result uses this property as its discriminator.
          type: string
          title: Response Type
          default: classify
          enum:
          - classify
        job_id:
          type: string
          title: Job Id
        result:
          $ref: '#/components/schemas/ClassifyResponseCategory'
        response_confidence:
          # `nullable` next to a bare `$ref` is ignored in OpenAPI 3.0, so the reference is
          # wrapped in `allOf` to keep the keyword attached to this property.
          allOf:
          - $ref: '#/components/schemas/ResponseConfidence'
          nullable: true
        usage:
          # Wrapped in `allOf` for the same reason as `response_confidence`.
          allOf:
          - $ref: '#/components/schemas/ClassifyUsage'
          nullable: true
        duration:
          type: number
          nullable: true
          title: Duration
          description: The duration of the classify request in seconds.
      type: object
      required:
      # OpenAPI 3.0 requires the discriminator property to be a required field.
      - response_type
      - job_id
      - result
      title: ClassifyResponse
      description: Response from classify job - returned when polling /job/{job_id}
    # Shape of ClassifyResponse.result: a single required category string.
    ClassifyResponseCategory:
      type: object
      title: ClassifyResponseCategory
      required:
      - category
      properties:
        category:
          title: Category
          type: string
    # Shape of ClassifyResponse.usage: page and category counts, plus optional credits.
    ClassifyUsage:
      type: object
      title: ClassifyUsage
      required:
      - num_pages
      - num_categories
      properties:
        num_pages:
          title: Num Pages
          type: integer
        num_categories:
          title: Num Categories
          type: integer
        credits:
          title: Credits
          type: number
          nullable: true
    # Item type of CategoryConfidence.criteria_confidence: a criterion and a high/low rating.
    CriteriaConfidence:
      type: object
      title: CriteriaConfidence
      description: Confidence result for a single criterion.
      required:
      - criterion
      - confidence
      properties:
        criterion:
          title: Criterion
          type: string
        confidence:
          title: Confidence
          type: string
          enum:
          - high
          - low
    # Item type of DeepSplitResult.splits: a named split with page evidence and optional
    # partitions.
    DeepSplit:
      type: object
      title: DeepSplit
      required:
      - name
      - pages
      properties:
        name:
          title: Name
          type: string
        pages:
          title: Pages
          type: array
          items:
            $ref: '#/components/schemas/DeepSplitPageEvidence'
        partitions:
          title: Partitions
          type: array
          nullable: true
          items:
            $ref: '#/components/schemas/DeepSplitPartition'
    # Item type of the `pages` arrays in DeepSplit and DeepSplitPartition: a page number, an
    # evidence string, and an optional confidence rating.
    DeepSplitPageEvidence:
      properties:
        page_number:
          type: integer
          title: Page Number
        evidence:
          type: string
          title: Evidence
        confidence:
          type: string
          # In OpenAPI 3.0 a nullable enum must list null explicitly; `nullable: true` alone
          # does not exempt null from the `enum` constraint.
          enum:
          - high
          - medium
          - low
          - null
          nullable: true
          title: Confidence
      type: object
      required:
      - page_number
      - evidence
      title: DeepSplitPageEvidence
    # Item type of DeepSplit.partitions: a named partition and its page evidence.
    DeepSplitPartition:
      type: object
      title: DeepSplitPartition
      required:
      - name
      - pages
      properties:
        name:
          title: Name
          type: string
        pages:
          title: Pages
          type: array
          items:
            $ref: '#/components/schemas/DeepSplitPageEvidence'
    # Wrapper object whose single required property, `splits`, is a list of DeepSplit entries.
    DeepSplitResult:
      title: DeepSplitResult
      type: object
      required:
      - splits
      properties:
        splits:
          title: Splits
          type: array
          items:
            $ref: '#/components/schemas/DeepSplit'
    # Webhook configuration whose `mode` is fixed to `direct` (single-value enum, also the
    # default); `url` is the only required property.
    DirectWebhookConfig:
      title: DirectWebhookConfig
      type: object
      required:
      - url
      properties:
        mode:
          title: Mode
          type: string
          enum:
          - direct
          default: direct
        url:
          title: Url
          type: string
    # Edit configuration object. `document_url` and `edit_instructions` are required; the
    # remaining properties are optional.
    EditConfig:
      properties:
        document_url:
          # Either a plain URL string or an UploadResponse object.
          anyOf:
          - type: string
          - $ref: '#/components/schemas/UploadResponse'
          title: Document Url
          description: 'The URL of the document to be processed. You can provide one of the following:

            1. A publicly available URL

            2. A presigned S3 URL

            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document

            '
        edit_instructions:
          type: string
          title: Edit Instructions
          description: The instructions for the edit.
        edit_options:
          # OpenAPI 3.0 ignores keywords placed beside a $ref, so the reference is wrapped in
          # allOf to keep the default below effective.
          allOf:
          - $ref: '#/components/schemas/EditOptions'
          default:
            color: '#FF0000'
            enable_overflow_pages: false
            flatten: false
        form_schema:
          items:
            $ref: '#/components/schemas/EditWidget'
          type: array
          nullable: true
          title: Form Schema
          description: Form schema for PDF forms. List of widgets with their types, descriptions, and bounding boxes. Only
            works for PDFs.
        priority:
          type: boolean
          title: Priority
          description: If True, attempts to process the job with priority if the user has priority processing budget available;
            by default, sync jobs are prioritized above async jobs.
          default: true
      type: object
      required:
      - document_url
      - edit_instructions
      title: EditConfig
    # Optional settings for an edit; no property is required.
    EditOptions:
      title: EditOptions
      type: object
      properties:
        color:
          title: Color
          description: The color to use for edits, in hex format.
          type: string
          pattern: ^#[0-9A-Fa-f]{6}$
          default: '#FF0000'
        font_size:
          title: Font Size
          description: The font size (in points) to use for filled text fields. If not specified, font size is automatically
            calculated based on field dimensions.
          type: number
          minimum: 1
          maximum: 72
          nullable: true
        llm_provider_preference:
          title: Llm Provider Preference
          description: The LLM provider to use for edit processing. If not specified, defaults to 'google'
          type: string
          nullable: true
          enum:
          - openai
          - anthropic
          - google
        enable_overflow_pages:
          title: Enable Overflow Pages
          description: If True, creates overflow pages for text that doesn't fit in form fields. Defaults to False.
          type: boolean
          default: false
        flatten:
          title: Flatten
          description: If True, flattens form fields after filling, converting them to static content. Defaults to False.
          type: boolean
          default: false
    # Response for an edit operation. Only `document_url` is required; `response_type` is
    # fixed to `edit` (single-value enum, also the default).
    EditResponse:
      properties:
        response_type:
          type: string
          title: Response Type
          default: edit
          enum:
          - edit
        document_url:
          type: string
          title: Document Url
          description: Presigned URL to download the edited document.
        form_schema:
          items:
            $ref: '#/components/schemas/EditWidget'
          type: array
          nullable: true
          title: Form Schema
          description: Form schema for PDF forms. List of widgets with their types, descriptions, and bounding boxes.
        usage:
          # OpenAPI 3.0 ignores keywords placed beside a $ref, so the reference is wrapped in
          # allOf to keep `nullable` and `description` effective.
          allOf:
          - $ref: '#/components/schemas/ParseUsage'
          nullable: true
          description: Usage information for the edit operation, including number of pages and credits charged.
      type: object
      required:
      - document_url
      title: EditResponse
    # A single form widget. `bbox`, `description` and `type` are required; `fill`, `value`
    # and `font_size` are optional.
    EditWidget:
      properties:
        bbox:
          # OpenAPI 3.0 ignores keywords placed beside a $ref, so the reference is wrapped in
          # allOf to keep the description effective.
          allOf:
          - $ref: '#/components/schemas/BoundingBox'
          description: Bounding box coordinates of the widget
        description:
          type: string
          title: Description
          description: Description of the widget extracted from the document
        type:
          type: string
          enum:
          - text
          - checkbox
          - radio
          - dropdown
          - barcode
          title: Type
          description: Type of the form widget
        fill:
          type: boolean
          title: Fill
          description: If True (default), the system will attempt to fill this widget. If False, the widget will be created
            but intentionally left unfilled.
          default: true
        value:
          type: string
          nullable: true
          title: Value
          description: If provided, this value will be used directly instead of attempting to intelligently determine the
            field value.
        font_size:
          type: number
          maximum: 72
          minimum: 1
          nullable: true
          title: Font Size
          description: Font size in points for this specific field. Takes priority over the global font_size in EditOptions.
            If not set, falls back to the global font_size, then to auto-calculated sizing.
      type: object
      required:
      - bbox
      - description
      - type
      title: EditWidget
    # Optional enhancement switches; every property carries a default and none is required.
    Enhance:
      title: Enhance
      type: object
      properties:
        agentic:
          title: Agentic
          description: Agentic uses vision language models to enhance the accuracy of the output of different types of extraction.
            This will incur a cost and latency increase.
          type: array
          # Each entry is one of the three agentic configuration schemas.
          items:
            anyOf:
            - $ref: '#/components/schemas/TableAgentic'
            - $ref: '#/components/schemas/FigureAgentic'
            - $ref: '#/components/schemas/TextAgentic'
          default: []
        summarize_figures:
          title: Summarize Figures
          description: If True, summarize figures using a small vision language model. Defaults to True.
          type: boolean
          default: true
        intelligent_ordering:
          title: Intelligent Ordering
          description: If True, use an advanced vision language model to improve reading order accuracy, with a small increase
            in latency. Defaults to False.
          type: boolean
          default: false
    # Status record for an async job. Only `status` is required; every other property is
    # nullable.
    EnhancedAsyncJobResponse:
      properties:
        status:
          type: string
          enum:
          - Pending
          - Completed
          - Failed
          - Idle
          title: Status
        result:
          # Exactly one of the response schemas below, or null.
          oneOf:
          - $ref: '#/components/schemas/ParseResponse'
          - $ref: '#/components/schemas/ExtractResponse'
          - $ref: '#/components/schemas/SplitResponse'
          - $ref: '#/components/schemas/EditResponse'
          - $ref: '#/components/schemas/PipelineResponse'
          - $ref: '#/components/schemas/V3ExtractResponse'
          - $ref: '#/components/schemas/ClassifyResponse'
          # The `response_type` value selects the concrete schema through the mapping below.
          # NOTE(review): ExtractResponse, as defined in this file, declares no `response_type`
          # property — confirm that consumers can still resolve the `extract` mapping.
          discriminator:
            propertyName: response_type
            mapping:
              classify: '#/components/schemas/ClassifyResponse'
              edit: '#/components/schemas/EditResponse'
              extract: '#/components/schemas/ExtractResponse'
              parse: '#/components/schemas/ParseResponse'
              pipeline: '#/components/schemas/PipelineResponse'
              split: '#/components/schemas/SplitResponse'
              v3_extract: '#/components/schemas/V3ExtractResponse'
          nullable: true
          title: Result
        progress:
          type: number
          nullable: true
          title: Progress
        reason:
          type: string
          nullable: true
          title: Reason
        type:
          type: string
          enum:
          - Parse
          - Extract
          - Split
          - Edit
          - Pipeline
          - Classify
          nullable: true
          title: Type
        num_pages:
          type: integer
          nullable: true
          title: Num Pages
        total_pages:
          type: integer
          nullable: true
          title: Total Pages
        # No `type` keyword is declared, so the schema does not restrict the value's JSON type.
        source:
          nullable: true
          title: Source
        duration:
          type: number
          nullable: true
          title: Duration
        created_at:
          type: string
          format: date-time
          nullable: true
          title: Created At
        raw_config:
          type: string
          nullable: true
          title: Raw Config
        # No `type` keyword is declared, so the schema does not restrict the value's JSON type.
        bucket:
          nullable: true
          title: Bucket
      type: object
      required:
      - status
      title: EnhancedAsyncJobResponse
    # Free-form object: no properties are declared and any additional properties are allowed.
    ExtractResponse:
      type: object
      additionalProperties: true
    # Optional extraction settings; every property carries a default and none is required.
    ExtractSettings:
      properties:
        include_images:
          type: boolean
          title: Include Images
          description: If True, include images in the extraction.
          default: false
        optimize_for_latency:
          type: boolean
          title: Optimize For Latency
          description: If True, jobs will be processed with a higher throughput and priority at a higher cost. Defaults to
            False.
          default: false
        array_extract:
          type: boolean
          title: Array Extract
          description: If True, use array extraction.
          default: false
        deep_extract:
          type: boolean
          title: Deep Extract
          description: If True, use Deep Extract, an agentic extraction mode that iteratively refines its output to achieve
            near-perfect accuracy. Best for complex documents where accuracy is critical.
          default: false
        citations:
          # OpenAPI 3.0 ignores keywords placed beside a $ref, so the reference is wrapped in
          # allOf to keep `description` and `default` effective.
          allOf:
          - $ref: '#/components/schemas/Citations'
          description: The citations to use for the extraction.
          default:
            enabled: false
            numerical_confidence: true
      type: object
      title: ExtractSettings
    # Extraction result for one split: `split_name`, `page_range` and `result` are required,
    # `partition` is an optional nullable string.
    ExtractSplitResponse:
      title: ExtractSplitResponse
      description: This is the response format for Extract -> Split Pipelines
      type: object
      required:
      - split_name
      - page_range
      - result
      properties:
        split_name:
          title: Split Name
          type: string
        page_range:
          title: Page Range
          type: array
          items:
            type: integer
        partition:
          title: Partition
          type: string
          nullable: true
        result:
          title: Result
          anyOf:
          - $ref: '#/components/schemas/ExtractResponse'
          - $ref: '#/components/schemas/V3ExtractResponse'
    # Agentic configuration whose required `scope` is fixed to `figure` by a single-value enum.
    FigureAgentic:
      title: FigureAgentic
      type: object
      required:
      - scope
      properties:
        scope:
          title: Scope
          type: string
          enum:
          - figure
        prompt:
          title: Prompt
          description: Custom prompt for figure agentic.
          type: string
          nullable: true
        advanced_chart_agent:
          title: Advanced Chart Agent
          description: If True, use the advanced chart agent. Defaults to False.
          type: boolean
          default: false
        return_overlays:
          title: Return Overlays
          description: If True, return overlays for the figure. This is so you can use the overlays to double check the quality
            of the extraction
          type: boolean
          default: false
    # Optional output-formatting settings; every property carries a default and none is required.
    Formatting:
      title: Formatting
      type: object
      properties:
        add_page_markers:
          title: Add Page Markers
          description: If True, add page markers to the output. Defaults to False. Useful for extracting data with page specific
            information.
          type: boolean
          default: false
        table_output_format:
          title: Table Output Format
          description: The mode to use for table output. Defaults to dynamic, which returns md for simpler tables and html
            for more complex tables.
          type: string
          enum:
          - html
          - json
          - md
          - jsonbbox
          - dynamic
          - csv
          default: dynamic
        merge_tables:
          title: Merge Tables
          description: A flag to indicate if consecutive tables with the same number of columns should be merged. Defaults
            to False.
          type: boolean
          default: false
        include:
          title: Include
          description: A list of formatting to include in the output.
          type: array
          items:
            type: string
            enum:
            - change_tracking
            - highlight
            - comments
            - hyperlinks
            - signatures
            - ignore_watermarks
          default: []
    # Inline parse result. `type` is fixed to `full` and, together with `chunks`, is required.
    FullResult:
      properties:
        type:
          type: string
          title: Type
          description: type = 'full'
          enum:
          - full
        chunks:
          items:
            $ref: '#/components/schemas/ParseChunk'
          type: array
          title: Chunks
        ocr:
          # OpenAPI 3.0 ignores keywords placed beside a $ref, so the reference is wrapped in
          # allOf to keep `nullable` effective.
          allOf:
          - $ref: '#/components/schemas/OCRResult'
          nullable: true
        # No `type` keyword is declared, so the schema does not restrict the value's JSON type.
        custom:
          nullable: true
          title: Custom
      type: object
      required:
      - type
      - chunks
      title: FullResult
    # Two optional, nullable numeric confidence scores: one for extract, one for parse.
    GranularConfidence:
      title: GranularConfidence
      type: object
      properties:
        extract_confidence:
          title: Extract Confidence
          type: number
          nullable: true
        parse_confidence:
          title: Parse Confidence
          type: number
          nullable: true
    # Validation error body: an optional `detail` list of ValidationError entries.
    HTTPValidationError:
      title: HTTPValidationError
      type: object
      properties:
        detail:
          title: Detail
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
    # Holds a single optional, nullable `environment` name.
    HybridVpcSettings:
      title: HybridVpcSettings
      type: object
      properties:
        environment:
          title: Environment
          description: Named Hybrid VPC environment to use for this request. Only applies when your organization has Hybrid
            VPC environments configured.
          type: string
          nullable: true
    # Extraction instructions: an untyped `schema` value (default `{}`) and a `system_prompt`
    # string with a default; neither is required.
    Instructions:
      title: Instructions
      type: object
      properties:
        schema:
          title: Schema
          description: The JSON schema to use for the extraction.
          default: {}
        system_prompt:
          title: System Prompt
          description: The system prompt to use for the extraction.
          type: string
          default: Be precise and thorough.
    # One page of job listings: a required `jobs` list plus an optional nullable `next_cursor`.
    JobsResponse:
      title: JobsResponse
      type: object
      required:
      - jobs
      properties:
        jobs:
          title: Jobs
          description: List of jobs with their job_id, status, type, raw_config, created_at, num_pages and duration
          type: array
          items:
            $ref: '#/components/schemas/SingleJob'
        next_cursor:
          title: Next Cursor
          description: Cursor to fetch the next page of results. If null, there are no more results.
          type: string
          nullable: true
    # One OCR line: required `text` and `bbox`, with optional nullable confidence,
    # chunk index and rotation.
    OCRLine:
      title: OCRLine
      type: object
      required:
      - text
      - bbox
      properties:
        text:
          title: Text
          type: string
        bbox:
          $ref: '#/components/schemas/BoundingBox'
        confidence:
          title: Confidence
          description: OCR confidence score between 0 and 1, where 1 indicates highest confidence
          type: number
          nullable: true
        chunk_index:
          title: Chunk Index
          description: The index of the chunk that the line belongs to.
          type: integer
          nullable: true
        rotation:
          title: Rotation
          description: The rotation angle in degrees, from 0 to 360, counterclockwise.
          type: integer
          nullable: true
    # OCR output as two required lists: `words` (OCRWord items) and `lines` (OCRLine items).
    OCRResult:
      title: OCRResult
      type: object
      required:
      - words
      - lines
      properties:
        words:
          title: Words
          type: array
          items:
            $ref: '#/components/schemas/OCRWord'
        lines:
          title: Lines
          type: array
          items:
            $ref: '#/components/schemas/OCRLine'
    # One OCR word: required `text` and `bbox`, with optional nullable confidence,
    # chunk index and rotation.
    OCRWord:
      title: OCRWord
      type: object
      required:
      - text
      - bbox
      properties:
        text:
          title: Text
          type: string
        bbox:
          $ref: '#/components/schemas/BoundingBox'
        confidence:
          title: Confidence
          description: OCR confidence score between 0 and 1, where 1 indicates highest confidence
          type: number
          nullable: true
        chunk_index:
          title: Chunk Index
          description: The index of the chunk that the word belongs to.
          type: integer
          nullable: true
        rotation:
          title: Rotation
          description: The rotation angle in degrees, from 0 to 360, counterclockwise.
          type: integer
          nullable: true
    # Optional, nullable `start` and `end` page numbers (described as 1-indexed).
    PageRange:
      title: PageRange
      type: object
      properties:
        start:
          title: Start
          description: The page number to start processing from (1-indexed).
          type: integer
          nullable: true
        end:
          title: End
          description: The page number to stop processing at (1-indexed).
          type: integer
          nullable: true
    # One block extracted from a document. `type`, `bbox` and `content` are required; the
    # remaining properties are optional and nullable.
    ParseBlock:
      properties:
        type:
          type: string
          enum:
          - Header
          - Footer
          - Title
          - Section Header
          - Page Number
          - List Item
          - Figure
          - Table
          - Key Value
          - Text
          - Comment
          - Signature
          title: Type
          description: The type of block extracted from the document.
        bbox:
          # OpenAPI 3.0 ignores keywords placed beside a $ref, so the reference is wrapped in
          # allOf to keep the description effective.
          allOf:
          - $ref: '#/components/schemas/BoundingBox'
          description: The bounding box of the block extracted from the document.
        content:
          type: string
          title: Content
          description: The content of the block extracted from the document.
        image_url:
          type: string
          nullable: true
          title: Image Url
          description: (Experimental) The URL of the image associated with the block.
        chart_data:
          items:
            type: string
          type: array
          nullable: true
          title: Chart Data
          description: (Experimental) The URL/link to chart data JSON for figure blocks processed by chart agent.
        confidence:
          # NOTE(review): the description names only `low` and `high`, but no enum enforces
          # that — confirm whether other values can occur before constraining it.
          type: string
          nullable: true
          title: Confidence
          description: The confidence for the block. It is either low or high and takes into account factors like OCR and
            table structure
          default: low
        granular_confidence:
          # Wrapped in allOf for the same reason as `bbox`: keeps `nullable` and `description`
          # effective under OpenAPI 3.0.
          allOf:
          - $ref: '#/components/schemas/GranularConfidence'
          nullable: true
          description: Granular confidence scores for the block. It is a dictionary of confidence scores for the block. The
            confidence scores will not be None if the user has enabled numeric confidence scores.
        extra:
          additionalProperties: true
          type: object
          nullable: true
          title: Extra
          description: Extra metadata fields for the block. Fields like 'is_chart' will only appear when set to True.
      type: object
      required:
      - type
      - bbox
      - content
      title: ParseBlock
    # One chunk of parsed content. `content`, `embed`, `enriched` and `blocks` are required;
    # `enriched` must be present but may be null.
    ParseChunk:
      title: ParseChunk
      type: object
      required:
      - content
      - embed
      - enriched
      - blocks
      properties:
        content:
          title: Content
          description: The content of the chunk extracted from the document.
          type: string
        embed:
          title: Embed
          description: Chunk content optimized for embedding and retrieval.
          type: string
        enriched:
          title: Enriched
          description: The enriched content of the chunk extracted from the document.
          type: string
          nullable: true
        enrichment_success:
          title: Enrichment Success
          description: Whether the enrichment was successful.
          type: boolean
          default: false
        blocks:
          title: Blocks
          type: array
          items:
            $ref: '#/components/schemas/ParseBlock'
    # Groups the parse option sections; each property references its own schema and supplies
    # a default object. None is required.
    #
    # OpenAPI 3.0 ignores keywords placed beside a $ref, so every reference below is wrapped
    # in allOf to keep its `default` effective.
    ParseOptions:
      properties:
        enhance:
          allOf:
          - $ref: '#/components/schemas/Enhance'
          default:
            agentic: []
            summarize_figures: true
            intelligent_ordering: false
        retrieval:
          allOf:
          - $ref: '#/components/schemas/Retrieval'
          default:
            chunking:
              chunk_mode: disabled
              chunk_overlap: 0
            filter_blocks: []
            embedding_optimized: false
        formatting:
          allOf:
          - $ref: '#/components/schemas/Formatting'
          default:
            add_page_markers: false
            table_output_format: dynamic
            merge_tables: false
            include: []
        spreadsheet:
          allOf:
          - $ref: '#/components/schemas/Spreadsheet'
          default:
            split_large_tables:
              enabled: true
              size: 50
            include: []
            clustering: accurate
            exclude: []
        settings:
          allOf:
          - $ref: '#/components/schemas/Settings'
          default:
            ocr_system: standard
            extraction_mode: hybrid
            force_url_result: false
            return_ocr_data: false
            return_images: []
            embed_pdf_metadata: false
            embed_pdf_metadata_dpi: 100
            persist_results: false
            hybrid_vpc: {}
      type: object
      title: ParseOptions
    # Response for a parse request. `job_id`, `duration`, `usage` and `result` are required;
    # `response_type` is fixed to `parse` (single-value enum, also the default).
    ParseResponse:
      title: ParseResponse
      type: object
      required:
      - job_id
      - duration
      - usage
      - result
      properties:
        response_type:
          title: Response Type
          type: string
          enum:
          - parse
          default: parse
        job_id:
          title: Job Id
          type: string
        duration:
          title: Duration
          description: The duration of the parse request in seconds.
          type: number
        pdf_url:
          title: Pdf Url
          description: The storage URL of the converted PDF file.
          type: string
          nullable: true
        studio_link:
          title: Studio Link
          description: The link to the studio pipeline for the document.
          type: string
          nullable: true
        usage:
          $ref: '#/components/schemas/ParseUsage'
        result:
          title: Result
          description: The response from the document processing service. Note that there can be two types of responses, Full
            Result and URL Result. This is due to limitations on the max return size on HTTPS. If the response is too large,
            it will be returned as a presigned URL in the URL response. You should handle this in your application.
          anyOf:
          - $ref: '#/components/schemas/FullResult'
          - $ref: '#/components/schemas/UrlResult'
        parse_mode:
          title: Parse Mode
          description: Which pipeline produced this response. ``lite`` means Reducto Flash Lite served the request; ``base``
            is the standard pipeline. Optional / nullable for forward compatibility — older API instances or persisted responses
            written before this field existed will leave it ``None``; treat ``None`` as ``base``.
          type: string
          nullable: true
          enum:
          - base
          - lite
    # Usage figures for a request. Only `num_pages` is required; the remaining properties
    # are optional and nullable.
    ParseUsage:
      properties:
        num_pages:
          type: integer
          title: Num Pages
        credits:
          type: number
          nullable: true
          title: Credits
        credit_breakdown:
          # Map of string keys to numeric values.
          additionalProperties:
            type: number
          # `propertyNames` is not a Schema Object keyword in OpenAPI 3.0.x, so the list of
          # allowed keys is carried in a vendor extension to keep the document valid.
          x-propertyNames:
            enum:
            - page
            - html_page
            - docx_native_page
            - chart_agent
            - spreadsheet_cells
            - billable_spreadsheet_pages
            - agentic
            - complex
            - enrich_table
            - figure_summary
            - table_summary
            - key_value
            - agentic_text
            - promptable_agentic_text
            - reducto_lite_page
          type: object
          nullable: true
          title: Credit Breakdown
        page_billing_breakdown:
          # Map of string keys to lists of billing-feature names.
          additionalProperties:
            items:
              type: string
              enum:
              - page
              - html_page
              - docx_native_page
              - agentic
              - complex
              - chart_agent
              - spreadsheet_cells
              - billable_spreadsheet_pages
              - enrich_table
              - figure_summary
              - table_summary
              - key_value
              - agentic_text
              - promptable_agentic_text
              - reducto_lite_page
            type: array
          type: object
          nullable: true
          title: Page Billing Breakdown
          description: Per-page breakdown of features used. Maps 1-indexed page numbers (as strings) to the list of billing
            features applied on that page (e.g. 'page', 'complex', 'chart_agent').
        non_empty_cell_count:
          type: integer
          nullable: true
          title: Non Empty Cell Count
          description: Total non-empty cells across all sheets. Only set for spreadsheet inputs.
      type: object
      required:
      - num_pages
      title: ParseUsage
    # Response for a pipeline run. `job_id`, `usage` and `result` are required;
    # `response_type` is fixed to `pipeline` (single-value enum, also the default).
    PipelineResponse:
      title: PipelineResponse
      type: object
      required:
      - job_id
      - usage
      - result
      properties:
        response_type:
          title: Response Type
          type: string
          enum:
          - pipeline
          default: pipeline
        job_id:
          title: Job Id
          type: string
        usage:
          $ref: '#/components/schemas/ParseUsage'
        result:
          $ref: '#/components/schemas/PipelineResult'
    # Per-stage results of a pipeline run. `parse`, `extract` and `split` must be present
    # but may be null; `edit` is optional.
    PipelineResult:
      properties:
        parse:
          # A single ParseResponse, a list of them, or null.
          anyOf:
          - $ref: '#/components/schemas/ParseResponse'
          - items:
              $ref: '#/components/schemas/ParseResponse'
            type: array
          title: Parse
          nullable: true
        extract:
          # A list of per-split results, a single extract response of either kind, or null.
          anyOf:
          - items:
              $ref: '#/components/schemas/ExtractSplitResponse'
            type: array
          - $ref: '#/components/schemas/ExtractResponse'
          - $ref: '#/components/schemas/V3ExtractResponse'
          title: Extract
          nullable: true
        split:
          # OpenAPI 3.0 ignores keywords placed beside a $ref, so the reference is wrapped in
          # allOf to keep `nullable` effective.
          allOf:
          - $ref: '#/components/schemas/SplitResponse'
          nullable: true
        edit:
          # Wrapped in allOf for the same reason as `split`.
          allOf:
          - $ref: '#/components/schemas/EditResponse'
          nullable: true
      type: object
      required:
      - parse
      - extract
      - split
      title: PipelineResult
    # Closed object (no additional properties) with one optional, nullable `document_password`.
    PipelineSettings:
      title: PipelineSettings
      description: Settings for pipeline execution that override pipeline defaults.
      type: object
      additionalProperties: false
      properties:
        document_password:
          title: Document Password
          description: Password to decrypt password-protected documents.
          type: string
          nullable: true
    # String enum of queue priorities. The description is customer-facing API documentation,
    # so it states only what callers need to know about the three values.
    QueuePriority:
      type: string
      enum:
      - auto
      - standard
      - batch
      title: QueuePriority
      description: |-
        Customer-facing queue priority for parse jobs.

        `auto` and `standard` are synonyms: both select the default queue. `batch` is the only
        value that is handled differently. `auto` remains accepted as an alias of `standard`.
    # Wrapper object whose single required property, `categories`, is a list of
    # CategoryConfidence entries.
    ResponseConfidence:
      title: ResponseConfidence
      description: Overall confidence breakdown for classification response.
      type: object
      required:
      - categories
      properties:
        categories:
          title: Categories
          type: array
          items:
            $ref: '#/components/schemas/CategoryConfidence'
    # Optional retrieval settings; every property carries a default and none is required.
    Retrieval:
      properties:
        chunking:
          # OpenAPI 3.0 ignores keywords placed beside a $ref, so the reference is wrapped in
          # allOf to keep the default below effective.
          allOf:
          - $ref: '#/components/schemas/Chunking'
          default:
            chunk_mode: disabled
            chunk_overlap: 0
        filter_blocks:
          items:
            type: string
            enum:
            - Header
            - Footer
            - Title
            - Section Header
            - Page Number
            - List Item
            - Figure
            - Table
            - Key Value
            - Text
            - Comment
            - Signature
          type: array
          title: Filter Blocks
          description: A list of block types to filter out from 'content' and 'embed' fields. By default, no blocks are filtered.
          default: []
        embedding_optimized:
          type: boolean
          title: Embedding Optimized
          description: If True, use embedding optimized mode. Defaults to False.
          default: false
      type: object
      title: Retrieval
    # Settings: per-request processing options covering the OCR engine and extraction mode,
    # how results are returned and persisted, PDF metadata embedding, tenant throttling,
    # timeout, page selection, document password, and hybrid-VPC settings.
    # No property is listed as required.
    Settings:
      type: object
      title: Settings
      properties:
        ocr_system:
          title: Ocr System
          description: Standard is our best multilingual OCR system. Legacy only supports Germanic languages and is available
            for backwards compatibility.
          type: string
          enum:
          - standard
          - legacy
          default: standard
        extraction_mode:
          title: Extraction Mode
          description: The mode to use for text extraction from PDFs. OCR mode uses optical character recognition only. Hybrid
            mode combines OCR with embedded PDF text for best accuracy (default).
          type: string
          enum:
          - ocr
          - hybrid
          default: hybrid
        force_url_result:
          title: Force Url Result
          description: Force the result to be returned in URL form.
          type: boolean
          default: false
        force_file_extension:
          title: Force File Extension
          description: Force the URL to be downloaded as a specific file extension (e.g. `.png`).
          type: string
          nullable: true
        return_ocr_data:
          title: Return Ocr Data
          description: If True, return OCR data in the result. Defaults to False.
          type: boolean
          default: false
        return_images:
          title: Return Images
          description: Whether to return images for the specified block types. 'page' returns full page images. By default,
            no images are returned.
          type: array
          items:
            type: string
            enum:
            - figure
            - table
            - page
          default: []
        embed_pdf_metadata:
          title: Embed Pdf Metadata
          description: If True, embed OCR metadata into the returned PDF. Defaults to False.
          type: boolean
          default: false
        embed_pdf_metadata_dpi:
          title: Embed Pdf Metadata Dpi
          description: Render DPI used when rasterizing the source PDF before embedding the OCR text layer (only applies when
            ``embed_pdf_metadata`` is True). Lower values produce dramatically smaller output PDFs; higher values preserve
            more detail when zoomed past 200%. Defaults to 100 (good for on-screen viewing); raise toward the source scan
            DPI for crisper output. Min 50, max 250.
          type: integer
          minimum: 50
          maximum: 250
          default: 100
        persist_results:
          title: Persist Results
          description: If True, persist the results indefinitely. Defaults to False.
          type: boolean
          default: false
        # NOTE(review): OpenAPI 3.0.x says properties added next to $ref SHALL be ignored, so strict
        # tooling may drop `nullable`/`description` here. Confirm the consuming generators honor them.
        tenant_throttling:
          $ref: '#/components/schemas/TenantThrottling'
          nullable: true
          description: Per-tenant throttling for multi-tenant applications. Tag each request with your tenant's id to bound
            how much of your account's concurrency a single tenant can consume. Account-level throttles still apply.
        timeout:
          title: Timeout
          description: The timeout for the job in seconds.
          type: number
          nullable: true
        # Accepts a single PageRange, a list of PageRange objects, a list of integers, or a
        # list of strings.
        page_range:
          title: Page Range
          description: The page range to process (1-indexed). By default, the entire document is processed. For spreadsheets,
            you can also provide a list of sheet names.
          nullable: true
          anyOf:
          - $ref: '#/components/schemas/PageRange'
          - type: array
            items:
              $ref: '#/components/schemas/PageRange'
          - type: array
            items:
              type: integer
          - type: array
            items:
              type: string
        document_password:
          title: Document Password
          description: Password to decrypt password-protected documents.
          type: string
          nullable: true
        hybrid_vpc:
          $ref: '#/components/schemas/HybridVpcSettings'
          description: Hybrid VPC request-scoped settings.
          default: {}
    # SingleJob: record describing one job, with its id, status, job type, raw config string,
    # creation timestamp, page counts, and duration. `source` and `bucket` carry no type
    # constraint and are not required.
    SingleJob:
      type: object
      title: SingleJob
      required:
      - job_id
      - status
      - type
      - raw_config
      - created_at
      - num_pages
      - total_pages
      - duration
      properties:
        job_id:
          title: Job Id
          type: string
        status:
          title: Status
          type: string
          enum:
          - Pending
          - Completed
          - Failed
          - Idle
          - InProgress
          - Completing
          - Cancelled
        type:
          title: Type
          type: string
          enum:
          - Parse
          - Extract
          - Split
          - Edit
          - Pipeline
          - Classify
        raw_config:
          title: Raw Config
          type: string
        created_at:
          title: Created At
          type: string
          format: date-time
        source:
          title: Source
          nullable: true
        # The three fields below are required but may be null.
        num_pages:
          title: Num Pages
          type: integer
          nullable: true
        total_pages:
          title: Total Pages
          type: integer
          nullable: true
        duration:
          title: Duration
          type: number
          nullable: true
        bucket:
          title: Bucket
          nullable: true
    # Split: one named entry of a split result, holding the page numbers assigned to it, a
    # `conf` value of high or low (default low), and an optional, nullable list of partitions.
    Split:
      type: object
      title: Split
      required:
      - name
      - pages
      properties:
        name:
          title: Name
          type: string
        pages:
          title: Pages
          type: array
          items:
            type: integer
        conf:
          title: Conf
          type: string
          enum:
          - high
          - low
          default: low
        partitions:
          title: Partitions
          type: array
          nullable: true
          items:
            $ref: '#/components/schemas/SplitPartition'
    # SplitCategory: a category identified by a required name and description, plus an
    # optional, nullable partition_key.
    SplitCategory:
      type: object
      title: SplitCategory
      required:
      - name
      - description
      properties:
        name:
          title: Name
          type: string
        description:
          title: Description
          type: string
        partition_key:
          title: Partition Key
          type: string
          nullable: true
    # SplitLargeTableSizes: independent row and column chunk sizes for splitting large
    # tables. Either value may be null, which disables chunking along that axis.
    SplitLargeTableSizes:
      type: object
      title: SplitLargeTableSizes
      properties:
        row:
          title: Row
          description: The number of rows to include in each chunk when splitting large tables. Does not chunk rows if set
            to None.
          type: integer
          nullable: true
        column:
          title: Column
          description: The number of columns to include in each chunk when splitting large tables. Does not chunk columns
            if set to None.
          type: integer
          nullable: true
    # SplitLargeTables: toggle (default true) and chunk size (default 50) for splitting
    # large tables. `size` is either one integer or a SplitLargeTableSizes object.
    SplitLargeTables:
      type: object
      title: SplitLargeTables
      properties:
        enabled:
          title: Enabled
          description: If True, split large tables into smaller tables. Defaults to True.
          type: boolean
          default: true
        size:
          title: Size
          description: The size of the tables to split into. Defaults to 50. Use 'row' and 'column' to independently specify
            the number of rows and columns to include when splitting. If you only want to split by rows or columns, set the
            other value to None.
          anyOf:
          - type: integer
          - $ref: '#/components/schemas/SplitLargeTableSizes'
          default: 50
    # SplitPartition: a named partition with its page numbers and a `conf` value of high
    # or low (default low).
    SplitPartition:
      type: object
      title: SplitPartition
      required:
      - name
      - pages
      properties:
        name:
          title: Name
          type: string
        pages:
          title: Pages
          type: array
          items:
            type: integer
        conf:
          title: Conf
          type: string
          enum:
          - high
          - low
          default: low
    # SplitResponse: response envelope for a split. It has a response_type whose only
    # allowed value is `split`, the usage record, and a result that is either a SplitResult
    # or a DeepSplitResult.
    SplitResponse:
      type: object
      title: SplitResponse
      required:
      - usage
      - result
      properties:
        response_type:
          title: Response Type
          type: string
          enum:
          - split
          default: split
        usage:
          $ref: '#/components/schemas/ParseUsage'
        result:
          title: Result
          description: The split result.
          anyOf:
          - $ref: '#/components/schemas/SplitResult'
          - $ref: '#/components/schemas/DeepSplitResult'
    # SplitResult: the list of Split entries plus a section_mapping, which is a nullable
    # object whose values are arrays of integers. Both fields are required.
    SplitResult:
      type: object
      title: SplitResult
      required:
      - section_mapping
      - splits
      properties:
        section_mapping:
          title: Section Mapping
          type: object
          nullable: true
          additionalProperties:
            type: array
            items:
              type: integer
        splits:
          title: Splits
          type: array
          items:
            $ref: '#/components/schemas/Split'
    # SplitSettings: options for split processing, covering table handling (truncate or
    # preserve), whether a page may appear in more than one category/partition, and the
    # deep split toggle.
    SplitSettings:
      type: object
      title: SplitSettings
      properties:
        table_cutoff:
          title: Table Cutoff
          description: If tables should be truncated to the first few rows or if all content should be preserved. truncate
            improves latency, preserve is recommended for cases where partition_key is being used and the partition_key may
            be included within the table. Defaults to truncate
          type: string
          enum:
          - truncate
          - preserve
          default: truncate
        allow_page_overlap:
          title: Allow Page Overlap
          description: If True, a page can belong to multiple categories/partitions. If False, each page must belong to exactly
            one category. Defaults to True.
          type: boolean
          default: true
        deep_split:
          title: Deep Split
          description: If True, uses the deep split agent for higher-quality document splitting. Off by default.
          type: boolean
          default: false
    # Spreadsheet: spreadsheet-specific parsing options covering large-table splitting, extra
    # cell information to include, table clustering mode, content to exclude, and an
    # optional cap on the total non-empty cell count.
    Spreadsheet:
      type: object
      title: Spreadsheet
      properties:
        split_large_tables:
          $ref: '#/components/schemas/SplitLargeTables'
          default:
            enabled: true
            size: 50
        include:
          title: Include
          description: Whether to include cell color, formula, and dropdown information in the output.
          type: array
          items:
            type: string
            enum:
            - cell_colors
            - formula
            - dropdowns
          default: []
        clustering:
          title: Clustering
          description: In a spreadsheet with different tables inside, we enable splitting up the tables by default. Accurate
            mode applies more powerful models for superior accuracy, at 5× the default per-cell rate. Disabling will register
            as one large table.
          type: string
          enum:
          - accurate
          - fast
          - disabled
          default: accurate
        exclude:
          title: Exclude
          # Description lists every enum value below, including styling and spreadsheet_images.
          description: Whether to exclude hidden sheets, hidden rows, hidden columns, styling, or spreadsheet images from
            the output.
          type: array
          items:
            type: string
            enum:
            - hidden_sheets
            - hidden_rows
            - hidden_cols
            - styling
            - spreadsheet_images
          default: []
        max_cell_count:
          title: Max Cell Count
          description: Maximum total non-empty cells allowed across all sheets. If exceeded, the request is rejected with
            a 422 error. Set to null to disable the limit. Defaults to null.
          type: integer
          minimum: 1
          nullable: true
    # SvixWebhookConfig: webhook configuration whose mode only allows `svix`, with an
    # optional list of Svix channels.
    SvixWebhookConfig:
      type: object
      title: SvixWebhookConfig
      properties:
        mode:
          title: Mode
          type: string
          enum:
          - svix
          default: svix
        channels:
          title: Channels
          description: A list of Svix channels the message will be delivered down, omit to send to all channels.
          type: array
          items:
            type: string
    # SyncExtractConfig: request body schema for an extract request. Only `input` is required;
    # parsing options, extraction instructions, and extract settings fall back to the
    # defaults spelled out below.
    SyncExtractConfig:
      type: object
      title: SyncExtractConfig
      required:
      - input
      properties:
        # A single string, a list of strings, or an UploadResponse object.
        input:
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          - $ref: '#/components/schemas/UploadResponse'
          title: Input
          # Literal block scalar with no leaked source indentation, so the numbered options
          # render as a Markdown list instead of a run-on paragraph plus a code block.
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed.
            You can provide one of the following:

            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions.
        parsing:
          $ref: '#/components/schemas/ParseOptions'
          description: The configuration options for parsing the document. If you are passing in a jobid:// URL for the file,
            then this configuration will be ignored.
          default:
            enhance:
              agentic: []
              intelligent_ordering: false
              summarize_figures: true
            retrieval:
              chunking:
                chunk_mode: disabled
                chunk_overlap: 0
              embedding_optimized: false
              filter_blocks: []
            formatting:
              add_page_markers: false
              include: []
              merge_tables: false
              table_output_format: dynamic
            spreadsheet:
              clustering: accurate
              exclude: []
              include: []
              split_large_tables:
                enabled: true
                size: 50
            settings:
              embed_pdf_metadata: false
              embed_pdf_metadata_dpi: 100
              extraction_mode: hybrid
              force_url_result: false
              hybrid_vpc: {}
              ocr_system: standard
              persist_results: false
              return_images: []
              return_ocr_data: false
        instructions:
          $ref: '#/components/schemas/Instructions'
          description: The instructions to use for the extraction.
          default:
            schema: {}
            system_prompt: Be precise and thorough.
        settings:
          $ref: '#/components/schemas/ExtractSettings'
          description: The settings to use for the extraction.
          default:
            include_images: false
            optimize_for_latency: false
            array_extract: false
            deep_extract: false
            citations:
              enabled: false
              numerical_confidence: true
    # SyncParseConfig: request body schema accepted by POST /parse. Only `input` is required;
    # the enhance, retrieval, formatting, spreadsheet, and settings groups each fall back
    # to the defaults spelled out below.
    SyncParseConfig:
      type: object
      title: SyncParseConfig
      required:
      - input
      properties:
        # A single string, a list of strings, or an UploadResponse object.
        input:
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          - $ref: '#/components/schemas/UploadResponse'
          title: Input
          # Literal block scalar with no leaked source indentation, so the numbered options
          # render as a Markdown list instead of a run-on paragraph plus a code block.
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed.
            You can provide one of the following:

            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions.
        enhance:
          $ref: '#/components/schemas/Enhance'
          default:
            agentic: []
            summarize_figures: true
            intelligent_ordering: false
        retrieval:
          $ref: '#/components/schemas/Retrieval'
          default:
            chunking:
              chunk_mode: disabled
              chunk_overlap: 0
            filter_blocks: []
            embedding_optimized: false
        formatting:
          $ref: '#/components/schemas/Formatting'
          default:
            add_page_markers: false
            table_output_format: dynamic
            merge_tables: false
            include: []
        spreadsheet:
          $ref: '#/components/schemas/Spreadsheet'
          default:
            split_large_tables:
              enabled: true
              size: 50
            include: []
            clustering: accurate
            exclude: []
        settings:
          $ref: '#/components/schemas/Settings'
          default:
            ocr_system: standard
            extraction_mode: hybrid
            force_url_result: false
            return_ocr_data: false
            return_images: []
            embed_pdf_metadata: false
            embed_pdf_metadata_dpi: 100
            persist_results: false
            hybrid_vpc: {}
    # SyncSplitConfig: request body schema for a split request. `input` and
    # `split_description` are required; parsing options, split rules, and split settings
    # fall back to the defaults spelled out below.
    SyncSplitConfig:
      type: object
      title: SyncSplitConfig
      required:
      - input
      - split_description
      properties:
        # A single string, a list of strings, or an UploadResponse object.
        input:
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          - $ref: '#/components/schemas/UploadResponse'
          title: Input
          # Literal block scalar with no leaked source indentation, so the numbered options
          # render as a Markdown list instead of a run-on paragraph plus a code block.
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed.
            You can provide one of the following:

            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions.
        parsing:
          $ref: '#/components/schemas/ParseOptions'
          description: The configuration options for parsing the document. If you are passing in a jobid:// URL for the file,
            then this configuration will be ignored.
          default:
            enhance:
              agentic: []
              intelligent_ordering: false
              summarize_figures: true
            retrieval:
              chunking:
                chunk_mode: disabled
                chunk_overlap: 0
              embedding_optimized: false
              filter_blocks: []
            formatting:
              add_page_markers: false
              include: []
              merge_tables: false
              table_output_format: dynamic
            spreadsheet:
              clustering: accurate
              exclude: []
              include: []
              split_large_tables:
                enabled: true
                size: 50
            settings:
              embed_pdf_metadata: false
              embed_pdf_metadata_dpi: 100
              extraction_mode: hybrid
              force_url_result: false
              hybrid_vpc: {}
              ocr_system: standard
              persist_results: false
              return_images: []
              return_ocr_data: false
        split_description:
          items:
            $ref: '#/components/schemas/SplitCategory'
          type: array
          title: Split Description
          description: The categories to split the document into. Each entry names a category and describes the content that
            belongs to it.
        split_rules:
          type: string
          title: Split Rules
          description: The prompt that describes rules for splitting the document.
          default: Split the document into the applicable sections. Sections may only overlap at their first and last page
            if at all.
        settings:
          $ref: '#/components/schemas/SplitSettings'
          description: The settings for split processing.
          default:
            table_cutoff: truncate
            allow_page_overlap: true
            deep_split: false
    # TableAgentic: agentic configuration whose scope only allows `table`, with an
    # optional custom prompt and a mode (default `default`).
    TableAgentic:
      type: object
      title: TableAgentic
      required:
      - scope
      properties:
        scope:
          title: Scope
          type: string
          enum:
          - table
        prompt:
          title: Prompt
          description: Custom prompt for table agentic.
          type: string
          nullable: true
        # NOTE(review): the enum accepts `auto`, but the description only explains
        # `default` and `max`. Confirm what `auto` does and document it.
        mode:
          title: Mode
          description: 'Mode for table agentic: ''default'' selectively applies enrichment only to tables likely to benefit,
            and ''max'' runs enrichment on all tables.'
          type: string
          enum:
          - default
          - auto
          - max
          default: default
    # TenantThrottling: identifies the tenant a request belongs to (required, 1-256
    # characters) and the maximum share of account concurrency it may use (default 0.5).
    TenantThrottling:
      type: object
      title: TenantThrottling
      required:
      - tenant_id
      properties:
        tenant_id:
          title: Tenant Id
          description: Your identifier for the tenant (customer, workspace, organization) this request belongs to. Used only
            for noisy-neighbor throttling inside your account.
          type: string
          minLength: 1
          maxLength: 256
        max_share:
          title: Max Share
          description: Maximum fraction of your account's concurrency ceiling this tenant may use, between 0 (exclusive) and
            1. Defaults to 0.5.
          type: number
          # Allowed values are greater than 0 and at most 1.
          minimum: 0
          exclusiveMinimum: true
          maximum: 1
          default: 0.5
    # TextAgentic: agentic configuration whose scope only allows `text`, with an
    # optional, nullable custom prompt.
    TextAgentic:
      type: object
      title: TextAgentic
      required:
      - scope
      properties:
        scope:
          title: Scope
          type: string
          enum:
          - text
        prompt:
          title: Prompt
          description: 'Custom instructions for agentic text. Note: This only applies to form regions (key-value).'
          type: string
          nullable: true
    # UploadResponse: a required file_id plus an optional, nullable presigned_url.
    UploadResponse:
      type: object
      title: UploadResponse
      required:
      - file_id
      properties:
        file_id:
          title: File Id
          type: string
        presigned_url:
          title: Presigned Url
          type: string
          nullable: true
    # UrlResult: a result delivered by reference. `type` only allows `url`, and it is
    # accompanied by the url itself and a result_id. All three fields are required.
    UrlResult:
      type: object
      title: UrlResult
      required:
      - type
      - url
      - result_id
      properties:
        type:
          title: Type
          description: type = 'url'
          type: string
          enum:
          - url
        url:
          title: Url
          type: string
        result_id:
          title: Result Id
          type: string
    # V3AsyncPipelineConfig: request body schema for running a pipeline asynchronously.
    # `input` and `pipeline_id` are required; `async` and `settings` fall back to the
    # defaults spelled out below.
    V3AsyncPipelineConfig:
      type: object
      title: V3AsyncPipelineConfig
      required:
      - input
      - pipeline_id
      properties:
        async:
          $ref: '#/components/schemas/config__v3__AsyncConfig'
          description: The configuration options for asynchronous processing (default synchronous).
          default:
            priority: false
        # A single string, a list of strings, or an UploadResponse object.
        input:
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          - $ref: '#/components/schemas/UploadResponse'
          title: Input
          # Literal block scalar with no leaked source indentation, so the numbered options
          # render as a Markdown list instead of a run-on paragraph plus a code block.
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed.
            You can provide one of the following:

            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions.
        pipeline_id:
          type: string
          title: Pipeline Id
          description: The ID of the pipeline to use for the document.
        settings:
          $ref: '#/components/schemas/PipelineSettings'
          default: {}
    # V3ExtractResponse: free-form object; properties of any name and type are accepted.
    V3ExtractResponse:
      type: object
      additionalProperties: true
    # V3PipelineConfig: request body schema for running a pipeline. `input` and
    # `pipeline_id` are required; `settings` defaults to an empty object.
    V3PipelineConfig:
      type: object
      title: V3PipelineConfig
      required:
      - input
      - pipeline_id
      properties:
        # A single string, a list of strings, or an UploadResponse object.
        input:
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          - $ref: '#/components/schemas/UploadResponse'
          title: Input
          # Literal block scalar with no leaked source indentation, so the numbered options
          # render as a Markdown list instead of a run-on paragraph plus a code block.
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed.
            You can provide one of the following:

            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions.
        pipeline_id:
          type: string
          title: Pipeline Id
          description: The ID of the pipeline to use for the document.
        settings:
          $ref: '#/components/schemas/PipelineSettings'
          default: {}
    # ValidationError: one validation failure, with its location path (strings and/or
    # integers), a message, and an error type. `input` and `ctx` are optional extras.
    ValidationError:
      type: object
      title: ValidationError
      required:
      - loc
      - msg
      - type
      properties:
        loc:
          title: Location
          type: array
          items:
            anyOf:
            - type: string
            - type: integer
        msg:
          title: Message
          type: string
        type:
          title: Error Type
          type: string
        # No type constraint is declared for `input`.
        input:
          title: Input
        ctx:
          title: Context
          type: object
    # WebhookConfigNew: webhook delivery configuration with a mode (disabled, svix, or
    # direct; default disabled), a target URL for direct delivery, free-form metadata, and
    # optional Svix channels. No property is listed as required.
    WebhookConfigNew:
      type: object
      title: WebhookConfigNew
      properties:
        mode:
          title: Mode
          description: The mode to use for webhook delivery. Defaults to 'disabled'. We recommend using 'svix' for production
            environments.
          type: string
          enum:
          - disabled
          - svix
          - direct
          default: disabled
        url:
          title: Url
          description: The URL to send the webhook to (if using direct webhook).
          type: string
        # No type constraint is declared for `metadata`.
        metadata:
          title: Metadata
          description: JSON metadata included in webhook request body
        channels:
          title: Channels
          description: A list of Svix channels the message will be delivered down, omit to send to all channels.
          type: array
          items:
            type: string
    # AsyncConfig (v3): options for asynchronous processing, with untyped webhook metadata,
    # a priority flag (default false), and a nullable webhook that is either a
    # SvixWebhookConfig or a DirectWebhookConfig.
    config__v3__AsyncConfig:
      type: object
      title: AsyncConfig
      properties:
        metadata:
          title: Metadata
          description: JSON metadata included in webhook request body. Defaults to None.
        priority:
          title: Priority
          description: If True, attempts to process the job with priority if the user has priority processing budget available;
            by default, sync jobs are prioritized above async jobs.
          type: boolean
          default: false
        webhook:
          title: Webhook
          description: The webhook configuration for the asynchronous processing.
          nullable: true
          anyOf:
          - $ref: '#/components/schemas/SvixWebhookConfig'
          - $ref: '#/components/schemas/DirectWebhookConfig'
    # AsyncSplitConfig (v3): request body schema for an asynchronous split request. `input`
    # and `split_description` are required; async options, parsing options, split rules,
    # and split settings fall back to the defaults spelled out below.
    config__v3__AsyncSplitConfig:
      type: object
      title: AsyncSplitConfig
      required:
      - input
      - split_description
      properties:
        async:
          $ref: '#/components/schemas/config__v3__AsyncConfig'
          description: The configuration options for asynchronous processing (default synchronous).
          default:
            priority: false
        # A single string, a list of strings, or an UploadResponse object.
        input:
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          - $ref: '#/components/schemas/UploadResponse'
          title: Input
          # Literal block scalar with no leaked source indentation, so the numbered options
          # render as a Markdown list instead of a run-on paragraph plus a code block.
          description: |-
            For parse/split/extract pipelines, the URL of the document to be processed.
            You can provide one of the following:

            1. A publicly available URL
            2. A presigned S3 URL
            3. A reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document
            4. A jobid:// prefixed URL obtained from a previous /parse invocation
            5. A list of URLs (for multi-document pipelines, V3 API only)

            For edit pipelines, this should be a string containing the edit instructions.
        parsing:
          $ref: '#/components/schemas/ParseOptions'
          description: The configuration options for parsing the document. If you are passing in a jobid:// URL for the file,
            then this configuration will be ignored.
          default:
            enhance:
              agentic: []
              intelligent_ordering: false
              summarize_figures: true
            retrieval:
              chunking:
                chunk_mode: disabled
                chunk_overlap: 0
              embedding_optimized: false
              filter_blocks: []
            formatting:
              add_page_markers: false
              include: []
              merge_tables: false
              table_output_format: dynamic
            spreadsheet:
              clustering: accurate
              exclude: []
              include: []
              split_large_tables:
                enabled: true
                size: 50
            settings:
              embed_pdf_metadata: false
              embed_pdf_metadata_dpi: 100
              extraction_mode: hybrid
              force_url_result: false
              hybrid_vpc: {}
              ocr_system: standard
              persist_results: false
              return_images: []
              return_ocr_data: false
        split_description:
          items:
            $ref: '#/components/schemas/SplitCategory'
          type: array
          title: Split Description
          description: The categories to split the document into. Each entry names a category and describes the content that
            belongs to it.
        split_rules:
          type: string
          title: Split Rules
          description: The prompt that describes rules for splitting the document.
          default: Split the document into the applicable sections. Sections may only overlap at their first and last page
            if at all.
        settings:
          $ref: '#/components/schemas/SplitSettings'
          description: The settings for split processing.
          default:
            table_cutoff: truncate
            allow_page_overlap: true
            deep_split: false