# Arazzo workflow description: asynchronous extraction of a custom document
# with Mindee (enqueue, poll the job, fetch the inference).
# `arazzo` is the version of the Arazzo Specification this document follows.
arazzo: 1.0.1
# Descriptive metadata only; nothing under `info` affects execution.
info:
  title: Mindee Custom Document Extraction
  summary: Enqueue an arbitrary document against a custom model with raw text capture, poll until processed, then read fields and full text.
  description: >-
    Applies the Mindee asynchronous extraction pattern to a custom document
    type backed by a user-defined extraction model. The workflow uploads any
    document against the supplied model with the raw_text option enabled, polls
    the shared jobs endpoint until the job is Processed, and fetches the
    inference to read both the model-specific extracted fields and the complete
    raw text of the document. Every step spells out its request inline so the
    flow can be read and executed without opening the underlying OpenAPI
    description.
  # Version of this workflow document itself (distinct from `arazzo` above).
  version: 1.0.0
# OpenAPI documents that the steps' operationId values are resolved against.
# The URLs are relative references to sibling files in ../openapi/.
# NOTE(review): because more than one source description is declared, the
# Arazzo spec expects step operationId values to be qualified as
# $sourceDescriptions.<name>.<operationId>.
sourceDescriptions:
# Extraction product API; presumably hosts the enqueue and result operations
# (confirm against the referenced OpenAPI file).
- name: extractionApi
  url: ../openapi/mindee-extraction-api-openapi.yml
  type: openapi
# Jobs API; presumably hosts the job-status polling operation
# (confirm against the referenced OpenAPI file).
- name: jobsApi
  url: ../openapi/mindee-jobs-api-openapi.yml
  type: openapi
# Workflows defined by this document (a single one here).
workflows:
- workflowId: custom-document-extraction
  summary: Upload a custom document with raw text capture and read fields plus full text.
  description: >-
    Sends a document to the extraction enqueue endpoint against a custom model
    with raw_text enabled, polls the job until processing finishes, and
    retrieves both the extracted fields and the raw document text.
  # Schema of the values a caller supplies; steps read them via $inputs.<name>.
  inputs:
    type: object
    # Only these three are mandatory; filename and textContext may be omitted.
    required:
    - authorization
    - modelId
    - file
    properties:
      # Forwarded unchanged as the Authorization header of every step.
      authorization:
        type: string
        description: Mindee API key sent in the Authorization header.
      # Sent as the model_id form field when the document is enqueued.
      modelId:
        type: string
        description: UUID of the custom extraction model to apply.
      # Declared as a plain string; the description says it carries the binary
      # upload. NOTE(review): how the runner maps this string to a multipart
      # file part is not specified here - confirm with the tooling in use.
      file:
        type: string
        description: The document file to upload as binary form data.
      # Optional: sent as the filename form field.
      filename:
        type: string
        description: Optional filename to associate with the uploaded document.
      # Optional: sent as the text_context form field.
      textContext:
        type: string
        description: Optional additional context passed to the model for this inference.
  steps:
  # Step 1 of 3: submit the document for asynchronous extraction and capture
  # the id of the job that was created.
  - stepId: enqueueDocument
    description: >-
      Send the document to the asynchronous extraction queue against the custom
      model with raw_text enabled so the full document text is returned.
    # Qualified with the source description name: this document declares two
    # OpenAPI sources, and Arazzo requires the
    # $sourceDescriptions.<name>.<operationId> form in that case.
    operationId: $sourceDescriptions.extractionApi.Enqueue_Extraction_Product_Inference_v2_products_extraction_enqueue_post
    parameters:
    - name: Authorization
      in: header
      value: $inputs.authorization
    requestBody:
      contentType: multipart/form-data
      payload:
        model_id: $inputs.modelId
        file: $inputs.file
        # filename and text_context come from optional inputs.
        # NOTE(review): what is sent when they are omitted depends on the
        # runner - confirm the fields are dropped rather than sent empty.
        filename: $inputs.filename
        # Always request the raw document text; getResult reads it back.
        raw_text: true
        text_context: $inputs.textContext
    successCriteria:
    # The request is only accepted for processing here, hence 202 and not 200.
    - condition: $statusCode == 202
    outputs:
      # jobId drives both the polling step and the result lookup.
      jobId: $response.body#/job/id
      status: $response.body#/job/status
  # Step 2 of 3: poll the job created by enqueueDocument until it reaches a
  # terminal status, then either fetch the result or stop.
  - stepId: pollJob
    description: >-
      Poll the shared jobs endpoint until the custom extraction job reports
      Processed or Failed.
    # Qualified with the source description name: this document declares two
    # OpenAPI sources, and Arazzo requires the
    # $sourceDescriptions.<name>.<operationId> form in that case.
    operationId: $sourceDescriptions.jobsApi.Get_Job_Status_v2_jobs__job_id__get
    parameters:
    - name: Authorization
      in: header
      value: $inputs.authorization
    - name: job_id
      in: path
      value: $steps.enqueueDocument.outputs.jobId
    # Presumably keeps the endpoint answering with the job body instead of
    # redirecting to the result once processed - confirm against the Jobs API.
    - name: redirect
      in: query
      value: false
    successCriteria:
    - condition: $statusCode == 200
    outputs:
      status: $response.body#/job/status
    # Actions are evaluated in order and the first one whose criteria match is
    # taken. Without a match the workflow would continue with the next step,
    # so every terminal status needs an explicit action.
    onSuccess:
    # Job finished: move on to reading the inference.
    - name: jobProcessed
      type: goto
      stepId: getResult
      criteria:
      - context: $response.body
        condition: $.job.status == "Processed"
        type: jsonpath
    # Job failed: stop here instead of falling through to getResult, which
    # would request a result that does not exist.
    - name: jobFailed
      type: end
      criteria:
      - context: $response.body
        condition: $.job.status == "Failed"
        type: jsonpath
    # Job still running: run this step again.
    # NOTE(review): no delay or attempt cap is declared for this loop -
    # confirm the runner throttles re-entry so the API is not hammered.
    - name: jobPending
      type: goto
      stepId: pollJob
      criteria:
      - context: $response.body
        condition: $.job.status == "Processing"
        type: jsonpath
  # Step 3 of 3: fetch the finished inference and expose its fields and raw
  # text. Reached through the jobProcessed action of pollJob.
  - stepId: getResult
    description: >-
      Retrieve the completed extraction inference and read the custom fields
      and the full raw text parsed from the document.
    # Qualified with the source description name: this document declares two
    # OpenAPI sources, and Arazzo requires the
    # $sourceDescriptions.<name>.<operationId> form in that case.
    operationId: $sourceDescriptions.extractionApi.Get_Extraction_Product_Result_v2_products_extraction_results__inference_id__get
    parameters:
    - name: Authorization
      in: header
      value: $inputs.authorization
    # NOTE(review): the job id is passed as the inference id, which assumes
    # both share one identifier - confirm against the Mindee API reference.
    - name: inference_id
      in: path
      value: $steps.enqueueDocument.outputs.jobId
    successCriteria:
    - condition: $statusCode == 200
    outputs:
      inferenceId: $response.body#/inference/id
      # Model-specific extracted fields.
      fields: $response.body#/inference/result/fields
      # Full document text, requested via raw_text: true at enqueue time.
      rawText: $response.body#/inference/result/raw_text
  # Values handed back to the caller of the workflow.
  outputs:
    jobId: $steps.enqueueDocument.outputs.jobId
    # Last job status observed by pollJob, so a caller can tell how the job
    # ended even when no inference was fetched.
    status: $steps.pollJob.outputs.status
    # Identifier of the fetched inference, as captured by getResult.
    inferenceId: $steps.getResult.outputs.inferenceId
    fields: $steps.getResult.outputs.fields
    rawText: $steps.getResult.outputs.rawText