---
# Arazzo workflow: upload a document against a custom Mindee extraction model,
# poll the job until it finishes, then read the extracted fields and raw text.
arazzo: 1.0.1
info:
  title: Mindee Custom Document Extraction
  summary: >-
    Enqueue an arbitrary document against a custom model with raw text
    capture, poll until processed, then read fields and full text.
  description: >-
    Applies the Mindee asynchronous extraction pattern to a custom document
    type backed by a user-defined extraction model. The workflow uploads any
    document against the supplied model with the raw_text option enabled,
    polls the shared jobs endpoint until the job is Processed, and fetches the
    inference to read both the model-specific extracted fields and the
    complete raw text of the document. Every step spells out its request
    inline so the flow can be read and executed without opening the underlying
    OpenAPI description.
  version: 1.0.0

# Two OpenAPI sources are referenced, so every step's operationId below is
# qualified with $sourceDescriptions.<name> to keep resolution unambiguous.
sourceDescriptions:
  - name: extractionApi
    url: ../openapi/mindee-extraction-api-openapi.yml
    type: openapi
  - name: jobsApi
    url: ../openapi/mindee-jobs-api-openapi.yml
    type: openapi

workflows:
  - workflowId: custom-document-extraction
    summary: >-
      Upload a custom document with raw text capture and read fields plus
      full text.
    description: >-
      Sends a document to the extraction enqueue endpoint against a custom
      model with raw_text enabled, polls the job until processing finishes,
      and retrieves both the extracted fields and the raw document text.
    inputs:
      type: object
      required:
        - authorization
        - modelId
        - file
      properties:
        authorization:
          type: string
          description: Mindee API key sent in the Authorization header.
        modelId:
          type: string
          description: UUID of the custom extraction model to apply.
        file:
          type: string
          description: The document file to upload as binary form data.
        filename:
          type: string
          description: Optional filename to associate with the uploaded document.
        textContext:
          type: string
          description: >-
            Optional additional context passed to the model for this
            inference.
    steps:
      # Step 1: submit the document; the job id it returns drives both
      # later steps.
      - stepId: enqueueDocument
        description: >-
          Send the document to the asynchronous extraction queue against the
          custom model with raw_text enabled so the full document text is
          returned.
        # NOTE(review): assumes this operation is declared in the
        # extractionApi source description - confirm against the OpenAPI file.
        operationId: $sourceDescriptions.extractionApi.Enqueue_Extraction_Product_Inference_v2_products_extraction_enqueue_post
        parameters:
          - name: Authorization
            in: header
            value: $inputs.authorization
        requestBody:
          contentType: multipart/form-data
          payload:
            model_id: $inputs.modelId
            file: $inputs.file
            filename: $inputs.filename
            raw_text: true
            text_context: $inputs.textContext
        successCriteria:
          - condition: $statusCode == 202
        outputs:
          jobId: $response.body#/job/id
          status: $response.body#/job/status

      # Step 2: poll the job. The onSuccess actions branch on job.status:
      # Processed -> getResult, Failed -> end, Processing -> poll again.
      - stepId: pollJob
        description: >-
          Poll the shared jobs endpoint until the custom extraction job
          reports Processed or Failed.
        # NOTE(review): assumes this operation is declared in the jobsApi
        # source description - confirm against the OpenAPI file.
        operationId: $sourceDescriptions.jobsApi.Get_Job_Status_v2_jobs__job_id__get
        parameters:
          - name: Authorization
            in: header
            value: $inputs.authorization
          - name: job_id
            in: path
            value: $steps.enqueueDocument.outputs.jobId
          - name: redirect
            in: query
            value: false
        successCriteria:
          - condition: $statusCode == 200
        outputs:
          status: $response.body#/job/status
        onSuccess:
          - name: jobProcessed
            type: goto
            stepId: getResult
            criteria:
              - context: $response.body
                condition: $.job.status == "Processed"
                type: jsonpath
          # Stop here when the job failed; without this action the workflow
          # would fall through to getResult for a job that has no result.
          - name: jobFailed
            type: end
            criteria:
              - context: $response.body
                condition: $.job.status == "Failed"
                type: jsonpath
          # NOTE(review): this loop declares no delay between polls; the
          # runner is presumably expected to throttle - confirm.
          - name: jobPending
            type: goto
            stepId: pollJob
            criteria:
              - context: $response.body
                condition: $.job.status == "Processing"
                type: jsonpath

      # Step 3: fetch the finished inference and expose fields and raw text.
      - stepId: getResult
        description: >-
          Retrieve the completed extraction inference and read the custom
          fields and the full raw text parsed from the document.
        # NOTE(review): assumes this operation is declared in the
        # extractionApi source description - confirm against the OpenAPI file.
        operationId: $sourceDescriptions.extractionApi.Get_Extraction_Product_Result_v2_products_extraction_results__inference_id__get
        parameters:
          - name: Authorization
            in: header
            value: $inputs.authorization
          # NOTE(review): the enqueue job id is passed as inference_id;
          # presumably the API uses one identifier for both - confirm.
          - name: inference_id
            in: path
            value: $steps.enqueueDocument.outputs.jobId
        successCriteria:
          - condition: $statusCode == 200
        outputs:
          inferenceId: $response.body#/inference/id
          fields: $response.body#/inference/result/fields
          rawText: $response.body#/inference/result/raw_text
    # Workflow-level outputs surfaced to the caller.
    outputs:
      jobId: $steps.enqueueDocument.outputs.jobId
      fields: $steps.getResult.outputs.fields
      rawText: $steps.getResult.outputs.rawText