{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/ExtractionResult",
  "title": "ExtractionResult",
  "type": "object",
  "description": "The data extraction results for a processed document, including all extracted fields, tables, and their confidence scores.",
  "properties": {
    "document_id": {
      "type": "string",
      "format": "uuid",
      "description": "The identifier of the source document"
    },
    "template_id": {
      "type": "string",
      "format": "uuid",
      "description": "The extraction template that was applied"
    },
    "classification": {
      "type": "string",
      "description": "The classified document type"
    },
    "extraction_method": {
      "type": "string",
      "description": "The AI method used for extraction",
      "enum": [
        "ocr",
        "nlp",
        "generative_ai",
        "template_based",
        "hybrid"
      ]
    },
    "overall_confidence": {
      "type": "number",
      "description": "The overall confidence score for the extraction (0.0 to 1.0)",
      "minimum": 0.0,
      "maximum": 1.0
    },
    "fields": {
      "type": "array",
      "items": {
        "$ref": "#/components/schemas/ExtractedField"
      },
      "description": "The list of extracted fields with values and confidence scores"
    },
    "tables": {
      "type": "array",
      "items": {
        "$ref": "#/components/schemas/ExtractedTable"
      },
      "description": "The list of extracted tables"
    },
    "requires_review": {
      "type": "boolean",
      "description": "Whether any extracted values fall below the confidence threshold and require manual review"
    },
    "extracted_at": {
      "type": "string",
      "format": "date-time",
      "description": "The date and time the extraction was performed"
    }
  }
}