{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "https://aws.amazon.com/textract/schemas/documentanalysis",
  "title": "DocumentAnalysis",
  "description": "Schema for an Amazon Textract document analysis response containing detected blocks of text, tables, forms, and layout elements extracted from a document.",
  "type": "object",
  "properties": {
    "DocumentMetadata": {
      "type": "object",
      "description": "Metadata about the analyzed document.",
      "properties": {
        "Pages": {
          "type": "integer",
          "description": "The number of pages detected in the document.",
          "minimum": 1
        }
      },
      "required": ["Pages"]
    },
    "Blocks": {
      "type": "array",
      "description": "The items detected in the document, including text lines, words, tables, and form elements.",
      "items": {
        "type": "object",
        "properties": {
          "BlockType": {
            "type": "string",
            "description": "The type of text item detected.",
            "enum": [
              "KEY_VALUE_SET",
              "PAGE",
              "LINE",
              "WORD",
              "TABLE",
              "CELL",
              "SELECTION_ELEMENT",
              "MERGED_CELL",
              "TITLE",
              "QUERY",
              "QUERY_RESULT",
              "SIGNATURE",
              "TABLE_TITLE",
              "TABLE_FOOTER",
              "LAYOUT_TEXT",
              "LAYOUT_TITLE",
              "LAYOUT_HEADER",
              "LAYOUT_FOOTER",
              "LAYOUT_SECTION_HEADER",
              "LAYOUT_PAGE_NUMBER",
              "LAYOUT_LIST",
              "LAYOUT_FIGURE",
              "LAYOUT_TABLE",
              "LAYOUT_KEY_VALUE"
            ]
          },
          "Confidence": {
            "type": "number",
            "description": "The confidence that Amazon Textract has in the accuracy of the detected block.",
            "minimum": 0,
            "maximum": 100
          },
          "Text": {
            "type": "string",
            "description": "The word or line of text that is recognized by Amazon Textract."
          },
          "TextType": {
            "type": "string",
            "description": "The kind of text detected.",
            "enum": ["HANDWRITING", "PRINTED"]
          },
          "RowIndex": {
            "type": "integer",
            "description": "The row in which a table cell is located.",
            "minimum": 1
          },
          "ColumnIndex": {
            "type": "integer",
            "description": "The column in which a table cell appears.",
            "minimum": 1
          },
          "RowSpan": {
            "type": "integer",
            "description": "The number of rows that a table cell spans.",
            "minimum": 1
          },
          "ColumnSpan": {
            "type": "integer",
            "description": "The number of columns that a table cell spans.",
            "minimum": 1
          },
          "Geometry": {
            "type": "object",
            "description": "The location of the detected block on the document page.",
            "properties": {
              "BoundingBox": {
                "type": "object",
                "description": "An axis-aligned bounding box for the detected block.",
                "properties": {
                  "Width": {
                    "type": "number",
                    "description": "The width of the bounding box as a ratio of the overall document page width."
                  },
                  "Height": {
                    "type": "number",
                    "description": "The height of the bounding box as a ratio of the overall document page height."
                  },
                  "Left": {
                    "type": "number",
                    "description": "The left coordinate of the bounding box."
                  },
                  "Top": {
                    "type": "number",
                    "description": "The top coordinate of the bounding box."
                  }
                },
                "required": ["Width", "Height", "Left", "Top"]
              },
              "Polygon": {
                "type": "array",
                "description": "A fine-grained polygon around the detected block.",
                "items": {
                  "type": "object",
                  "properties": {
                    "X": {
                      "type": "number"
                    },
                    "Y": {
                      "type": "number"
                    }
                  },
                  "required": ["X", "Y"]
                }
              }
            }
          },
          "Id": {
            "type": "string",
            "description": "The identifier for the recognized text block."
          },
          "Relationships": {
            "type": "array",
            "description": "A list of relationship objects that describe how blocks are related to each other.",
            "items": {
              "type": "object",
              "properties": {
                "Type": {
                  "type": "string",
                  "description": "The type of relationship.",
                  "enum": [
                    "VALUE",
                    "CHILD",
                    "COMPLEX_FEATURES",
                    "MERGED_CELL",
                    "TITLE",
                    "ANSWER",
                    "TABLE",
                    "TABLE_TITLE",
                    "TABLE_FOOTER"
                  ]
                },
                "Ids": {
                  "type": "array",
                  "description": "An array of IDs for related blocks.",
                  "items": {
                    "type": "string"
                  }
                }
              },
              "required": ["Type", "Ids"]
            }
          },
          "EntityTypes": {
            "type": "array",
            "description": "The type of entity such as KEY or VALUE.",
            "items": {
              "type": "string",
              "enum": [
                "KEY",
                "VALUE",
                "COLUMN_HEADER",
                "TABLE_TITLE",
                "TABLE_FOOTER",
                "TABLE_SECTION_TITLE",
                "TABLE_SUMMARY",
                "STRUCTURED_TABLE",
                "SEMI_STRUCTURED_TABLE"
              ]
            }
          },
          "SelectionStatus": {
            "type": "string",
            "description": "The selection status of a selection element.",
            "enum": ["SELECTED", "NOT_SELECTED"]
          },
          "Page": {
            "type": "integer",
            "description": "The page on which the block was detected.",
            "minimum": 1
          },
          "Query": {
            "type": "object",
            "description": "The query that was used for query-based analysis.",
            "properties": {
              "Text": {
                "type": "string"
              },
              "Alias": {
                "type": "string"
              },
              "Pages": {
                "type": "array",
                "items": {
                  "type": "string"
                }
              }
            },
            "required": ["Text"]
          }
        },
        "required": ["BlockType"]
      }
    },
    "AnalyzeDocumentModelVersion": {
      "type": "string",
      "description": "The version of the model used to analyze the document."
    }
  },
  "required": ["DocumentMetadata", "Blocks"]
}