{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "https://raw.githubusercontent.com/earthpulse/ml-dataset/main/json-schema/schema.json#",
  "title": "ML Dataset Extension",
  "description": "STAC ML Dataset Extension for STAC Catalogs, Collections and Items.",
  "oneOf": [
    {
      "$comment": "This is the schema for STAC Catalogs.",
      "allOf": [
        {
          "type": "object",
          "required": [
            "type",
            "ml-dataset:name",
            "ml-dataset:tasks",
            "ml-dataset:inputs-type",
            "ml-dataset:annotations-type",
            "ml-dataset:version"
          ],
          "properties": {
            "type": {
              "const": "Catalog"
            },
            "ml-dataset:name": {
              "type": "string",
              "title": "ML Dataset Name"
            },
            "ml-dataset:tasks": {
              "type": "array",
              "items": {
                "type": "string",
                "enum": [
                  "image classification",
                  "object detection",
                  "segmentation"
                ]
              },
              "title": "ML Dataset tasks"
            },
            "ml-dataset:type": {
              "type": "string",
              "enum": [
                "training",
                "test",
                "validation",
                "reference",
                "benchmark",
                "legacy"
              ],
              "title": "ML Dataset type"
            },
            "ml-dataset:inputs-type": {
              "type": "array",
              "items": {
                "type": "string",
                "enum": [
                  "text",
                  "image",
                  "satellite imagery",
                  "video"
                ]
              },
              "title": "ML Dataset Inputs type"
            },
            "ml-dataset:annotations-type": {
              "type": "string",
              "enum": [
                "raster",
                "vector"
              ],
              "title": "ML Dataset Annotations type"
            },
            "ml-dataset:quality-metrics": {
              "type": "array",
              "items": {
                "type": "object"
              },
              "title": "ML Dataset quality metrics"
            },
            "ml-dataset:version": {
              "type": "string",
              "title": "ML Dataset version"
            },
            "ml-dataset:splits": {
              "type": "array",
              "items": {
                "type": "string"
              },
              "title": "ML Dataset splits"
            },
            "links": {
              "type": "array",
              "items": {
                "$ref": "#/definitions/fields"
              }
            }
          }
        },
        {
          "$ref": "#/definitions/stac_extensions"
        }
      ]
    },
    {
      "$comment": "This is the schema for STAC Collections.",
      "allOf": [
        {
          "type": "object",
          "required": [
            "type",
            "ml-dataset:split-items"
          ],
          "properties": {
            "type": {
              "const": "Collection"
            },
            "ml-dataset:split-items": {
              "type": "array",
              "items": {
                "type": "object"
              },
              "title": "ML Dataset Items that conform the splits"
            }
          }
        },
        {
          "$ref": "#/definitions/stac_extensions"
        }
      ]
    },
    {
      "$comment": "This is the schema for STAC Items.",
      "allOf": [
        {
          "type": "object",
          "required": [
            "type",
            "properties",
            "links",
            "assets"
          ],
          "properties": {
            "type": {
              "const": "Feature"
            },
            "properties": {
              "allOf": [
                {
                  "$comment": "Require fields here for item properties.",
                  "required": [
                    "ml-dataset:split"
                  ]
                },
                {
                  "$ref": "#/definitions/fields"
                }
              ]
            },
            "assets": {
              "type": "object",
              "additionalProperties": {
                "$ref": "#/definitions/fields"
              }
            }
          }
        },
        {
          "$ref": "#/definitions/stac_extensions"
        }
      ]
    }
  ],
  "definitions": {
    "stac_extensions": {
      "type": "object",
      "required": [
        "stac_extensions"
      ],
      "properties": {
        "stac_extensions": {
          "type": "array",
          "contains": {
            "const": "https://raw.githubusercontent.com/earthpulse/ml-dataset/main/json-schema/schema.json"
          }
        }
      }
    },
    "fields": {
      "$comment": "Add your new fields here. Don't require them here, do that above in the item schema.",
      "type": "object",
      "properties": {
        "ml-dataset:name": {
          "type": "string",
          "title": "ML Dataset Name"
        },
        "ml-dataset:tasks": {
          "type": "array",
          "items": {
            "type": "string",
            "enum": [
              "image classification",
              "object detection",
              "segmentation"
            ]
          },
          "title": "ML Dataset tasks"
        },
        "ml-dataset:type": {
          "type": "string",
          "enum": [
            "training",
            "test",
            "validation",
            "reference",
            "benchmark",
            "legacy"
          ],
          "title": "ML Dataset type"
        },
        "ml-dataset:inputs-type": {
          "type": "array",
          "items": {
            "type": "string",
            "enum": [
              "text",
              "image",
              "satellite imagery",
              "video"
            ]
          },
          "title": "ML Dataset Inputs type"
        },
        "ml-dataset:annotations-type": {
          "type": "string",
          "enum": [
            "raster",
            "vector"
          ],
          "title": "ML Dataset Annotations type"
        },
        "ml-dataset:quality-metrics": {
          "type": "array",
          "items": {
            "type": "object"
          },
          "title": "ML Dataset quality metrics"
        },
        "ml-dataset:version": {
          "type": "string",
          "title": "ML Dataset version"
        },
        "ml-dataset:splits": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "title": "ML Dataset splits"
        },
        "ml-dataset:split-items": {
          "type": "array",
          "items": {
            "type": "object"
          },
          "title": "ML Dataset Items that conform the split"
        },
        "ml-dataset:split": {
          "type": "string",
          "title": "ML Dataset name of the split the Item is included"
        }
      },
      "patternProperties": {
        "^(?!ml-dataset:)": {
          "$comment": "Properties outside the `ml-dataset:` prefix are not restricted by this extension; unknown `ml-dataset:` properties are rejected by `additionalProperties` below."
        }
      },
      "additionalProperties": false
    }
  }
}