{ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://github.com/api-search/amazon-neptune/json-schema/amazon-neptune-loader-job-schema.json", "title": "Amazon Neptune Loader Job", "description": "Represents a Neptune bulk loader job configuration and its status. The loader ingests data from Amazon S3 into a Neptune DB instance, supporting Gremlin CSV and openCypher CSV for property graphs, and N-Triples, N-Quads, RDF/XML, and Turtle for RDF data.", "type": "object", "required": [ "source", "format", "iamRoleArn", "region" ], "properties": { "source": { "type": "string", "description": "Amazon S3 URI identifying the data file(s), folder, or multiple folders to load. Supported URI formats: s3://bucket/key, https://s3.amazonaws.com/bucket/key.", "examples": [ "s3://my-bucket/graph-data/", "s3://my-bucket/data/vertices.csv" ] }, "format": { "type": "string", "description": "The data format of the source files to be loaded.", "enum": [ "csv", "opencypher", "ntriples", "nquads", "rdfxml", "turtle" ] }, "iamRoleArn": { "type": "string", "description": "The ARN of the IAM role that provides Neptune access to the S3 bucket. Can be a comma-separated list of role ARNs for cross-account access.", "pattern": "^arn:aws[a-z-]*:iam::[0-9]+:role/" }, "region": { "type": "string", "description": "The AWS Region of the S3 bucket containing the data to load.", "examples": [ "us-east-1", "eu-west-1" ] }, "mode": { "type": "string", "description": "The load mode. NEW creates a new load request regardless of any previous loads. RESUME continues a previous load from the same source, skipping files that were already loaded successfully; if no previous load is found, the loader stops. AUTO resumes a previous load from the same source if one exists, otherwise starts a new load.", "enum": [ "NEW", "RESUME", "AUTO" ], "default": "AUTO" }, "failOnError": { "type": "string", "description": "Whether to stop the entire load job when an error is encountered.", "enum": [ "TRUE", "FALSE" ], "default": "TRUE" }, "parallelism": { "type": "string", "description": "The degree of parallelism for loading. LOW uses a single thread, MEDIUM uses num_vCPU/2 threads, HIGH uses num_vCPU threads, OVERSUBSCRIBE uses num_vCPU*2 threads and takes up all available resources.",
"enum": [ "LOW", "MEDIUM", "HIGH", "OVERSUBSCRIBE" ], "default": "HIGH" }, "parserConfiguration": { "type": "object", "description": "Optional parser configuration settings. baseUri and namedGraphUri apply to RDF data; allowEmptyStrings applies to Gremlin CSV data.", "properties": { "baseUri": { "type": "string", "description": "The base URI for resolving relative URIs in the data." }, "namedGraphUri": { "type": "string", "description": "The default named graph URI for loaded triples." }, "allowEmptyStrings": { "type": "boolean", "description": "Whether to allow empty string values for properties." } } }, "updateSingleCardinalityProperties": { "type": "string", "description": "Whether to update existing single-cardinality vertex properties with new values. Not supported for openCypher format.", "enum": [ "TRUE", "FALSE" ], "default": "FALSE" }, "queueRequest": { "type": "string", "description": "Whether to queue the request if a load job is already running. Neptune queues up to 64 jobs in FIFO order.", "enum": [ "TRUE", "FALSE" ], "default": "FALSE" }, "dependencies": { "type": "array", "description": "An array of load job IDs that must complete successfully before this job runs.", "items": { "type": "string" } }, "userProvidedEdgeIds": { "type": "string", "description": "For openCypher format only. TRUE means edge files contain an :ID column. FALSE means Neptune auto-generates edge IDs.", "enum": [ "TRUE", "FALSE" ] } }, "$defs": { "LoaderJobStatus": { "type": "object", "title": "Loader Job Status", "description": "The status of a Neptune bulk loader job.", "properties": { "loadId": { "type": "string", "description": "The unique identifier for the load job." }, "overallStatus": { "type": "object", "properties": { "fullUri": { "type": "string", "description": "The S3 URI of the data source." }, "runNumber": { "type": "integer", "description": "The run number for this load."
}, "retryNumber": { "type": "integer", "description": "The number of retries." }, "status": { "type": "string", "description": "The current status of the load job.", "enum": [ "LOAD_NOT_STARTED", "LOAD_IN_PROGRESS", "LOAD_COMPLETED", "LOAD_CANCELLED_BY_USER", "LOAD_CANCELLED_DUE_TO_ERRORS", "LOAD_FAILED", "LOAD_UNEXPECTED_ERROR", "LOAD_DATA_DEADLOCK", "LOAD_DATA_FAILED_DUE_TO_FEED_MODIFIED_OR_DELETED", "LOAD_S3_READ_ERROR", "LOAD_S3_ACCESS_DENIED_ERROR", "LOAD_COMMITTED_W_WRITE_CONFLICTS" ] }, "totalTimeSpent": { "type": "integer", "description": "Total time spent on the load in seconds." }, "startTime": { "type": "integer", "description": "The start time as a Unix timestamp." }, "totalRecords": { "type": "integer", "description": "Total number of records processed." }, "totalDuplicates": { "type": "integer", "description": "Total number of duplicate records encountered." }, "parsingErrors": { "type": "integer", "description": "Total number of parsing errors." }, "datatypeMismatchErrors": { "type": "integer", "description": "Total number of datatype mismatch errors." }, "insertErrors": { "type": "integer", "description": "Total number of insert errors." } } } } } } }