{ "$schema": "http://json-schema.org/draft-07/schema", "$id": "https://www.bitsky.ai/schemas/task.json", "type": "object", "title": "Task Schema", "version": "1.0.0", "required": ["retailer", "url"], "additionalProperties": false, "properties": { "system": { "type": "object", "description": "The following properties are updated by the system; users cannot update them directly", "additionalProperties": false, "properties": { "state": { "title": "State", "type": "string", "description": "State of this task", "enum": [ "DRAFT", "CONFIGURED", "FINISHED", "RUNNING", "FAILED", "PAUSED", "TIMEOUT" ] }, "securityKey": { "$ref": "#/definitions/securityKey" }, "created": { "$ref": "#/definitions/created" }, "modified": { "$ref": "#/definitions/modified" }, "startedAt": { "allOf": [ { "$ref": "#/definitions/timestamp" }, { "title": "Task started at" } ] }, "endedAt": { "allOf": [ { "$ref": "#/definitions/timestamp" }, { "title": "Task finished at" } ] }, "producer": { "title": "Execute Producer", "description": "Which producer executes this task", "type": "object", "additionalProperties": false, "properties": { "globalId": { "$ref": "#/definitions/globalId" }, "type": { "$ref": "#/definitions/producerType" }, "retryTimes": { "type": "integer", "description": "How many times the client side retried executing this task" }, "startedAt": { "allOf": [ { "$ref": "#/definitions/timestamp" }, { "description": "Producer started executing this task" } ] }, "endedAt": { "allOf": [ { "$ref": "#/definitions/timestamp" }, { "title": "Producer finished executing this task" } ] } } }, "version": { "$ref": "#/definitions/version" }, "failuresNumber": { "title": "Number of failures", "type": "integer", "default": 0, "description": "How many times this task failed to run.", "minimum": 0 }, "failuresReason": { "type": "string", "description": "Failure reason collected by the producer" } } }, "_id": { "$ref": "#/definitions/_id" }, "globalId": { "$ref": "#/definitions/globalId" }, "type": { "title": "Task Type",
"type": "string", "default": "CRAWLER", "description": "Type of this task; it helps the producer decide how to run it. For now, there is only one type: **web crawler**", "enum": ["CRAWLER"] }, "name": { "$ref": "#/definitions/name" }, "description": { "$ref": "#/definitions/description" }, "retailer": { "type": "object", "description": "The Retailer to send the **callback** to after the producer executes this task", "required": ["globalId"], "properties": { "globalId": { "$ref": "#/definitions/globalId" }, "state": { "$ref": "#/definitions/retailerState" } } }, "permission": { "$ref": "#/definitions/permission" }, "priority": { "type": "integer", "minimum": 1, "default": 100, "description": "Priority of this task. Priorities are only compared within the same Retailer, not across Retailers. A larger value means a lower priority: priority value 1 is higher than priority value 2." }, "suitableProducers": { "type": "array", "description": "Which producer types can execute this task", "default": ["HEADLESSBROWSER"], "items": { "$ref": "#/definitions/producerType" }, "minItems": 1, "uniqueItems": true }, "url": { "$ref": "#/definitions/url" }, "metadata": { "type": "object", "additionalProperties": true, "description": "Additional metadata for this task", "properties": { "script": { "type": "string", "description": "Code to execute after **window.onload**. You only need to pass the function body. By default you get the following parameters: **resolve**, **reject**, **task**, **axios**. You need to call resolve(data) or reject(err); the data or err is then sent to the Retailer. Executing your code only works with the **HEADLESSBROWSER** producer" } } },
"dataset": { "$ref": "#/definitions/dataset" } }, "definitions": { "_id": { "title": "ID", "description": "Identifier of this entity", "type": ["string", "object"] }, "created": { "allOf": [ { "$ref": "#/definitions/timestamp" }, { "title": "Created at" } ] }, "modified": { "allOf": [ { "$ref": "#/definitions/timestamp" }, { "title": "Last time modified at" } ] }, "timestamp": { "type": ["integer", "null"], "description": "Timestamp of when this task was assigned to a producer", "examples": [1553974629109] }, "version": { "title": "Version", "description": "version of this producer", "type": "string", "pattern": "^\\d+\\.\\d+\\.\\d+$", "default": "1.0.0", "examples": ["1.0.0"] }, "globalId": { "title": "Global ID", "description": "Globally unique identifier; if you don't pass one, it is generated automatically", "type": "string", "examples": ["59f43b55-46a3-4efc-a960-018bcca91f46"] }, "producerType": { "title": "Producer Type", "type": "string", "description": "Producer type of this producer", "enum": ["HTTP", "HEADLESSBROWSER"] }, "permission": { "type": "string", "description": "Controls who can run this task. Private means only producers with a matching **securityKey** can run it; public means any matching producer can collect it. If your metadata has a script, the task is forced to private.", "default": "PRIVATE", "enum": ["PRIVATE", "PUBLIC"] },
"url": { "type": "string", "format": "uri", "description": "Target URL for this task.", "examples": ["https://json-schema.org"] }, "name": { "title": "Name", "description": "Give a meaningful name", "type": "string", "minLength": 1, "maxLength": 100, "examples": ["My Chrome Extension"] }, "description": { "title": "Description", "description": "description of this producer", "type": "string", "minLength": 1, "maxLength": 200, "examples": ["My chrome extension used for test purpose"] }, "securityKey": { "title": "Security Key", "type": "string", "description": "Security key used to identify different users" }, "state": { "title": "State", "type": "string", "description": "State of this entity", "enum": ["DRAFT", "CONFIGURED", "ACTIVE", "DELETED"] }, "retailerState": { "title": "Retailer State", "type": "string", "description": "State of this entity", "enum": ["ACTIVE", "FAILED"] }, "dataset": { "type": "object", "additionalProperties": true, "description": "All data collected by the producer when it executed the task. You can also use **metadata.script** to add more properties", "properties": { "url": { "type": "string", "description": "The final URL this information was collected from. Sometimes the original URL is redirected.", "format": "uri", "examples": ["https://json-schema.org"] }, "data": { "type": "object", "description": "Collected data", "properties": { "contentType": { "type": "string", "title": "Content Type", "description": "Content type of the returned data", "default": "html", "enum": ["html", "json", "text"] }, "content": { "oneOf": [ { "type": "string", "description": "Collected HTML or text string" }, { "type": "object", "description": "Collected JSON data" } ] } } } } } } }