---
# OpenAPI 3.1 description of the Bright Data Web Scraper API.
#
# Layout:
#   info / servers / security / tags  - document-level metadata
#   paths                             - scrape, snapshot, delivery and dataset operations
#   components                        - shared security scheme, parameters and schemas
openapi: 3.1.0
info:
  title: Bright Data Web Scraper API
  description: |
    Bright Data's Web Scraper API exposes asynchronous scraping jobs (snapshots)
    backed by 660+ pre-built dataset endpoints plus custom collectors. Trigger a
    scrape, poll progress, list snapshots, cancel or rerun, and download results
    in JSON, NDJSON, CSV, or JSONL — optionally compressed. Snapshots can be
    pushed to S3, Azure Blob, GCS, Snowflake, or a webhook. Authentication uses
    a Bearer API token issued from the Bright Data control panel.
  version: v3
  contact:
    name: Bright Data
    url: https://docs.brightdata.com
  license:
    name: Bright Data API Terms of Service
    url: https://brightdata.com/legal/tos

servers:
  - url: https://api.brightdata.com
    description: Production

# Every operation requires the bearer token unless it overrides `security`.
security:
  - BearerAuth: []

tags:
  - name: Scrape
    description: Trigger and monitor asynchronous scraping jobs.
  - name: Snapshots
    description: List, download, cancel, and rerun snapshots produced by scraping jobs.
  - name: Delivery
    description: Deliver snapshots to cloud storage destinations or webhooks.

paths:
  # --- Scrape: trigger a job and poll its progress -------------------------
  /datasets/v3/scrape:
    post:
      summary: Trigger a Web Scraper Job
      description: |
        Trigger an asynchronous scraping job against a Bright Data dataset.
        Pass the `dataset_id` of the target collector and the per-record input
        payload as a JSON array. Returns a `snapshot_id` used to poll progress
        and download results.
      operationId: triggerScrape
      tags: [Scrape]
      parameters:
        - name: dataset_id
          in: query
          required: true
          schema: { type: string }
          description: Bright Data dataset identifier (e.g. `gd_l1viktl72bvl7bjuj0`).
        - name: include_errors
          in: query
          schema: { type: boolean }
        - name: notify
          in: query
          schema: { type: string, format: uri }
          description: Webhook URL Bright Data calls when the snapshot completes.
        - name: format
          in: query
          schema:
            type: string
            enum: [json, ndjson, csv, jsonl]
        - name: limit_per_input
          in: query
          schema: { type: integer }
        - name: limit_multiple_results
          in: query
          schema: { type: integer }
      requestBody:
        required: true
        content:
          application/json:
            # One free-form object per input record; keys depend on the dataset.
            schema:
              type: array
              items:
                type: object
                additionalProperties: true
      responses:
        "200":
          description: Job submitted.
          content:
            application/json:
              schema:
                type: object
                properties:
                  snapshot_id: { type: string }

  /datasets/v3/progress/{snapshot_id}:
    parameters:
      - $ref: '#/components/parameters/SnapshotId'
    get:
      summary: Get Scrape Progress
      operationId: getScrapeProgress
      tags: [Scrape]
      responses:
        "200":
          description: Progress.
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                    enum: [running, ready, failed, building, collecting, cancelled]
                  records: { type: integer }
                  errors: { type: integer }

  # --- Snapshots: log, list, cancel, rerun, download -----------------------
  /datasets/v3/log/{snapshot_id}:
    parameters:
      - $ref: '#/components/parameters/SnapshotId'
    get:
      summary: Get Snapshot Log
      operationId: getSnapshotLog
      tags: [Snapshots]
      responses:
        "200":
          description: Snapshot log.
          content:
            application/json:
              schema:
                type: array
                items: { type: object }

  /datasets/v3/snapshots:
    get:
      summary: List Snapshots
      operationId: listSnapshots
      tags: [Snapshots]
      parameters:
        - name: dataset_id
          in: query
          schema: { type: string }
        - name: from_date
          in: query
          schema: { type: string, format: date }
        - name: to_date
          in: query
          schema: { type: string, format: date }
        - name: status
          in: query
          schema: { type: string }
      responses:
        "200":
          description: Snapshots.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Snapshot'

  /datasets/v3/snapshot/{snapshot_id}/cancel:
    parameters:
      - $ref: '#/components/parameters/SnapshotId'
    post:
      summary: Cancel a Snapshot
      operationId: cancelSnapshot
      tags: [Snapshots]
      responses:
        "200":
          description: Cancelled.
          content:
            application/json:
              schema: { type: object }

  /datasets/v3/snapshot/{snapshot_id}/rerun:
    parameters:
      - $ref: '#/components/parameters/SnapshotId'
    post:
      summary: Rerun a Snapshot
      operationId: rerunSnapshot
      tags: [Snapshots]
      responses:
        "200":
          description: Snapshot rerun started.
          content:
            application/json:
              schema:
                type: object
                properties:
                  snapshot_id: { type: string }

  /datasets/v3/snapshot/{snapshot_id}:
    parameters:
      - $ref: '#/components/parameters/SnapshotId'
    get:
      summary: Download Snapshot Results
      operationId: downloadSnapshot
      tags: [Snapshots]
      parameters:
        - name: format
          in: query
          schema:
            type: string
            enum: [json, ndjson, csv, jsonl]
        - name: compress
          in: query
          schema: { type: boolean }
        - name: batch_size
          in: query
          schema: { type: integer }
        - name: part
          in: query
          schema: { type: integer }
      responses:
        "200":
          description: Snapshot data (up to 5 GB per request).
          # The response media type varies; one entry per documented media type.
          content:
            application/json:
              schema:
                type: array
                items: { type: object }
            text/csv:
              schema: { type: string }
            application/x-ndjson:
              schema: { type: string }

  # --- Delivery: push a snapshot to external storage -----------------------
  /datasets/v3/snapshot/{snapshot_id}/deliver:
    parameters:
      - $ref: '#/components/parameters/SnapshotId'
    post:
      summary: Deliver Snapshot to Cloud Storage
      operationId: deliverSnapshot
      tags: [Delivery]
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                # Destination settings for the delivery target.
                deliver:
                  type: object
                  properties:
                    type:
                      type: string
                      enum: [s3, gcs, azure, snowflake, webhook]
                    filename: { type: object }
                    bucket: { type: string }
                    credentials:
                      type: object
                      additionalProperties: true
                # NOTE(review): `format` and `compress` are placed beside
                # `deliver` rather than inside it; confirm against the upstream
                # API reference.
                format:
                  type: string
                  enum: [json, ndjson, csv, parquet]
                compress: { type: boolean }
      responses:
        "200":
          description: Delivery scheduled.
          content:
            application/json:
              schema: { type: object }

  # --- Dataset catalogue ---------------------------------------------------
  /datasets:
    get:
      summary: List Available Datasets
      operationId: listDatasets
      tags: [Scrape]
      responses:
        "200":
          description: List of datasets the caller is entitled to query.
          content:
            application/json:
              schema:
                type: array
                items:
                  type: object
                  properties:
                    id: { type: string }
                    name: { type: string }
                    description: { type: string }

components:
  securitySchemes:
    BearerAuth:
      type: http
      scheme: bearer

  parameters:
    # Shared by every path item whose URL template contains `{snapshot_id}`.
    SnapshotId:
      name: snapshot_id
      in: path
      required: true
      schema: { type: string }
      description: >-
        Identifier of the snapshot, as returned in `snapshot_id` when a scrape
        is triggered or a snapshot is rerun.

  schemas:
    # Item type of the array returned by `listSnapshots`.
    Snapshot:
      type: object
      properties:
        id: { type: string }
        dataset_id: { type: string }
        status: { type: string }
        created: { type: string, format: date-time }
        dataset_size: { type: integer }
        records: { type: integer }
        errors: { type: integer }