---
# OpenAPI 3.0.3 definition of the Import.io API (v2.0).
#
# Layout:
#   1. info / servers / security / tags
#   2. paths, grouped by resource (users, extractors, crawlruns, reports,
#      reportruns)
#   3. components: security scheme, shared parameters, shared error
#      responses, and resource schemas
openapi: 3.0.3
info:
  title: Import.io API
  description: |
    The Import.io API (v2.0) provides programmatic access to create, manage,
    and run web data extractors, retrieve crawl run results, and manage
    reports. Everything available in the Import.io UI can be accomplished via
    the API, including scheduling extractions, managing data pipelines, and
    exporting data in JSON, CSV, or Excel formats. Authenticated extraction is
    supported for data behind login screens.

    Authentication is performed by passing your API key as the `_apikey`
    query parameter on every request. You can find your API key in the
    Import.io dashboard under User Settings.
  version: "2.0"
  contact:
    name: Import.io Support
    url: https://help.import.io/
  license:
    name: Proprietary
    url: https://www.import.io/terms-of-use/

servers:
  - url: https://api.import.io
    description: Production API

# Applied to every operation unless an operation overrides it.
security:
  - apiKeyAuth: []

tags:
  - name: User
    description: Information about the authenticated user and their subscription.
  - name: Extractor
    description: Create, configure, run, and manage web data extractors.
  - name: Crawlrun
    description: Retrieve crawl run executions and their result files.
  - name: Report
    description: Manage reports built on top of extractors.
  - name: ReportRun
    description: Retrieve report run executions and their result files.

paths:
  # ------------------------------------------------------------------
  # Users
  # ------------------------------------------------------------------
  /users/current:
    get:
      tags: [User]
      summary: Get current user
      description: Get information on the currently authenticated user.
      operationId: getCurrentUser
      parameters:
        - $ref: '#/components/parameters/ApiKey'
      responses:
        '200':
          description: User information
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/User'
        '401':
          $ref: '#/components/responses/Unauthorized'

  /users/current/subscription:
    get:
      tags: [User]
      summary: Get current user subscription
      description: >-
        Get subscription information for the current user including plan,
        usage, and billing details.
      operationId: getCurrentUserSubscription
      parameters:
        - $ref: '#/components/parameters/ApiKey'
      responses:
        '200':
          description: Subscription information
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Subscription'
        '401':
          $ref: '#/components/responses/Unauthorized'

  # ------------------------------------------------------------------
  # Extractors
  # ------------------------------------------------------------------
  /extractors/:
    get:
      tags: [Extractor]
      summary: List extractors
      description: Returns a paginated list of extractors owned by the authenticated user.
      operationId: listExtractors
      parameters:
        - $ref: '#/components/parameters/ApiKey'
        - $ref: '#/components/parameters/PerPage'
        - $ref: '#/components/parameters/Page'
        - $ref: '#/components/parameters/Sort'
        - $ref: '#/components/parameters/SortDirection'
      responses:
        '200':
          description: Array of extractors
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Extractor'
        '401':
          $ref: '#/components/responses/Unauthorized'

  /extractors/{extractorId}:
    # Path-level parameters: shared by every operation under this path.
    parameters:
      - $ref: '#/components/parameters/ExtractorId'
      - $ref: '#/components/parameters/ApiKey'
    get:
      tags: [Extractor]
      summary: Get extractor
      description: Get information about a specific extractor.
      operationId: getExtractor
      responses:
        '200':
          description: Extractor details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Extractor'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
    delete:
      tags: [Extractor]
      summary: Archive extractor
      description: >-
        Archive (delete) an extractor. Data is retained in the database but
        the extractor is hidden from the UI and cannot be run.
      operationId: deleteExtractor
      responses:
        '200':
          description: Archived extractor
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Extractor'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  /extractors/{extractorId}/inputs:
    # Path-level parameters: shared by every operation under this path.
    parameters:
      - $ref: '#/components/parameters/ExtractorId'
      - $ref: '#/components/parameters/ApiKey'
    get:
      tags: [Extractor]
      summary: Get extractor inputs
      description: Retrieve the current inputs configured for an extractor.
      operationId: getExtractorInputs
      responses:
        '200':
          description: Extractor inputs
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractorInputs'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
    put:
      tags: [Extractor]
      summary: Update extractor inputs
      description: Update the inputs (URLs) configured for an extractor.
      operationId: updateExtractorInputs
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ExtractorInputs'
      responses:
        '200':
          description: Inputs updated
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/ServerError'

  /extractors/{extractorId}/start:
    post:
      tags: [Extractor]
      summary: Start extractor
      description: Trigger an extractor to run.
      operationId: startExtractor
      parameters:
        - $ref: '#/components/parameters/ExtractorId'
        - $ref: '#/components/parameters/ApiKey'
      responses:
        '200':
          description: Extractor started
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/ServerError'

  /extractors/{extractorId}/stop:
    post:
      tags: [Extractor]
      summary: Stop extractor
      description: Halt a running extractor.
      operationId: stopExtractor
      parameters:
        - $ref: '#/components/parameters/ExtractorId'
        - $ref: '#/components/parameters/ApiKey'
      responses:
        '200':
          description: Extractor stopped
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/ServerError'

  /extractors/{extractorId}/duplicate:
    post:
      tags: [Extractor]
      summary: Duplicate extractor
      description: Create a copy of an existing extractor.
      operationId: duplicateExtractor
      parameters:
        - $ref: '#/components/parameters/ExtractorId'
        - $ref: '#/components/parameters/ApiKey'
      responses:
        '200':
          description: Duplicated extractor
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Extractor'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/ServerError'

  /extractors/{extractorId}/credentials:
    post:
      tags: [Extractor]
      summary: Update extractor credentials
      description: >-
        Update the authentication credentials used by an extractor for sites
        behind login.
      operationId: updateExtractorCredentials
      parameters:
        - $ref: '#/components/parameters/ExtractorId'
        - $ref: '#/components/parameters/ApiKey'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Credentials'
      responses:
        '200':
          description: Credentials updated
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/ServerError'

  # ------------------------------------------------------------------
  # Crawl runs
  # ------------------------------------------------------------------
  /crawlruns/:
    get:
      tags: [Crawlrun]
      summary: List crawl runs
      description: List recent crawl runs with optional filters and pagination.
      operationId: listCrawlruns
      parameters:
        - $ref: '#/components/parameters/ApiKey'
        - $ref: '#/components/parameters/PerPage'
        - $ref: '#/components/parameters/Page'
        - $ref: '#/components/parameters/Sort'
        - $ref: '#/components/parameters/SortDirection'
      responses:
        '200':
          description: Array of crawl runs
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Crawlrun'
        '401':
          $ref: '#/components/responses/Unauthorized'

  /crawlruns/{crawlrunId}:
    get:
      tags: [Crawlrun]
      summary: Get crawl run
      description: Retrieve information about a specific crawl run.
      operationId: getCrawlrun
      parameters:
        - $ref: '#/components/parameters/CrawlrunId'
        - $ref: '#/components/parameters/ApiKey'
      responses:
        '200':
          description: Crawl run details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Crawlrun'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  /crawlruns/{crawlrunId}/{fileType}:
    get:
      tags: [Crawlrun]
      summary: Get crawl run results file
      description: Download the result file for a crawl run in the requested format.
      operationId: getCrawlrunFile
      parameters:
        - $ref: '#/components/parameters/CrawlrunId'
        - name: fileType
          in: path
          required: true
          description: The file type to download.
          schema:
            type: string
            enum: [csv, xlsx, log, sample, json, files]
        - $ref: '#/components/parameters/ApiKey'
      responses:
        '200':
          description: Result file
          # Textual and binary payloads carry an explicit schema so that
          # tooling treats them as file content rather than an untyped body.
          # The JSON media types are left unconstrained: their structure is
          # not described anywhere in this document.
          content:
            text/csv:
              schema:
                type: string
            application/vnd.openxmlformats-officedocument.spreadsheetml.sheet:
              schema:
                type: string
                format: binary
            application/json: {}
            application/x-ndjson: {}
            text/plain:
              schema:
                type: string
            application/zip:
              schema:
                type: string
                format: binary
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  # ------------------------------------------------------------------
  # Reports
  # ------------------------------------------------------------------
  /reports/:
    get:
      tags: [Report]
      summary: List reports
      description: List reports with optional pagination and sorting.
      operationId: listReports
      parameters:
        - $ref: '#/components/parameters/ApiKey'
        - $ref: '#/components/parameters/PerPage'
        - $ref: '#/components/parameters/Page'
        - $ref: '#/components/parameters/Sort'
        - $ref: '#/components/parameters/SortDirection'
      responses:
        '200':
          description: Array of reports
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Report'
        '401':
          $ref: '#/components/responses/Unauthorized'

  /reports/{reportId}:
    # Path-level parameters: shared by every operation under this path.
    parameters:
      - $ref: '#/components/parameters/ReportId'
      - $ref: '#/components/parameters/ApiKey'
    get:
      tags: [Report]
      summary: Get report
      description: Retrieve information about a specific report.
      operationId: getReport
      responses:
        '200':
          description: Report details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Report'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
    delete:
      tags: [Report]
      summary: Delete report
      description: Delete a report.
      operationId: deleteReport
      responses:
        '200':
          description: Report deleted
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/ServerError'

  /reports/{reportId}/start:
    post:
      tags: [Report]
      summary: Start report
      description: Execute a report.
      operationId: startReport
      parameters:
        - $ref: '#/components/parameters/ReportId'
        - $ref: '#/components/parameters/ApiKey'
      responses:
        '200':
          description: Report started
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/ServerError'

  /reports/{reportId}/reportruns:
    get:
      tags: [Report]
      summary: List report runs for a report
      description: List runs for a specific report.
      operationId: listReportRunsForReport
      parameters:
        - $ref: '#/components/parameters/ReportId'
        - $ref: '#/components/parameters/ApiKey'
        - $ref: '#/components/parameters/PerPage'
        - $ref: '#/components/parameters/Page'
        - $ref: '#/components/parameters/Sort'
        - $ref: '#/components/parameters/SortDirection'
      responses:
        '200':
          description: Array of report runs
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/ReportRun'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  # ------------------------------------------------------------------
  # Report runs
  # ------------------------------------------------------------------
  /reportruns/:
    get:
      tags: [ReportRun]
      summary: List report runs
      description: List all report runs across reports with optional filters.
      operationId: listReportRuns
      parameters:
        - $ref: '#/components/parameters/ApiKey'
        - $ref: '#/components/parameters/PerPage'
        - $ref: '#/components/parameters/Page'
        - $ref: '#/components/parameters/Sort'
        - $ref: '#/components/parameters/SortDirection'
      responses:
        '200':
          description: Array of report runs
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/ReportRun'
        '401':
          $ref: '#/components/responses/Unauthorized'

  /reportruns/{reportRunId}:
    get:
      tags: [ReportRun]
      summary: Get report run
      description: Retrieve information about a specific report run.
      operationId: getReportRun
      parameters:
        - $ref: '#/components/parameters/ReportRunId'
        - $ref: '#/components/parameters/ApiKey'
      responses:
        '200':
          description: Report run details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ReportRun'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  /reportruns/{reportRunId}/{fileType}:
    get:
      tags: [ReportRun]
      summary: Get report run results file
      description: >-
        Download the result file for a report run. JSON output is only
        available for Change reports.
      operationId: getReportRunFile
      parameters:
        - $ref: '#/components/parameters/ReportRunId'
        - name: fileType
          in: path
          required: true
          description: The file type to download.
          schema:
            type: string
            enum: [csv, xlsx, json]
        - $ref: '#/components/parameters/ApiKey'
      responses:
        '200':
          description: Result file
          # Textual and binary payloads carry an explicit schema so that
          # tooling treats them as file content rather than an untyped body.
          # The JSON media type is left unconstrained: its structure is not
          # described anywhere in this document.
          content:
            text/csv:
              schema:
                type: string
            application/vnd.openxmlformats-officedocument.spreadsheetml.sheet:
              schema:
                type: string
                format: binary
            application/json: {}
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

components:
  securitySchemes:
    apiKeyAuth:
      type: apiKey
      in: query
      name: _apikey
      description: API key obtained from the Import.io dashboard User Settings.

  parameters:
    # NOTE(review): `_apikey` is declared twice - once through the global
    # `apiKeyAuth` security scheme and once as this explicit required query
    # parameter referenced by every operation. Confirm the duplication is
    # intentional before removing either declaration.
    ApiKey:
      name: _apikey
      in: query
      required: true
      description: Your Import.io API key.
      schema:
        type: string

    # Pagination and sorting parameters shared by the list operations.
    PerPage:
      name: _perpage
      in: query
      required: false
      description: Number of items to return per page.
      schema:
        type: integer
    Page:
      name: _page
      in: query
      required: false
      description: Page number to return (default 1).
      schema:
        type: integer
        default: 1
    Sort:
      name: _sort
      in: query
      required: false
      description: Field to sort by, e.g. `meta_created_at`.
      schema:
        type: string
    SortDirection:
      name: _sortDirection
      in: query
      required: false
      description: Sort direction.
      schema:
        type: string
        enum: [ASC, DESC]
        default: DESC

    # Resource identifiers used as path parameters.
    ExtractorId:
      name: extractorId
      in: path
      required: true
      description: Unique identifier of the extractor.
      schema:
        type: string
        format: uuid
    CrawlrunId:
      name: crawlrunId
      in: path
      required: true
      description: Unique identifier of the crawl run.
      schema:
        type: string
        format: uuid
    ReportId:
      name: reportId
      in: path
      required: true
      description: Unique identifier of the report.
      schema:
        type: string
        format: uuid
    ReportRunId:
      name: reportRunId
      in: path
      required: true
      description: Unique identifier of the report run.
      schema:
        type: string
        format: uuid

  # Shared error responses; all of them use the `Error` schema as the body.
  responses:
    Unauthorized:
      description: Authentication failed.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
    Forbidden:
      description: Forbidden.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
    NotFound:
      description: Resource not found.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
    ServerError:
      description: Server error.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'

  schemas:
    Error:
      type: object
      properties:
        code:
          type: integer
          description: Internal error code.
        message:
          type: string
          description: Human-readable error description.
        error:
          type: string
          description: Deprecated error string.
          deprecated: true

    Meta:
      type: object
      description: Common metadata included on most resources.
      properties:
        createdAt:
          type: string
          format: date-time
        modifiedAt:
          type: string
          format: date-time
        ownerId:
          type: string
          format: uuid
        creatorId:
          type: string
          format: uuid
        editorId:
          type: string
          format: uuid

    User:
      type: object
      properties:
        guid:
          type: string
          format: uuid
        username:
          type: string
        firstName:
          type: string
        lastName:
          type: string
        email:
          type: string
          format: email
        company:
          type: string
        roles:
          type: array
          items:
            type: string
        passwordSetAt:
          type: string
          format: date-time
        lastLoginDateTime:
          type: string
          format: date-time
        loginAttemptsLeft:
          type: integer
        archived:
          type: boolean
        _meta:
          $ref: '#/components/schemas/Meta'

    Subscription:
      type: object
      properties:
        userId:
          type: string
          format: uuid
        planCode:
          type: string
        currentUsage:
          type: integer
        maxUsage:
          type: integer
        expiryDate:
          type: string
          format: date-time
        nextBillingDate:
          type: string
          format: date-time
        autoRenew:
          type: boolean
        trial:
          type: boolean
        custom:
          type: boolean
        features:
          type: array
          items:
            type: string
        overage:
          type: object
          additionalProperties: true

    Extractor:
      type: object
      properties:
        guid:
          type: string
          format: uuid
        name:
          type: string
        fields:
          type: array
          items:
            type: object
            additionalProperties: true
        tags:
          type: array
          items:
            type: string
        archived:
          type: boolean
        proxy:
          type: object
          additionalProperties: true
        webhooks:
          type: array
          items:
            type: object
            additionalProperties: true
        chainingConfig:
          type: object
          additionalProperties: true
        credentialsId:
          type: string
          format: uuid
        _meta:
          $ref: '#/components/schemas/Meta'

    ExtractorInputs:
      type: object
      properties:
        _url:
          type: string
          description: >-
            One or more URLs to extract data from. Multiple URLs may be
            newline-separated.

    Credentials:
      type: object
      required: [username, password]
      properties:
        username:
          type: string
        password:
          type: string
          format: password

    Crawlrun:
      type: object
      properties:
        guid:
          type: string
          format: uuid
        extractorId:
          type: string
          format: uuid
        state:
          type: string
          enum: [PENDING, STARTED, FINISHED, FAILED, CANCELLED]
        startedAt:
          type: string
          format: date-time
        stoppedAt:
          type: string
          format: date-time
        totalUrlCount:
          type: integer
        successUrlCount:
          type: integer
        failedUrlCount:
          type: integer
        rowCount:
          type: integer
        screenCaptureCount:
          type: integer
        htmlExtractionCount:
          type: integer
        files:
          type: object
          additionalProperties: true
        webhooks:
          type: array
          items:
            type: object
            additionalProperties: true
        _meta:
          $ref: '#/components/schemas/Meta'

    Report:
      type: object
      properties:
        guid:
          type: string
          format: uuid
        name:
          type: string
        type:
          type: string
        extractorId:
          type: string
          format: uuid
        latestConfigId:
          type: string
          format: uuid
        archived:
          type: boolean
        _meta:
          $ref: '#/components/schemas/Meta'

    ReportRun:
      type: object
      properties:
        guid:
          type: string
          format: uuid
        name:
          type: string
        type:
          type: string
        status:
          type: string
        reportId:
          type: string
          format: uuid
        configId:
          type: string
          format: uuid
        extractorId:
          type: string
          format: uuid
        inputs:
          type: object
          additionalProperties: true
        csv:
          type: string
        json:
          type: string
        log:
          type: string
        pdf:
          type: string
        xlsx:
          type: string
        summary:
          type: object
          additionalProperties: true
        errorMessage:
          type: string
        archived:
          type: boolean
        _meta:
          $ref: '#/components/schemas/Meta'