# Version string under the `naftiko` key (presumably the spec version this
# document targets); quoted so it is always read as a string.
naftiko: '1.0.0-alpha2'
# Human-readable metadata describing this capability.
info:
  label: Web Data Collection
  description: >-
    Unified capability for web data collection workflows using Scrapfly's
    scraping, screenshot, and extraction APIs. Enables data engineers and
    researchers to collect, extract, and transform web content at scale with
    anti-bot bypass, proxy rotation, and AI-assisted extraction.
  tags:
  - Web Scraping
  - Data Collection
  - Data Extraction
  - Screenshots
  - Anti-Bot
  - Proxies
  # Dates are quoted so they stay strings instead of being parsed as dates.
  created: '2026-05-02'
  modified: '2026-05-06'
# Bindings: declares the SCRAPFLY_API_KEY key in the `env` namespace
# (presumably an environment variable). The same name is referenced as
# {{SCRAPFLY_API_KEY}} by the consumed API's authentication settings.
binds:
- namespace: env
  keys:
    SCRAPFLY_API_KEY: SCRAPFLY_API_KEY
capability:
  consumes:
  # Upstream HTTP API consumed by this capability. Operations declared under
  # `resources` are referenced from the exposed adapters in this file as
  # `scrapfly-scrape.<operation-name>`.
  - type: http
    namespace: scrapfly-scrape
    baseUri: https://api.scrapfly.io
    description: Scrapfly Web Scraping API
    # API-key authentication: the bound SCRAPFLY_API_KEY value is placed in
    # the query string under the parameter name `key`.
    authentication:
      type: apikey
      key: key
      value: '{{SCRAPFLY_API_KEY}}'
      placement: query
    resources:
    # Scrape resource: a single GET operation on /scrape.
    # Only `url` is required; every other parameter is optional and sent as a
    # query parameter. The whole response body is returned as `result`.
    - name: scraping
      path: /scrape
      description: Core web scraping endpoint
      operations:
      - name: scrape-url
        method: GET
        description: Scrape any URL with anti-bot bypass and optional JS rendering
        inputParameters:
        - name: url
          in: query
          type: string
          required: true
          description: Target URL to scrape
        - name: render_js
          in: query
          type: boolean
          required: false
          description: Enable JavaScript rendering
        - name: asp
          in: query
          type: boolean
          required: false
          description: Enable Anti Scraping Protection bypass
        - name: country
          in: query
          type: string
          required: false
          description: Proxy country (ISO alpha-2)
        - name: format
          in: query
          type: string
          required: false
          description: Output format (raw, clean_html, markdown, text)
        - name: extraction_prompt
          in: query
          type: string
          required: false
          description: LLM prompt for structured data extraction
        - name: session
          in: query
          type: string
          required: false
          description: Session name for persistent cookies
        - name: cache
          in: query
          type: boolean
          required: false
          description: Enable response caching
        # Declared because the scrape-with-cache MCP tool forwards cache_ttl to
        # this operation; without a declaration there is no query parameter
        # for that value to bind to.
        - name: cache_ttl
          in: query
          type: number
          required: false
          description: Cache time-to-live in seconds (used together with cache)
        outputRawFormat: json
        outputParameters:
        # `$.` selects the root of the JSON response.
        - name: result
          type: object
          value: $.
    # Screenshot resource: a single GET operation on /screenshot.
    # Only `url` is required; capture, format and country are optional query
    # parameters. The whole response body is returned as `result`.
    - name: screenshot
      path: /screenshot
      description: Screenshot capture endpoint
      operations:
      - name: capture-screenshot
        method: GET
        description: Capture a screenshot of a web page or element
        inputParameters:
        - name: url
          in: query
          type: string
          required: true
          description: Target URL to screenshot
        - name: capture
          in: query
          type: string
          required: false
          description: Capture scope (fullpage or CSS selector)
        # NOTE(review): confirm the accepted format values against the
        # Scrapfly Screenshot API documentation.
        - name: format
          in: query
          type: string
          required: false
          description: Image format (png, jpeg, webp)
        - name: country
          in: query
          type: string
          required: false
          description: Proxy country for geo-specific capture
        # NOTE(review): the response is declared as JSON here; confirm the
        # /screenshot endpoint returns JSON rather than raw image bytes.
        outputRawFormat: json
        outputParameters:
        - name: result
          type: object
          value: $.
  exposes:
  # REST adapter exposed on port 8080. Each operation under `resources`
  # delegates to a consumed operation through its `call` key.
  - type: rest
    port: 8080
    namespace: web-data-collection-api
    description: Unified REST API for web data collection, scraping, and screenshot capture.
    resources:
    # GET /v1/scrape -> scrapfly-scrape.scrape-url.
    # Forwards url, render_js, asp, country and format from the incoming
    # request (referenced as rest.<name>) and returns the full result object.
    - name: scraping
      path: /v1/scrape
      description: Web page scraping with anti-bot bypass
      operations:
      - name: scrape-url
        method: GET
        description: Scrape a URL with configurable rendering and extraction
        call: scrapfly-scrape.scrape-url
        with:
          url: rest.url
          render_js: rest.render_js
          asp: rest.asp
          country: rest.country
          format: rest.format
        outputParameters:
        - type: object
          mapping: '$.'
    # GET /v1/screenshots -> scrapfly-scrape.capture-screenshot.
    # Forwards url, capture and format (referenced as rest.<name>). The
    # consumed operation also declares `country`, which is not forwarded here.
    - name: screenshots
      path: /v1/screenshots
      description: Web page screenshot capture
      operations:
      - name: capture-screenshot
        method: GET
        description: Capture a screenshot of a webpage or element
        call: scrapfly-scrape.capture-screenshot
        with:
          url: rest.url
          capture: rest.capture
          format: rest.format
        outputParameters:
        - type: object
          mapping: '$.'
  # MCP adapter exposed on port 9090 over the http transport. Each entry
  # under `tools` delegates to a consumed operation through its `call` key.
  - type: mcp
    port: 9090
    namespace: web-data-collection-mcp
    transport: http
    description: MCP server for AI-assisted web data collection and extraction workflows.
    tools:
    # Tool: general-purpose page scrape.
    # Delegates to scrapfly-scrape.scrape-url, forwarding url, render_js, asp,
    # country and format from the tool arguments (referenced as tools.<name>).
    - name: scrape-webpage
      description: >-
        Scrape any webpage and return its content. Supports anti-bot bypass,
        JavaScript rendering for dynamic sites, proxy rotation across 190+
        countries, and output in HTML, markdown, or plain text format.
      call: scrapfly-scrape.scrape-url
      with:
        url: tools.url
        render_js: tools.render_js
        asp: tools.asp
        country: tools.country
        format: tools.format
      hints:
        readOnly: true
        openWorld: true
      outputParameters:
      - type: object
        mapping: '$.'
    # Tool: prompt-driven structured extraction.
    # Delegates to scrapfly-scrape.scrape-url; the tool argument `prompt` is
    # passed to the operation's `extraction_prompt` parameter.
    - name: extract-structured-data
      description: >-
        Scrape a webpage and extract structured data using an AI prompt.
        Returns structured JSON data extracted from the page content.
      call: scrapfly-scrape.scrape-url
      with:
        url: tools.url
        extraction_prompt: tools.prompt
        render_js: tools.render_js
        asp: tools.asp
      hints:
        readOnly: true
        openWorld: true
      outputParameters:
      - type: object
        mapping: '$.'
    # Tool: scrape within a named session.
    # Delegates to scrapfly-scrape.scrape-url, forwarding url, session and
    # render_js from the tool arguments.
    - name: scrape-with-session
      description: >-
        Scrape a webpage with session persistence, maintaining cookies and
        browser fingerprint across multiple requests to the same site.
      call: scrapfly-scrape.scrape-url
      with:
        url: tools.url
        session: tools.session
        render_js: tools.render_js
      hints:
        readOnly: true
      outputParameters:
      - type: object
        mapping: '$.'
    # Tool: scrape with response caching.
    # Delegates to scrapfly-scrape.scrape-url, forwarding url, cache and
    # cache_ttl from the tool arguments.
    # NOTE(review): cache_ttl presumably only reaches the upstream API if
    # scrape-url declares a matching input parameter - confirm.
    - name: scrape-with-cache
      description: >-
        Scrape a URL with caching enabled to avoid redundant requests. Ideal
        for repeatedly accessed URLs that don't change frequently.
      call: scrapfly-scrape.scrape-url
      with:
        url: tools.url
        cache: tools.cache
        cache_ttl: tools.cache_ttl
      hints:
        readOnly: true
        idempotent: true
      outputParameters:
      - type: object
        mapping: '$.'
    # Tool: full-page screenshot.
    # Delegates to scrapfly-scrape.capture-screenshot with `capture` fixed to
    # the literal `fullpage`; url and format come from the tool arguments.
    - name: capture-full-page-screenshot
      description: >-
        Capture a full-page screenshot of any website. Useful for visual
        verification, archiving, or accessibility testing.
      call: scrapfly-scrape.capture-screenshot
      with:
        url: tools.url
        capture: fullpage
        format: tools.format
      hints:
        readOnly: true
      outputParameters:
      - type: object
        mapping: '$.'
    # Tool: single-element screenshot.
    # Delegates to scrapfly-scrape.capture-screenshot; the tool argument
    # `css_selector` is passed to the operation's `capture` parameter.
    - name: capture-element-screenshot
      description: >-
        Capture a screenshot of a specific HTML element using a CSS selector.
        Useful for extracting visual data from specific page components.
      call: scrapfly-scrape.capture-screenshot
      with:
        url: tools.url
        capture: tools.css_selector
        format: tools.format
      hints:
        readOnly: true
      outputParameters:
      - type: object
        mapping: '$.'