# Drop this in .github/workflows/ to keep an eye on which AI crawlers your
# production site lets in. Fails the run if a search/answer bot you want gets
# blocked, or if a training bot you don't want slips through — so a stray
# Cloudflare "Block AI bots" toggle or a bad robots.txt edit can't ship silently.

name: AI Crawl Check

on:
  # Allow the check to be started by hand for ad-hoc verification.
  workflow_dispatch: {}
  # Weekly re-check of the live site: CDN-level bot policies can change
  # without any commit landing in this repository.
  schedule:
    - cron: '0 6 * * 1' # Mondays at 06:00 UTC
  # Run on pull requests that modify robots.txt in either of these directories.
  # NOTE(review): the job probes the `url` configured under `jobs`, so a PR run
  # presumably reflects what is currently deployed rather than the edited file;
  # confirm whether the action can validate the PR's robots.txt directly.
  pull_request:
    paths:
      - public/robots.txt
      - static/robots.txt

jobs:
  # Single job: probe the configured site and assert the AI-crawler policy.
  ai-crawl-check:
    runs-on: ubuntu-latest
    # Bound the run so a hung network probe cannot hold a runner for the
    # 360-minute default job timeout.
    timeout-minutes: 10
    # Least privilege for the GITHUB_TOKEN handed to the third-party action.
    # NOTE(review): presumably the action only makes outbound HTTP requests;
    # widen this if it turns out to need more (e.g. to comment on PRs).
    permissions:
      contents: read
    steps:
      # NOTE(review): `@v1` is a mutable tag; consider pinning to a full commit
      # SHA so an upstream retag cannot change what runs here.
      - uses: TryGeoSuite/ai-crawler-bots@v1
        with:
          # Site to check; replace the placeholder with your production origin.
          url: https://example.com
          # Search + on-demand bots drive citations/traffic — they MUST stay reachable.
          assert-allowed: oai-searchbot,perplexitybot,chatgpt-user
          # Bulk training crawlers MUST stay blocked (drop this line if you opt in).
          assert-blocked: gptbot,claudebot
          # Optional overall floor on the AI-visibility score (0-100).
          fail-under: 50