# =====================================================================
# Comprehensive Robots.txt by Razib Marketing
#
# The purpose of this file is to explicitly allow all major search
# engines, AI bots, and other common crawlers. The final "catch-all"
# rule ensures that any bot not listed here is also allowed.
# =====================================================================
# How to read this file:
#   "User-agent" names the bot a group of rules applies to.
#   "Allow: /" grants the bot full access to all content.
#   "Disallow:" (with nothing after it) also grants full access.

# -------------------------------------------------
# Major Search Engines
# -------------------------------------------------
User-agent: Googlebot
Allow: /

User-agent: bingbot
Allow: /

# Yahoo's crawler; its robots.txt token is "Slurp".
User-agent: Slurp
Allow: /

User-agent: DuckDuckBot
Allow: /

User-agent: Baiduspider
Allow: /

User-agent: YandexBot
Allow: /

User-agent: Sogou web spider
Allow: /

# -------------------------------------------------
# Major AI & Data Crawlers
# -------------------------------------------------
# Google AI (Gemini, Vertex AI, etc.). Google-Extended is a control
# token, not a separate crawler: disallowing it opts your content out
# of training Google's AI models, while allowing it (as here) permits
# that use. It has no effect on Google Search crawling.
User-agent: Google-Extended
Allow: /

# OpenAI (ChatGPT)
User-agent: GPTBot
Allow: /

User-agent: ChatGPT-User
Allow: /

# Anthropic (Claude)
User-agent: ClaudeBot
Allow: /

# Perplexity AI
User-agent: PerplexityBot
Allow: /

# Apple AI
User-agent: Applebot-Extended
Allow: /

# Meta AI (Facebook/Instagram)
User-agent: meta-externalagent
Allow: /

# Other AI and Data Crawlers
User-agent: CCBot
Allow: /

User-agent: cohere-ai
Allow: /

User-agent: aiHitBot
Allow: /

User-agent: Bytespider
Allow: /

# -------------------------------------------------
# Other Common Bots & Spiders
# -------------------------------------------------
User-agent: Applebot
Allow: /

User-agent: Amazonbot
Allow: /

User-agent: Facebot
Allow: /

User-agent: TikTokSpider
Allow: /

User-agent: AhrefsBot
Allow: /

User-agent: SemrushBot
Allow: /

User-agent: MJ12bot
Allow: /

User-agent: DotBot
Allow: /

User-agent: Exabot
Allow: /

User-agent: ia_archiver
Allow: /

User-agent: Cotoyogi
Allow: /

User-agent: Factset_spyderbot
Allow: /

User-agent: FirecrawlAgent
Allow: /

# =====================================================================
# CATCH-ALL RULE: IMPORTANT
#
# This final group applies to ALL user-agents not matched by a more
# specific group above. An empty "Disallow" directive grants full
# access, so even new and unlisted bots can crawl the site. Bots that
# match a named group above follow that group and ignore this one.
# =====================================================================
User-agent: *
Disallow:

# =====================================================================
# Website Sitemap
# =====================================================================
Sitemap: https://website.com/sitemap.xml

# Note: "LLM Sitemap:" is not a robots.txt directive and crawlers
# would ignore it. The llms.txt file is instead served from its own
# path at the site root:
# https://website.com/llms.txt
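
# =====================================================================
# Example (for reference only, commented out): the same syntax can
# restrict a bot instead of allowing it. "ExampleBot" and "/private/"
# below are placeholders, not part of this site's policy; uncomment
# and adapt them only if you actually want to block something.
#
# Block one bot from a single directory:
# User-agent: ExampleBot
# Disallow: /private/
#
# Block one bot from the entire site:
# User-agent: ExampleBot
# Disallow: /
# =====================================================================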