# robots.txt — Kallo (www.kallo.com)
# Strategy: ALLOW ALL AI crawlers (search, retrieval, AND training)
# We allow all AI crawlers including training bots. Our discovery files
# (llms.txt, brand.txt, faq-ai.txt) are designed to be consumed by AI models
# during training to ensure accurate brand representation.
# Last updated: 2026-04-22

User-agent: *
Allow: /

# ===== AI SEARCH & RETRIEVAL BOTS =====

# OpenAI Search & Retrieval
User-agent: OAI-SearchBot
Allow: /

User-agent: ChatGPT-User
Allow: /

# Anthropic Search & Retrieval
User-agent: Claude-SearchBot
Allow: /

User-agent: Claude-User
Allow: /

# Perplexity
User-agent: PerplexityBot
Allow: /

User-agent: Perplexity-User
Allow: /

# Microsoft/Bing (powers Copilot)
User-agent: bingbot
Allow: /

# DuckDuckGo AI Answers
User-agent: DuckAssistBot
Allow: /

# Mistral AI
User-agent: MistralAI-User
Allow: /

# Google Vertex AI
User-agent: Google-CloudVertexBot
Allow: /

# Amazon (Alexa, product intelligence)
User-agent: Amazonbot
Allow: /

# ===== AI TRAINING BOTS — ALLOWED for maximum brand visibility =====

# OpenAI Training
User-agent: GPTBot
Allow: /

# Anthropic Training
User-agent: ClaudeBot
Allow: /

# Google AI Training
User-agent: Google-Extended
Allow: /

# Apple AI Training
User-agent: Applebot-Extended
Allow: /

# Common Crawl (upstream for most open LLM training)
User-agent: CCBot
Allow: /

# Meta AI Training
User-agent: Meta-ExternalAgent
Allow: /

User-agent: Meta-ExternalFetcher
Allow: /

# ByteDance
User-agent: Bytespider
Allow: /

User-agent: TikTokSpider
Allow: /

# Cohere Training
User-agent: cohere-ai
Allow: /

User-agent: cohere-training-data-crawler
Allow: /

# DeepSeek
User-agent: DeepSeekBot
Allow: /

# Other AI Training / Data Collection
User-agent: anthropic-ai
Allow: /

User-agent: AI2Bot
Allow: /

User-agent: AI2Bot-Dolma
Allow: /

User-agent: PanguBot
Allow: /

User-agent: SBIntuitionsBot
Allow: /

User-agent: img2dataset
Allow: /

User-agent: Diffbot
Allow: /

User-agent: ImagesiftBot
Allow: /

User-agent: Omgili
Allow: /

User-agent: Omgilibot
Allow: /

User-agent: webzio-extended
Allow: /

User-agent: Timpibot
Allow: /

User-agent: VelenPublicWebCrawler
Allow: /

User-agent: ICC-Crawler
Allow: /

User-agent: Kangaroo Bot
Allow: /

User-agent: Seekr
Allow: /

User-agent: peer39_crawler
Allow: /

User-agent: FirecrawlAgent
Allow: /

User-agent: bedrockbot
Allow: /

User-agent: TurnitinBot
Allow: /

User-agent: Petalbot
Allow: /

User-agent: SemrushBot-OCOB
Allow: /

User-agent: SemrushBot-FT
Allow: /

User-agent: SemrushBot-ESI
Allow: /

User-agent: DataForSeoBot
Allow: /

User-agent: AwarioBot
Allow: /

User-agent: AwarioSmartBot
Allow: /

User-agent: AwarioRssBot
Allow: /

User-agent: Sentibot
Allow: /

User-agent: Meltwater
Allow: /

User-agent: Factset_spyderbot
Allow: /

User-agent: aiHitBot
Allow: /

User-agent: Cotoyogi
Allow: /

User-agent: Scrapy
Allow: /

User-agent: Youbot
Allow: /

User-agent: Crawlspace
Allow: /

User-agent: bigsur.ai
Allow: /

User-agent: Brightbot
Allow: /

User-agent: EchoboxBot
Allow: /

User-agent: FriendlyCrawler
Allow: /

User-agent: LinerBot
Allow: /

User-agent: Panscient
Allow: /

User-agent: Panscient.com
Allow: /

User-agent: Poseidon Research Crawler
Allow: /

User-agent: TerraCotta
Allow: /

User-agent: Thinkbot
Allow: /

User-agent: Yak
Allow: /

User-agent: YandexAdditional
Allow: /

User-agent: YandexAdditionalBot
Allow: /

User-agent: AddSearchBot
Allow: /

# ===== DISCOVERY FILES =====

Sitemap: https://kallodev.aicotone.bio/sitemap.xml
Host: https://kallodev.aicotone.bio

# Discovery files
# https://kallodev.aicotone.bio/llms.txt
# https://kallodev.aicotone.bio/llms-full.txt
# https://kallodev.aicotone.bio/llms-ctx.txt
# https://kallodev.aicotone.bio/ai.txt
# https://kallodev.aicotone.bio/identity.json
# https://kallodev.aicotone.bio/brand.txt
# https://kallodev.aicotone.bio/faq-ai.txt