# robots.txt for appnigma.ai
# Last updated: 2026-04-06
# Strategy: Maximum AI Visibility (Brand/SaaS, not publisher)
# Review cadence: Quarterly (next review: July 2026)
#
# Appnigma's content supports a product, it IS NOT the product.
# Goal: be cited in every AI answer about Salesforce managed packages.
# Therefore: ALLOW all AI crawlers (search + training + retrieval).
#
# Reference: https://appnigma.ai/llms.txt (companion file)
#
# Content Signal Notice (informational only, not a directive):
#   search=yes | ai-input=yes | ai-train=yes
#   Appnigma AI permits all forms of content use by AI systems.
#   We WANT to be in your training data and your search answers.


# =========================================================================
# SECTION 1: DEFAULT RULES
# =========================================================================

# Fallback group: applies only to crawlers that do not match a more
# specific User-agent group elsewhere in this file. A crawler that matches
# a named group ignores these rules entirely.
#
# Rule order matters for some parsers. Longest-match parsers (RFC 9309,
# Google, Bing) ignore order, but first-match parsers (for example Python's
# urllib.robotparser) stop at the first rule that matches. The catch-all
# `Allow: /` therefore comes LAST so the Disallow rules are honored by both.
#
# /_next/static/ and /static/ are intentionally NOT disallowed: crawlers
# that render pages need to fetch the assets served from those paths
# (presumably CSS/JS bundles), and blocking them can degrade how pages are
# rendered and indexed.
User-agent: *
Disallow: /api/
Disallow: /admin/
Disallow: /internal/
Disallow: /account/
Disallow: /checkout/
Disallow: /signin
Disallow: /signup
Allow: /


# =========================================================================
# SECTION 2: TRADITIONAL SEARCH ENGINES
# =========================================================================

# A crawler obeys ONLY the most specific group that names it and ignores
# `User-agent: *`. A bare `Allow: /` here would therefore open /api/,
# /admin/, /account/, etc. to these bots, so the private paths are repeated
# in this group.
#
# All search engines share one group (multiple User-agent lines followed by
# one rule set). Keep the group free of blank lines: some parsers treat a
# blank line as the end of a group.
#
# Disallow rules precede the catch-all Allow so that first-match and
# longest-match parsers agree. Static asset paths are not blocked so that
# pages can be rendered.
User-agent: Googlebot
User-agent: bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: YandexBot
Disallow: /api/
Disallow: /admin/
Disallow: /internal/
Disallow: /account/
Disallow: /checkout/
Disallow: /signin
Disallow: /signup
Allow: /


# =========================================================================
# SECTION 3: AI SEARCH + RETRIEVAL CRAWLERS (send referral traffic)
# These bots power real-time AI answers. They fetch your content when
# a user asks a question. Allowing them is essential for GEO visibility.
# =========================================================================

# Microsoft: Copilot and Bing AI use bingbot (already allowed above).
# No separate Copilot bot exists; it shares bingbot's user-agent.

# All AI search/retrieval bots share ONE group. A crawler that matches a
# named group ignores `User-agent: *`, so the private paths are repeated
# here; a bare `Allow: /` would expose /api/, /admin/, /account/, etc.
# Comment lines between User-agent lines are ignored by parsers, but do not
# insert blank lines inside the group: some parsers end the group there.
#
# OpenAI: OAI-SearchBot powers ChatGPT Search results and citations.
# This is the #1 most important AI search bot for GEO.
# Blocking this = invisible in ChatGPT search answers.
User-agent: OAI-SearchBot
# OpenAI: ChatGPT-User fetches pages when a user clicks "Browse" or
# asks ChatGPT to visit a URL. User-initiated, high-intent traffic.
User-agent: ChatGPT-User
# Anthropic: Claude-SearchBot powers Claude's web search citations.
# Separate from ClaudeBot (training). Launched Feb 2026.
User-agent: Claude-SearchBot
# Anthropic: Claude-User handles user-initiated page fetches in Claude.
# Similar to ChatGPT-User. Launched Feb 2026.
User-agent: Claude-User
# Perplexity: PerplexityBot indexes content for Perplexity AI search.
# Perplexity always cites sources with links, high referral value.
User-agent: PerplexityBot
# Perplexity: Perplexity-User fetches pages on user request.
User-agent: Perplexity-User
# DuckDuckGo: DuckAssistBot powers DuckDuckGo AI Answers.
User-agent: DuckAssistBot
# You.com: YouBot powers You.com AI search results.
User-agent: YouBot
# Private paths first, catch-all Allow last, so first-match and
# longest-match parsers reach the same result.
Disallow: /api/
Disallow: /admin/
Disallow: /internal/
Disallow: /account/
Disallow: /checkout/
Disallow: /signin
Disallow: /signup
Allow: /


# =========================================================================
# SECTION 4: AI TRAINING CRAWLERS (explicitly ALLOWED for Appnigma)
#
# Most publishers block these. Appnigma is NOT a publisher.
# Appnigma is a B2B SaaS brand that WANTS maximum AI visibility.
# Being in training data = AI models know what Appnigma is = AI
# recommends Appnigma when users ask about managed packages.
#
# If strategy changes for a bot, remove its User-agent line from the group
# below and give it its own group containing only `Disallow: /`.
# =========================================================================

# All training crawlers share ONE group. A crawler that matches a named
# group ignores `User-agent: *`, so the private paths are repeated here;
# a bare `Allow: /` would let these bots collect /api/, /admin/,
# /account/, etc. Do not insert blank lines inside the group: some
# parsers treat a blank line as the end of a group.
#
# OpenAI: GPTBot collects training data for GPT models.
# Allowing = ChatGPT's base model knows Appnigma exists.
User-agent: GPTBot
# Anthropic: ClaudeBot collects training data for Claude models.
# Allowing = Claude's base model knows Appnigma exists.
User-agent: ClaudeBot
# Google: Google-Extended controls Gemini training data.
# Separate from Googlebot (search indexing, always allowed).
# Allowing = Gemini knows Appnigma exists in its base knowledge.
User-agent: Google-Extended
# Apple: Applebot-Extended trains Apple Intelligence and Siri.
# Allowing = Siri and Apple Intelligence know Appnigma.
User-agent: Applebot-Extended
# Meta: meta-externalagent trains Meta AI (WhatsApp, Instagram, Llama).
User-agent: meta-externalagent
# Amazon: Amazonbot powers Alexa answers and Amazon AI features.
User-agent: Amazonbot
# Facebook: FacebookBot indexes content for Meta platforms.
User-agent: FacebookBot
# Cohere: cohere-ai powers Cohere's enterprise AI models.
User-agent: cohere-ai
# Private paths first, catch-all Allow last, so first-match and
# longest-match parsers reach the same result.
Disallow: /api/
Disallow: /admin/
Disallow: /internal/
Disallow: /account/
Disallow: /checkout/
Disallow: /signin
Disallow: /signup
Allow: /


# =========================================================================
# SECTION 5: BLOCKED (aggressive scrapers with zero referral value)
# These bots are pure scrapers that provide no search visibility,
# no referral traffic, and no brand awareness benefit.
# =========================================================================

# Every bot named below is denied the entire site. They share a single
# group: several User-agent lines followed by one `Disallow: /`.
# Keep the group free of blank lines so no parser splits it.
#
# Common Crawl: bulk dataset scraper, no direct AI product, no referrals
User-agent: CCBot
# ByteDance/TikTok: aggressive scraper, poor robots.txt compliance
User-agent: Bytespider
# Diffbot: data extraction service, no AI search product
User-agent: Diffbot
# Omgili/Webz.io: bulk data aggregator (two user-agent tokens)
User-agent: omgili
User-agent: Omgilibot
# iAsk: minor AI search with negligible traffic
User-agent: iaskspider
# img2dataset: image scraping tool
User-agent: img2dataset
# Scrapy: generic scraping framework
User-agent: Scrapy
# MJ12bot: Majestic SEO crawler, high crawl volume, low value
User-agent: MJ12bot
Disallow: /

# --- SEO tool crawlers: NOT blocked, despite this section's heading ---

# AhrefsBot: aggressive crawl volume (allow if needed for backlink tracking).
# The group below is disabled, so AhrefsBot currently follows the
# `User-agent: *` rules. If you enable it, also repeat the private-path
# Disallow rules inside the group: once a bot matches a named group it no
# longer reads `User-agent: *`. Crawl-delay is a non-standard directive
# that not every crawler honors.
# User-agent: AhrefsBot
# Crawl-delay: 10

# SemrushBot: allow for SEMrush tools (your active subscription).
# Private paths are repeated because this named group replaces the
# `User-agent: *` rules for SemrushBot; a bare `Allow: /` would expose
# /api/, /admin/, /account/, etc. Disallow rules come before the
# catch-all Allow so first-match and longest-match parsers agree.
User-agent: SemrushBot
Disallow: /api/
Disallow: /admin/
Disallow: /internal/
Disallow: /account/
Disallow: /checkout/
Disallow: /signin
Disallow: /signup
Allow: /


# =========================================================================
# SITEMAPS
# =========================================================================

# The Sitemap directive is independent of the User-agent groups and applies
# to every crawler that reads this file.
# NOTE(review): this points at a single numbered sitemap file. If a sitemap
# index (e.g. /sitemap.xml) is also generated, referencing the index would
# cover any additional sitemap-N.xml files; confirm against the build output.
Sitemap: https://appnigma.ai/sitemap-0.xml