{
  "site": "https://better-robots.com/",
  "updated": "2026-03-24",
  "categories": [
    {
      "id": "search_crawlers",
      "label": "Search crawlers",
      "intent": [
        "discovery",
        "index refresh",
        "search visibility"
      ],
      "trigger_mode": [
        "automatic"
      ],
      "primary_control_surfaces": [
        "robots.txt",
        "meta robots",
        "x-robots-tag"
      ]
    },
    {
      "id": "training_crawlers_or_tokens",
      "label": "Training crawlers or training tokens",
      "intent": [
        "training collection",
        "future model improvement",
        "downstream reuse policy"
      ],
      "trigger_mode": [
        "automatic",
        "mixed"
      ],
      "primary_control_surfaces": [
        "robots.txt",
        "usage tokens",
        "vendor-specific product controls"
      ]
    },
    {
      "id": "answer_or_retrieval_systems",
      "label": "Answer or retrieval systems",
      "intent": [
        "answer generation",
        "grounding",
        "retrieval",
        "search-answer quality"
      ],
      "trigger_mode": [
        "automatic",
        "mixed"
      ],
      "primary_control_surfaces": [
        "robots.txt",
        "usage signals",
        "llms.txt",
        "vendor-specific answer controls"
      ]
    },
    {
      "id": "user_triggered_fetchers",
      "label": "User-triggered fetchers",
      "intent": [
        "user-directed retrieval",
        "on-demand visits",
        "task execution"
      ],
      "trigger_mode": [
        "user_triggered"
      ],
      "primary_control_surfaces": [
        "vendor policy",
        "infrastructure rules",
        "context-specific robots handling"
      ]
    },
    {
      "id": "signed_or_verified_agents",
      "label": "Signed or verified agents",
      "intent": [
        "authenticated access",
        "verified automation",
        "runtime actions"
      ],
      "trigger_mode": [
        "mixed",
        "verified"
      ],
      "primary_control_surfaces": [
        "CDN allowlisting",
        "WAF rules",
        "request verification",
        "runtime permissions"
      ]
    },
    {
      "id": "archive_bots",
      "label": "Archive bots",
      "intent": [
        "snapshot capture",
        "preservation",
        "replay"
      ],
      "trigger_mode": [
        "automatic"
      ],
      "primary_control_surfaces": [
        "robots.txt",
        "archive-specific controls"
      ]
    },
    {
      "id": "seo_tool_bots",
      "label": "SEO tool bots",
      "intent": [
        "SEO research",
        "crawl analysis",
        "competitive monitoring"
      ],
      "trigger_mode": [
        "automatic"
      ],
      "primary_control_surfaces": [
        "robots.txt",
        "rate controls",
        "vendor allow/deny policy"
      ]
    },
    {
      "id": "low_value_or_abusive_bots",
      "label": "Low-value or abusive bots",
      "intent": [
        "extraction",
        "resource load",
        "low-value harvesting",
        "abuse"
      ],
      "trigger_mode": [
        "automatic",
        "mixed",
        "unclear"
      ],
      "primary_control_surfaces": [
        "robots.txt",
        "WAF rules",
        "rate limiting",
        "infrastructure abuse controls"
      ]
    }
  ],
  "decision_dimensions": [
    "discovery value",
    "reuse or extraction risk",
    "trigger mode",
    "primary control surface",
    "verifiability",
    "infrastructure cost",
    "reversibility"
  ],
  "operational_rules": [
    "Classify machine roles before classifying brands.",
    "Do not assume one vendor maps to one control surface.",
    "Separate search, training, answer retrieval, and user-triggered access before publishing policy.",
    "Do not infer technical enforcement or agent authenticity from taxonomy alone."
  ]
}
