## Default rules for any crawler that is not matched by a more specific User-agent group in this file.
User-agent: *
Disallow: /followers
Disallow: /following
Disallow: /admin
Disallow: /remote_interaction
Disallow: /remote_follow

## https://commoncrawl.org/faq - Has been used by ChatGPT, Bard, and others for training a number of models.
User-agent: CCBot
Disallow: /

## The bot used when a ChatGPT user instructs it to reference your website.
User-agent: ChatGPT-User
Disallow: /

## The bot that OpenAI uses to collect bulk training data for ChatGPT.
User-agent: GPTBot
Disallow: /

## Block Google from scraping your site for Bard and VertexAI.
User-agent: Google-Extended
Disallow: /

## Omgili sells the data it scrapes to others for their AI training (applies to both Omgili agents below).
User-agent: Omgilibot
Disallow: /

User-agent: Omgili
Disallow: /

## Meta’s bot that crawls public web pages to improve language models for their speech recognition technology.
User-agent: FacebookBot
Disallow: /

## Apple very kindly told us how to block their scraper AFTER they'd scraped everything.
User-agent: Applebot-Extended
Disallow: /

## anthropic-ai is used by Anthropic to gather data for their “AI” products, such as Claude.
User-agent: anthropic-ai
Disallow: /

## ClaudeBot is another agent used by Anthropic that is more specifically related to Claude.
User-agent: ClaudeBot
Disallow: /

## Diffbot is a somewhat dishonest scraping bot used to collect data to train LLMs. This is their default user-agent, but they make it easy for their clients to change it to something else and ignore your wishes.
User-agent: Diffbot
Disallow: /

## This is just getting stupid and I hope governments step in to wreck these tech-bro thieves.
User-agent: Bytespider
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: cohere-ai
Disallow: /

## NOTE(review): facebookexternalhit is understood to also fetch link previews for shared URLs;
## blocking it may remove previews on Meta's platforms - confirm this is intended.
User-agent: facebookexternalhit
Disallow: /

User-agent: facebookcatalog
Disallow: /

User-agent: meta-externalagent
Disallow: /