# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# -----------------------------------------------------------------------------
# Robots.txt for TextToolkitHub - Production Edition
# Optimized for Googlebot, Bingbot, and modern Search Engine Crawlers
# Last Updated: 2026-06-06
# -----------------------------------------------------------------------------

# Social link-preview crawlers get unrestricted access so shared links can
# render previews. Both agents share one group with a single rule.
User-agent: Twitterbot
User-agent: facebookexternalhit
Allow: /

User-agent: *
# Default rules for every crawler that has no named group of its own.
#
# Ordering: specific rules come first and the catch-all "Allow: /" comes last.
# Longest-match parsers (RFC 9309, Googlebot) ignore rule order, but
# first-match parsers stop at the first rule that matches, so a leading
# "Allow: /" would mask every Disallow in this group.
#
# Layout: sections are separated by "#" lines rather than blank lines, because
# parsers that follow the original record convention treat a blank line as the
# end of the group and would ignore the rules after it.
#
# NOTE(review): a second "User-agent: *" group exists in the Cloudflare-managed
# section of this file. RFC 9309 parsers combine both groups; parsers that only
# honor the first matching group may not apply the rules below.
#
# Explicitly allow the public entry page and static assets
Allow: /index.html
Allow: /assets/
Allow: /favicon.ico
Allow: /favicon-*.png
Allow: /apple-touch-icon.png
Allow: /site.webmanifest
#
# Disallow crawling of code dependencies, scripts, build folders, and raw settings
Disallow: /dist/
Disallow: /scripts/
Disallow: /src/
Disallow: /node_modules/
Disallow: /metadata.json
Disallow: /package.json
Disallow: /package-lock.json
Disallow: /tsconfig.json
Disallow: /vite.config.ts
#
# Disallow any URL carrying a query string, and any path containing
# "_fragment" or "_error" (internal search loops, diagnostics, state variations)
Disallow: /*?*
Disallow: /*_fragment
Disallow: /*_error
#
# Catch-all: everything not matched above is crawlable (kept last on purpose)
Allow: /

# Block AI scrapers and bulk commercial scrapers to preserve system resources.
# All listed agents share one group with a single site-wide Disallow.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Google-Extended
User-agent: ClaudeBot
User-agent: Anthropic-AI
User-agent: CCBot
User-agent: PetalBot
Disallow: /

# Reference absolute dynamically generated sitemap layout
Sitemap: https://texttoolkithub.com/sitemap.xml