# robots.txt for https://www.gangarealty.com
#
# Format notes:
#   * One directive per line. Everything after a '#' on a line is a comment,
#     so directives must never share a line with a leading comment.
#   * A crawler obeys only the single most specific User-agent group that
#     matches it. The bot-specific groups below therefore REPLACE the '*'
#     group for those bots; they do not inherit its Disallow rules.

# ==========================
# Main Search Engine Crawlers
# ==========================
User-agent: *

# Block system, admin, and backend pages
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /cgi-bin/
# Keep the AJAX endpoint reachable; front-end features commonly call it
Allow: /wp-admin/admin-ajax.php

# Block duplicate/low-value archive and utility pages
# Search results
Disallow: /?s=
# Tag archives
Disallow: /tag/
# Author archives
Disallow: /author/
# RSS feeds
Disallow: /feed/
# Comment pages
Disallow: /comments/
# Comment reply links
Disallow: /*?replytocom
# UTM parameter URLs (as the first or a later query parameter)
Disallow: /*?utm_
Disallow: /*&utm_
# Social share duplicates
Disallow: /*?share=

# Allow important static resources (needed for rendering)
Allow: /wp-content/uploads/
Allow: /wp-content/themes/
Allow: /wp-content/plugins/

# ==========================
# AI / Data Crawlers Allowed
# ==========================
# Each group below grants full access. Because a bot-specific group overrides
# the '*' group, these agents are NOT subject to the Disallow rules above.

# OpenAI: fetches made on behalf of ChatGPT users
# NOTE(review): OpenAI's training crawler uses a separate token (GPTBot);
# add a group for it if it should be addressed explicitly.
User-agent: ChatGPT-User
Allow: /

# Google-Extended: control token for Google's AI products (Bard/Gemini)
User-agent: Google-Extended
Allow: /

# CommonCrawl (used for AI training datasets)
User-agent: CCBot
Allow: /

# Claude AI (Anthropic)
User-agent: ClaudeBot
Allow: /

# Perplexity AI
User-agent: PerplexityBot
Allow: /

# Amazon Bot (AWS + AI services)
User-agent: Amazonbot
Allow: /

# ByteDance AI crawler (TikTok / Bytespider)
User-agent: Bytespider
Allow: /

# Facebook / Meta AI
User-agent: FacebookBot
Allow: /

# ==========================
# Sitemaps
# ==========================
Sitemap: https://www.gangarealty.com/sitemap.xml
Sitemap: https://www.gangarealty.com/sitemap-blog.xml