# ===========================
# Robots.txt for https://growise.sg/
# ===========================
#
# How crawlers read this file (RFC 9309):
#   * A crawler obeys ONLY the group whose User-agent line matches it most
#     specifically. The "*" group is a fallback for crawlers that have no
#     dedicated group; its rules are NOT inherited by named groups.
#   * Inside a group, the longest matching path wins; on a tie, Allow wins.
#   * Sitemap lines are global and belong to no group.
#
# NOTE(review): because of the first point, every crawler named in an
# "Allow: /" group further down (Googlebot, Bingbot, ...) is exempt from all
# Disallow rules in the "*" group, so those rules save no crawl budget for
# them. If the restrictions are meant to apply to these crawlers as well,
# delete their groups so that they fall back to "*".
#
# Blank lines are avoided inside a group because some older parsers treat a
# blank line as the end of the group; "#" lines are used as separators.

# ===========================
# Default rules: every crawler without a dedicated group below
# ===========================
User-agent: *
#
# Block common unnecessary pages, like admin and login, to save crawl budget
Disallow: /wp-admin/
Disallow: /myai-
Disallow: /cgi-bin/
Disallow: /trackback/
Disallow: /search/
Disallow: /rss/
Disallow: /comments/
Disallow: /comments/feed
Disallow: /*/trackback/
Disallow: /feed/$
Disallow: /*/feed/$
Disallow: /*/feed/rss/$
Disallow: /*/feed/
Disallow: /*/comments/
Disallow: /?p=
Disallow: /archives/
Disallow: /wp-*
Disallow: /login/
Disallow: /*.inc$
#
# Block category, tag, and author archive pages
Disallow: /category/
Disallow: /tag/
Disallow: /author/
#
# Exceptions to "Disallow: /wp-*" and "Disallow: /wp-admin/": these paths are
# longer than the Disallow patterns they overlap, so they take precedence.
# Themes and plugins stay crawlable so that CSS and JavaScript can be fetched.
Allow: /wp-admin/admin-ajax.php
Allow: /wp-includes/
Allow: /wp-content/plugins/
Allow: /wp-content/themes/
Allow: /wp-content/uploads/
#
# Block URLs whose first query parameter is a tracking parameter
Disallow: /*?utm_*
Disallow: /*?ref=*
Disallow: /*?sessionid=*
#
# UTM exceptions: these Allow patterns are longer than "/*?utm_*", so URLs
# starting their query string with utm_source, utm_medium or utm_campaign
# remain crawlable; any other leading utm_ parameter stays blocked.
# NOTE(review): the file previously described UTM URLs both as blocked and as
# allowed. Remove these three lines if UTM URLs should be blocked entirely.
Allow: /*?utm_source=*
Allow: /*?utm_medium=*
Allow: /*?utm_campaign=*
#
# Block unnecessary query parameters that don't contribute to valuable content
Disallow: /*?sort=*
Disallow: /*?filter=*
Disallow: /*?order=*
Disallow: /*?page=*
Disallow: /*?search=*
#
# Block unnecessary AJAX requests or dynamic pages
Disallow: /ajax/
Disallow: /dynamic/
#
# Prevent crawling of temporary directories or files
Disallow: /tmp/
Disallow: /cache/
Disallow: /logs/

# ===========================
# Search engines and essential crawlers (full access)
# ===========================
# Googlebot and its variations (for image, news, video)
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
User-agent: Googlebot-Video
# Other search bots
User-agent: Bingbot
User-agent: Slurp # Yahoo
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: YandexBot
User-agent: Sogou
# NOTE(review): Exabot was listed both here (Allow: /) and in the blocked
# section (Disallow: /). On such a tie Allow wins, so the block never took
# effect; the effective behaviour (allowed) is kept. Move this line to the
# blocked group if Exabot should really be blocked.
User-agent: Exabot
# Social media bots
User-agent: facebot # Facebook crawler
User-agent: ia_archiver # Alexa
User-agent: Applebot # Apple Siri & Spotlight
Allow: /

# ===========================
# Advanced search engine crawlers and link-preview fetchers (full access)
# ===========================
User-agent: AdsBot-Google # Google Ads Quality
User-agent: APIs-Google # Google APIs
User-agent: Google-InspectionTool # Search Console / PageSpeed
User-agent: Storebot-Google # Google Shopping
User-agent: Pinterestbot # Pinterest
User-agent: Twitterbot # Twitter (X)
User-agent: LinkedInBot # LinkedIn preview
User-agent: WhatsApp # WhatsApp link preview
User-agent: TelegramBot # Telegram link preview
User-agent: FacebookExternalHit # FB preview fetcher
User-agent: Snapchat # Snap previews
User-agent: Redditbot # Reddit link preview
User-agent: Baiduspider-image # Baidu Images
User-agent: YandexImages # Yandex Images
User-agent: YandexMobileBot
User-agent: YandexDirect
Allow: /

# ===========================
# SEO tools, scrapers & competitive intelligence bots (full access)
# ===========================
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: Moz
User-agent: Screaming Frog SEO Spider
User-agent: SiteAuditBot
User-agent: SEOkicks-Robot
User-agent: Seoscanners
User-agent: RankActiveLinkBot
User-agent: RankFlex
User-agent: LinkdexBot
User-agent: Lipperhey Spider
User-agent: MegaIndex.ru
User-agent: BLEXBot
User-agent: DotBot
User-agent: DataForSeoBot
User-agent: SurdotlyBot
User-agent: spbot
User-agent: CCBot
User-agent: GrapeshotCrawler
Allow: /

# ===========================
# AI & LLM training bots (full access)
# ===========================
User-agent: GPTBot # OpenAI
User-agent: ChatGPT-User # ChatGPT browsing
User-agent: anthropic-ai # Claude
User-agent: cohere-ai # Cohere AI
User-agent: OAI-SearchBot
User-agent: PerplexityBot
User-agent: FacebookBot # LLaMA training
User-agent: PetalBot # Huawei
User-agent: Bytespider # TikTok AI data
User-agent: Applebot-Image # AI training images
Allow: /

# ===========================
# Malicious / aggressive scrapers & archivers (blocked)
# ===========================
# robots.txt is advisory only: crawlers that ignore it must be blocked at the
# server or firewall level instead.
User-agent: MJ12bot
User-agent: TurnitinBot
User-agent: CopyRightCheck
User-agent: archive.org_bot
User-agent: HTTrack
User-agent: Wget
User-agent: Nutch
User-agent: k2spider
User-agent: Qwantify
User-agent: Netcraft
User-agent: proximic
Disallow: /

# ===========================
# Sitemap files
# ===========================
Sitemap: https://growise.sg/sitemap_index.xml
# Advanced AI indexing structures
# NOTE(review): assumes both files follow the sitemaps.org XML protocol;
# remove them from here if they use a custom schema.
Sitemap: https://growise.sg/semantic-sitemap.xml
Sitemap: https://growise.sg/vector-feed.xml

# ================================
# AI & LLM DISCOVERY LAYER
# ================================
# Structured AI interaction and governance files. They are not sitemaps, so
# they are listed as plain references instead of "Sitemap:" directives, which
# crawlers would try (and fail) to parse as sitemaps.
#   AI behavior and guidance:           https://growise.sg/ai.txt
#   LLM usage and training policy:      https://growise.sg/llms.txt
#   Machine-readable AI identity layer: https://growise.sg/ai-manifesto.json