# robots.txt for Miraheze
#
# Format reminder: every directive must be on its own line. A "#" starts a
# comment that runs to the end of the line, so directives placed after a "#"
# on the same line are ignored by crawlers.

# Throttle access to certain pages.
# Do not list special pages (or other pages where indexing is undesirable)
# here if they are likely to be linked to; use noindex on those pages instead.
# Google can still index a URL blocked here, without crawling it, when other
# pages link to it.
# See https://developers.google.com/search/docs/crawling-indexing/robots/intro
User-agent: *
Allow: /w/api.php?action=mobileview&
Allow: /w/load.php?
Disallow: /w/
Disallow: /geoip$
Disallow: /rest_v1/

# Block SemrushBot
User-agent: SemrushBot
Disallow: /

# Block AhrefsBot
User-agent: AhrefsBot
Disallow: /

# Block Bytespider
User-agent: Bytespider
Disallow: /

# Block PetalBot
User-agent: PetalBot
Disallow: /

# Block DotBot
User-agent: DotBot
Disallow: /

# Block MegaIndex
User-agent: MegaIndex
Disallow: /

# Block serpstatbot
User-agent: serpstatbot
Disallow: /

# Block Barkrowler
User-agent: Barkrowler
Disallow: /

# Block SeekportBot
User-agent: SeekportBot
Disallow: /

# Keep Crawl-delay rules at the bottom of the file.
# Bots that do not understand Crawl-delay might break when they encounter it.
# See https://github.com/otwcode/otwarchive/pull/4411#discussion_r1044351129 (English)
# and https://webtan.impress.co.jp/e/2022/11/04/43611 (Japanese)
#
# NOTE(review): per RFC 9309 a crawler obeys only the most specific group that
# matches its user agent, so the bots below do not inherit the Allow/Disallow
# rules of the "*" group above -- confirm that this is intended.

# Throttle MJ12bot
User-agent: MJ12bot
Crawl-delay: 10

# Throttle YandexBot
# TODO: Yandex has not respected Crawl-delay since 2018
User-agent: YandexBot
Crawl-delay: 2.5

# Throttle bingbot
User-agent: bingbot
Crawl-delay: 20

#----------------------------------------------------------#
# Dynamic sitemap URL
Sitemap: https://dlfm-wiki.top/sitemap.xml
#----------------------------------------------------------#