#
# robots.txt for http://www.wikipedia.org/ and http://www.hakanberg.com/
# Please check any changes using a syntax validator such as
# http://tool.motoricerca.info/robots-checker.phtml
#
# NOTE: every directive must be on its own line. A '#' starts a comment
# that runs to the end of the line, so a directive that shares a line
# with a preceding '#' is ignored by crawlers.
#

# advertising-related bots (rule intentionally left disabled):
#User-agent: Mediapartners-Google*
#Disallow: /

# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
User-agent: UbiCrawler
Disallow: /

User-agent: DOC
Disallow: /

User-agent: Zao
Disallow: /

# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

# Several User-agent lines followed by one Disallow form a single group:
# the rule applies to every agent named in the group.
User-agent: WebCopier
User-agent: WebCopier v.2.2
User-agent: WebCopier v3.2a
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
User-agent: WebZIP/4.21
User-agent: WebZIP/5.0
User-agent: WebZip/4.0
Disallow: /

User-agent: linko
Disallow: /

User-agent: HTTrack
User-agent: HTTrack 3.0
Disallow: /

User-agent: Microsoft.URL.Control
User-agent: Microsoft URL Control
User-agent: Microsoft URL Control - 5.01.4511
User-agent: Microsoft URL Control - 6.00.8169
Disallow: /

User-agent: Xenu
User-agent: Xenu's Link Sleuth 1.1c
User-agent: Xenu's
Disallow: /

User-agent: larbin
Disallow: /

User-agent: libwww
Disallow: /

User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /

#
# Sorry, wget in its recursive mode is a frequent problem.
# Please read the man page and use it properly; there is a
# --wait option you can use to set the delay between hits,
# for instance.
#
User-agent: wget
User-agent: Wget
User-agent: Wget/1.11.3
User-agent: Wget/1.5.3
User-agent: Wget/1.5.3.1
User-agent: Wget/1.6
User-agent: wget*
Disallow: /

#
# The 'grub' distributed client has been *very* poorly behaved.
#
User-agent: grub-client
Disallow: /

#
# Doesn't follow robots.txt anyway, but...
#
User-agent: k2spider
Disallow: /

#
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /

# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /

#
# Added by Tom Stone
#
User-agent: Firebat 2.7.13
Disallow: /

User-agent: EmailWolf
User-agent: EmailWolf 1.00
Disallow: /

User-agent: EmailCollector
Disallow: /

User-agent: EmailSiphon
Disallow: /

# All other crawlers: everything is allowed except the /xml/ tree.
User-agent: *
Disallow: /xml/
#