#
# /robots.txt file for http://shadow.sentry.org/
#
# Format: each record is an optional comment, one User-agent line and its
# Disallow line(s). Every directive must be on its own line, and records
# are separated by a blank line; a '#' comments out the rest of its line.
#
# Robots to restrict (* is all)
User-agent: *
Disallow: /cgi-bin/

# Unknown
User-agent: NPBot
Disallow: /

# MSIE browser
User-agent: MSIECrawler
Disallow: /

# MSN robot
User-agent: msnbot
Disallow: /

# Microsoft
User-agent: msnbot-media
Disallow: /

# Picture Indexer http://www.picsearch.com/bot.html
User-agent: psbot
Disallow: /

# Japanese robot
# (token written without the trailing '+' that appears in the crawler's
# full User-Agent string, so that token-based matchers recognise it too)
User-agent: Baiduspider
Disallow: /

# Telstra
User-agent: Sensis Web Crawler
Disallow: /

# Unknown robot
User-agent: appie
Disallow: /

# Italian robot
User-agent: Iltrovatore-Setaccio
Disallow: /

# Contextual robot - kuloko.com "coming soon"
User-agent: kuloko-bot
Disallow: /

# Google AdSense bot
User-agent: Mediapartners-Google
Disallow: /

# Google image bot
User-agent: Googlebot-Image
Disallow: /

# Japanese image robot
User-agent: ImageBot
Disallow: /

# Japanese search engine
User-agent: NaverBot-1.0
Disallow: /

# Japanese search engine
User-agent: NaverBot_dloader
Disallow: /

# Polish robot
User-agent: Szukacz
Disallow: /

# Unknown
User-agent: Gigabot
Disallow: /

# Russian? open source search engine
User-agent: DataparkSearch
Disallow: /

# Unknown
User-agent: QuepasaCreep
Disallow: /

# Unknown
User-agent: SurveyBot
Disallow: /

# Unknown
User-agent: NetResearchServer
Disallow: /

# Unknown
User-agent: Clustered-Search-Bot
Disallow: /

# Unknown
User-agent: Scrubby
Disallow: /

# Unknown
User-agent: VoilaBot
Disallow: /

# Unknown
User-agent: GeonaBot
Disallow: /

# Unknown
User-agent: Pompos
Disallow: /

# Chinese edu search engine
# http://www.openfind.com.tw/robot.html
User-agent: Openbot
Disallow: /

# Cluster search engine
User-agent: Clushbot
Disallow: /

# Yahoo ignoring /docs/ exclusion
# so disallow all
User-agent: Slurp
Disallow: /

# ??
User-agent: Jetbot
Disallow: /

# ??
User-agent: sohu-search
Disallow: /

# http://www.amfibi.com
User-agent: Amfibibot
Disallow: /

# www.updated.com
User-agent: updated
Disallow: /

# ???
User-agent: StackRambler
Disallow: /

# Yahoo image crawler
User-agent: Yahoo-MMCrawler
Disallow: /

# ???
User-agent: Knowledge.com
Disallow: /

# Russian robot: tankvit@e-mail.ru
User-agent: booch_1.0.7
Disallow: /

# http://www.WISEnutbot.com
User-agent: ZyBorg
Disallow: /

# www.seventwentyfour.com
User-agent: LinkWalker
Disallow: /

# http://www.become.com/webmasters.html
#
# http://www.become.com/site_owners.html
User-agent: BecomeBot
Disallow: /

# http://www.tutorgig.info
User-agent: TutorGigBot
Disallow: /

# ???
User-agent: Girafabot
Disallow: /

# ???
User-agent: Holmes
Disallow: /

# ???
User-agent: webcrawl.net
Disallow: /

# ???
User-agent: GoForIt.com
Disallow: /

# http://www.globalspec.com/Ocelli
User-agent: Ocelli
Disallow: /

# ???
User-agent: Ultraseek
Disallow: /

# http://www.nutch.org/docs/en/bot.html
User-agent: NutchCVS
Disallow: /

# http://www.eliyon.com/NextGenSearchBot
User-agent: NextGenSearchBot
Disallow: /

# http://www.authoritativeweb.com/crawl
User-agent: ConveraCrawler
Disallow: /

# ???
User-agent: ScSpider
Disallow: /

# www.earthcom.info
User-agent: EARTHCOM.info
Disallow: /

# http://www.majestic12.co.uk/projects/dsearch/mj12bot.php
User-agent: MJ12bot
Disallow: /

# ???
User-agent: Zao-Crawler
Disallow: /

# http://www.omni-explorer.com Internet Categorizer
User-agent: OmniExplorer_Bot
Disallow: /

# http://www.aipbot.com
User-agent: aipbot
Disallow: /

# http://www.entireweb.com
User-agent: Speedy Spider
Disallow: /

# http://www.aberja.de
User-agent: Aberja Checkomat
Disallow: /

# http://irl.cs.tamu.edu/crawler
User-agent: IRLbot
Disallow: /

# http://wortschatz.uni-leipzig.de/nextlinks/findlinks.html
User-agent: findlinks
Disallow: /

# http://gossamer-threads.com/scripts/links/
User-agent: Links 2.0
Disallow: /

# ??
User-agent: MnoGoSearch
Disallow: /

# Ask Jeeves: http://sp.ask.com/docs/about/tech_crawling.html
User-agent: Teoma
Disallow: /

# ??
User-agent: boitho.com-dc
Disallow: /

# ??
User-agent: wbdbot
Disallow: /

# Czech search engine
User-agent: ccubee
Disallow: /

# IBM
# NOTE(review): this value is a URL rather than a short robot name;
# presumably it mirrors the User-Agent string the crawler sends - confirm.
User-agent: http://www.almaden.ibm.com/cs/crawler
Disallow: /

# http://szukaj.onet.pl
User-agent: OnetSzukaj
Disallow: /

# http://squigglebot.com
User-agent: SquigglebotBot
Disallow: /

# http://64.124.122.252/feedback.html
User-agent: RufusBot
Disallow: /

# Scam
User-agent: Scumbot
Disallow: /

# http://www.boitho.com/dcbot.html
User-agent: boitho.com-dc
Disallow: /

# ????
User-agent: voyager
Disallow: /

# www.ansearch.com.au
User-agent: AnsearchBot
Disallow: /

# www.local.com
User-agent: LocalcomBot
Disallow: /

# http://www.cazoodle.com
User-agent: CazoodleBot
Disallow: /

# http://www.sitesell.com/sbider.html
User-agent: SBIder
Disallow: /

# http://www.exabot.com/go/robot
User-agent: Exabot
Disallow: /

# Accoona-AI-Agent/1.1.2 (aicrawler at accoonabot dot com)
User-agent: Accoona-AI-Agent
Disallow: /

# http://kc.nict.go.jp/icc/crawl.html
User-agent: LC-Crawler
Disallow: /

# http://kc.nict.go.jp/icc/crawl.html
User-agent: ICC-Crawler
Disallow: /

# http://www.boitho.com/dcbot.html
User-agent: SearchDaimon.com-dc
Disallow: /

# http://www.sogou.com/docs/help/webmasters.htm#07
User-agent: sogou web spider
Disallow: /

# http://www.envolk.com/envolkspiderinfo.html
User-agent: envolk
Disallow: /

# http://www.webalta.net/ru/about_webmaster.html
User-agent: WebAlta Crawler
Disallow: /

# RedBot/redbot-1.0 (Rediff.com Crawler; redbot at rediff dot com)
User-agent: RedBot
Disallow: /

# Unknown
User-agent: Yandex
Disallow: /

# http://www.yodao.com/help/webmaster/spider/
User-agent: YodaoBot-Image
Disallow: /