phpDocumentor default
[ class tree: default ] [ index: default ] [ all elements ]

Class: robots

Source Location: /ua/robots.cls.php

Class robots

Property Summary
mixed   $robots_lib  

[ Top ]
Method Summary
void   get_robots_info()  

[ Top ]
Properties
mixed   $robots_lib = array(
// 'robot id' => 'robot clear text'

# Common robots (In robot file)
'appie' => 'Walhello appie',
'architext' => 'ArchitextSpider',
'jeeves' => 'AskJeeves',
'bjaaland' => 'Bjaaland',
'ferret' => 'Wild Ferret Web Hopper #1, #2, #3',
'googlebot' => 'Googlebot', // Google
'gulliver' => 'Northern Light Gulliver',
'harvest' => 'Harvest',
'htdig' => 'ht://Dig',
'linkwalker' => 'LinkWalker',
'lycos_' => 'Lycos',
'moget' => 'moget',
'muscatferret' => 'Muscat Ferret',
'myweb' => 'Internet Shinchakubin',
'nextgensearchbot' => 'ZoomInfo Crawler', // Add UPK
'nomad' => 'Nomad',
'scooter' => 'Scooter', // AltaVista
'slurp' => 'Inktomi Slurp',
'^voyager\/' => 'Voyager',
'weblayers' => 'weblayers',

# Common robots (Not in robot file)
'antibot' => 'Antibot',
'bruinbot' => 'The web archive',
'digout4u' => 'Digout4u',
'echo' => 'EchO!',
'fast\-webcrawler' => 'Fast-Webcrawler', // AllTheWeb
'ia_archiver\-web\.archive\.org' => '', # Must be before ia_archiver to avoid confusion with alexa
'ia_archiver' => 'Alexa (IA Archiver)',
'jennybot' => 'JennyBot',
'mercator' => 'Mercator',
'msnbot' => 'MSNBot', // MSN
'netcraft' => 'Netcraft',
'petersnews' => 'Petersnews',
'unlost_web_crawler' => 'Unlost Web Crawler',
'voila' => 'Voila',
'webbase' => 'WebBase',
'webcollage' => 'WebCollage',
'cfetch' => 'cfetch',
'zyborg' => 'ZyBorg', // Must be before wisenut
'wisenutbot' => 'WISENutbot', // Looksmart

# Less common robots (In robot file)
'[^a]fish' => 'Fish search',
'abcdatos' => 'ABCdatos BotLink',
'acme\.spider' => 'Acme.Spider',
'ahoythehomepagefinder' => 'Ahoy! The Homepage Finder',
'alkaline' => 'Alkaline',
'anthill' => 'Anthill',
'arachnophilia' => 'Arachnophilia',
'arale' => 'Arale',
'araneo' => 'Araneo',
'aretha' => 'Aretha',
'ariadne' => 'ARIADNE',
'powermarks' => '',
'arks' => 'arks',
'aspider' => 'ASpider (Associative Spider)',
'atn\.txt' => 'ATN Worldwide',
'atomz' => 'Atomz.com Search Robot',
'auresys' => 'AURESYS',
'backrub' => 'BackRub',
'bbot' => 'BBot',
'bigbrother' => 'Big Brother',
'blackwidow' => 'BlackWidow',
'blindekuh' => 'Die Blinde Kuh',
'bloodhound' => 'Bloodhound',
'borg\-bot' => 'Borg-Bot',
'brightnet' => 'bright.net caching robot',
'bspider' => 'BSpider',
'cactvschemistryspider' => 'CACTVS Chemistry Spider',
'calif[^r]' => 'Calif',
'cassandra' => 'Cassandra',
'cgireader' => 'Digimarc Marcspider/CGI',
'checkbot' => 'Checkbot',
'christcrawler' => 'ChristCrawler.com',
'churl' => 'churl',
'cienciaficcion' => 'cIeNcIaFiCcIoN.nEt',
'collective' => 'Collective',
'combine' => 'Combine System',
'conceptbot' => 'Conceptbot',
'coolbot' => 'CoolBot',
'core' => 'Web Core / Roots',
'cosmos' => 'XYLEME Robot',
'cruiser' => 'Internet Cruiser Robot',
'cusco' => 'Cusco',
'cyberspyder' => 'CyberSpyder Link Test',
'desertrealm' => 'Desert Realm Spider',
'deweb' => 'DeWeb(c) Katalog/Index',
'dienstspider' => 'DienstSpider',
'digger' => 'Digger',
'diibot' => 'Digital Integrity Robot',
'direct_hit' => 'Direct Hit Grabber',
'dnabot' => 'DNAbot',
'download_express' => 'DownLoad Express',
'dragonbot' => 'DragonBot',
'dwcp' => 'DWCP (Dridus\' Web Cataloging Project)',
'e\-collector' => 'e-collector',
'ebiness' => 'EbiNess',
'elfinbot' => 'ELFINBOT',
'emacs' => 'Emacs-w3 Search Engine',
'emcspider' => 'ananzi',
'esther' => 'Esther',
'evliyacelebi' => 'Evliya Celebi',
'fastcrawler' => 'FastCrawler',
'fdse' => 'Fluid Dynamics Search Engine robot',
'felix' => 'Felix IDE',
'fetchrover' => 'FetchRover',
'fido' => 'fido',
'finnish' => 'Hämähäkki',
'fireball' => 'KIT-Fireball',
'fouineur' => 'Fouineur',
'francoroute' => 'Robot Francoroute',
'freecrawl' => 'Freecrawl',
'funnelweb' => 'FunnelWeb',
'gama' => 'gammaSpider, FocusedCrawler',
'gazz' => 'gazz',
'gcreep' => 'GCreep',
'getbot' => 'GetBot',
'geturl' => 'GetURL',
'golem' => 'Golem',
'grapnel' => 'Grapnel/0.01 Experiment',
'griffon' => 'Griffon',
'gromit' => 'Gromit',
'gulperbot' => 'Gulper Bot',
'hambot' => 'HamBot',
'havindex' => 'havIndex',
'hometown' => 'Hometown Spider Pro',
'htmlgobble' => 'HTMLgobble',
'hyperdecontextualizer' => 'Hyper-Decontextualizer',
'iajabot' => 'iajaBot',
'iconoclast' => 'Popular Iconoclast',
'ilse' => 'Ingrid',
'imagelock' => 'Imagelock',
'incywincy' => 'IncyWincy',
'informant' => 'Informant',
'infoseek' => 'InfoSeek Robot 1.0',
'infoseeksidewinder' => 'Infoseek Sidewinder',
'infospider' => 'InfoSpiders',
'inspectorwww' => 'Inspector Web',
'intelliagent' => 'IntelliAgent',
'irobot' => 'I, Robot',
'iron33' => 'Iron33',
'israelisearch' => 'Israeli-search',
'javabee' => 'JavaBee',
'jbot' => 'JBot Java Web Robot',
'jcrawler' => 'JCrawler',
'jobo' => 'JoBo Java Web Robot',
'jobot' => 'Jobot',
'joebot' => 'JoeBot',
'jubii' => 'The Jubii Indexing Robot',
'jumpstation' => 'JumpStation',
'kapsi' => 'image.kapsi.net',
'katipo' => 'Katipo',
'kilroy' => 'Kilroy',
'ko_yappo_robot' => 'KO_Yappo_Robot',
'labelgrabber\.txt' => 'LabelGrabber',
'larbin' => 'larbin',
'legs' => 'legs',
'linkidator' => 'Link Validator',
'linkscan' => 'LinkScan',
'lockon' => 'Lockon',
'logo_gif' => 'logo.gif Crawler',
'macworm' => 'Mac WWWWorm',
'magpie' => 'Magpie',
'marvin' => 'marvin/infoseek',
'mattie' => 'Mattie',
'mediafox' => 'MediaFox',
'merzscope' => 'MerzScope',
'meshexplorer' => 'NEC-MeshExplorer',
'mindcrawler' => 'MindCrawler',
'mnogosearch' => 'mnoGoSearch search engine software',
'momspider' => 'MOMspider',
'monster' => 'Monster',
'motor' => 'Motor',
'muncher' => 'Muncher',
'mwdsearch' => 'Mwd.Search',
'ndspider' => 'NDSpider',
'nederland\.zoek' => 'Nederland.zoek',
'netcarta' => 'NetCarta WebMap Engine',
'netmechanic' => 'NetMechanic',
'netscoop' => 'NetScoop',
'newscan\-online' => 'newscan-online',
'nhse' => 'NHSE Web Forager',
'northstar' => 'The NorthStar Robot',
'nzexplorer' => 'nzexplorer',
'objectssearch' => 'ObjectsSearch',
'occam' => 'Occam',
'octopus' => 'HKU WWW Octopus',
'openfind' => 'Openfind data gatherer',
'orb_search' => 'Orb Search',
'packrat' => 'Pack Rat',
'pageboy' => 'PageBoy',
'parasite' => 'ParaSite',
'patric' => 'Patric',
'pegasus' => 'pegasus',
'perignator' => 'The Peregrinator',
'perlcrawler' => 'PerlCrawler 1.0',
'phantom' => 'Phantom',
'phpdig' => 'PhpDig',
'piltdownman' => 'PiltdownMan',
'pimptrain' => 'Pimptrain.com\'s robot',
'pioneer' => 'Pioneer',
'pitkow' => 'html_analyzer',
'pjspider' => 'Portal Juice Spider',
'plumtreewebaccessor' => 'PlumtreeWebAccessor',
'poppi' => 'Poppi',
'portalb' => 'PortalB Spider',
'psbot' => 'psbot',
'python' => 'The Python Robot',
'raven' => 'Raven Search',
'rbse' => 'RBSE Spider',
'resumerobot' => 'Resume Robot',
'rhcs' => 'RoadHouse Crawling System',
'road_runner' => 'Road Runner: The ImageScape Robot',
'robbie' => 'Robbie the Robot',
'robi' => 'ComputingSite Robi/1.0',
'robocrawl' => 'RoboCrawl Spider',
'robofox' => 'RoboFox',
'robozilla' => 'Robozilla',
'roverbot' => 'Roverbot',
'rules' => 'RuLeS',
'safetynetrobot' => 'SafetyNet Robot',
'search\-info' => 'Sleek',
'search_au' => 'Search.Aus-AU.COM',
'searchprocess' => 'SearchProcess',
'senrigan' => 'Senrigan',
'sgscout' => 'SG-Scout',
'shaggy' => 'ShagSeeker',
'shaihulud' => 'Shai\'Hulud',
'sift' => 'Sift',
'simbot' => 'Simmany Robot Ver1.0',
'site\-valet' => 'Site Valet',
'sitetech' => 'SiteTech-Rover',
'skymob' => 'Skymob.com',
'slcrawler' => 'SLCrawler',
'smartspider' => 'Smart Spider',
'snooper' => 'Snooper',
'solbot' => 'Solbot',
'speedy' => 'Speedy Spider',
'spider_monkey' => 'spider_monkey',
'spiderbot' => 'SpiderBot',
'spiderline' => 'Spiderline Crawler',
'spiderman' => 'SpiderMan',
'spiderview' => 'SpiderView(tm)',
'spry' => 'Spry Wizard Robot',
'sqworm' => 'Sqworm',
'ssearcher' => 'Site Searcher',
'suke' => 'Suke',
'suntek' => 'suntek search engine',
'sven' => 'Sven',
'tach_bw' => 'TACH Black Widow',
'tarantula' => 'Tarantula',
'tarspider' => 'tarspider',
'techbot' => 'TechBOT',
'templeton' => 'Templeton',
'titan' => 'TITAN',
'titin' => 'TitIn',
'tkwww' => 'The TkWWW Robot',
'tlspider' => 'TLSpider',
'ucsd' => 'UCSD Crawl',
'udmsearch' => 'UdmSearch',
'urlck' => 'URL Check',
'valkyrie' => 'Valkyrie',
'verticrawl' => 'Verticrawl',
'victoria' => 'Victoria',
'visionsearch' => 'vision-search',
'voidbot' => 'void-bot',
'vwbot' => 'VWbot',
'w3index' => 'The NWI Robot',
'w3m2' => 'W3M2',
'wallpaper' => 'WallPaper (alias crawlpaper)',
'wanderer' => 'the World Wide Web Wanderer',
'wapspider' => 'w@pSpider by wap4.com',
'webbandit' => 'WebBandit Web Spider',
'webcatcher' => 'WebCatcher',
'webcopy' => 'WebCopy',
'webfetcher' => 'webfetcher',
'webfoot' => 'The Webfoot Robot',
'webinator' => 'Webinator',
'weblinker' => 'WebLinker',
'webmirror' => 'WebMirror',
'webmoose' => 'The Web Moose',
'webquest' => 'WebQuest',
'webreader' => 'Digimarc MarcSpider',
'webreaper' => 'WebReaper',
'websnarf' => 'Websnarf',
'webspider' => 'WebSpider',
'webvac' => 'WebVac',
'webwalk' => 'webwalk',
'webwalker' => 'WebWalker',
'webwatch' => 'WebWatch',
'whatuseek' => 'whatUseek Winona',
'whowhere' => 'WhoWhere Robot',
'wired\-digital' => 'Wired Digital',
'wmir' => 'w3mir',
'wolp' => 'WebStolperer',
'wombat' => 'The Web Wombat',
'worm' => 'The World Wide Web Worm',
'wwwc' => 'WWWC Ver 0.2.5',
'wz101' => 'WebZinger',
'xget' => 'XGET',

# Other robots reported by users
'almaden' => 'IBM Almaden',
'aport' => 'Aport',
'argus' => 'Argus', // Must be before nutch
'asterias' => 'Asterias',
'awbot' => 'AWBot',
'baiduspider' => 'BaiDuSpider',
'becomebot' => 'BecomeBot',
'bender' => 'bender',
'bloglines' => 'Bloglines',
'blogpulse' => 'BlogPulse ISSpider intelliseek.com',
'blogshares' => 'Blogshares Spiders',
'blogslive' => 'Blogslive',
'blogssay' => 'BlogsSay :: RSS Search Crawler',
'bobby' => 'Bobby',
'boris' => 'Boris',
'bumblebee' => 'Bumblebee (relevare.com)',
'converacrawler' => 'ConveraCrawler',
'cscrawler' => 'CsCrawler',
'daviesbot' => 'DaviesBot',
'daypopbot' => 'DayPop',
'dipsie\.bot' => 'Dipsie',
'domainsdb\.net' => 'DomainsDB.net',
'exactseek' => 'ExactSeek Crawler',
'ezresult' => 'Ezresult',
'everbeecrawler' => 'EverbeeCrawler',
'ezresult' => 'Ezresult',
'enteprise' => 'Fast Enteprise Crawler',
'feedburner' => 'Feedburner',
'feedfetcher\-google' => 'Feedfetcher-Google',
'feedster' => 'Feedster',
'findlinks' => 'findlinks',
'gaisbot' => 'Gaisbot',
'geniebot' => 'geniebot',
'gigabot' => 'GigaBot',
'girafabot' => 'Girafabot',
'gnodspider' => 'GNOD Spider',
'grub' => 'Grub.org',
'henrythemiragorobot' => 'Mirago',
'holmes' => 'Holmes',
'infomine' => 'INFOMINE VLCrawler',
'internetseer' => 'InternetSeer',
'justview' => 'JustView',
'keyoshid' => 'Yahoo! Japan keyoshid robot study', // Must come before Y!J
'kinjabot' => 'Yahoo! Japan keyoshid robot study',
'kinja\-imagebot' => 'Kinja Imagebot',
'linkbot' => 'LinkBot',
'metager\-linkchecker' => 'MetaGer LinkChecker',
'linkchecker' => 'LinkChecker',
'livejournal\.com' => 'LiveJournal.com',
'lmspider' => 'lmspider',
'magpierss' => 'MagpieRSS',
'mediapartners\-google' => 'Google AdSense',
'microsoft_url_control' => 'Microsoft URL Control',
'mj12bot' => 'MJ12bot',
'msiecrawler' => 'MSIECrawler',
'nagios' => 'Nagios',
'newsgatoronline' => 'NewsGator Online',
'noxtrumbot' => 'noxtrumbot',
'nutch' => 'Nutch',
'opentaggerbot' => 'OpenTaggerBot',
'outfoxbot' => 'OutfoxBot',
'perman' => 'Perman surfer',
'pluckfeedcrawler' => 'PluckFeedCrawler',
'pompos' => 'Pompos',
'popdexter' => 'Popdexter',
'rambler' => 'StackRambler',
'redalert' => 'Red Alert',
'rojo' => 'RoJo',
'rssimagesbot' => 'rssImagesBot',
'ruffle' => 'ruffle SemanticWeb crawler',
'rufusbot' => 'RufusBot Rufus Web Miner',
'sandcrawler' => 'SandCrawler (Microsoft)',
'sbider' => 'SBIder',
'seekbot' => 'Seekbot',
'seznambot' => 'SeznamBot',
'shoutcast' => 'Shoutcast Directory Service',
'slysearch' => 'SlySearch',
'sohu-search' => 'sohu-search',
'surveybot' => 'SurveyBot',
'syndic8' => 'Syndic8',
'technoratibot' => 'Technoratibot',
't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e' => 'T-H-U-N-D-E-R-S-T-O-N-E',
'topicblogs' => 'topicblogs',
'turnitinbot' => 'Turn It In',
'turtlescanner' => 'Turtle', // Must be before turtle
'turtle' => 'Turtle',
'ultraseek' => 'Ultraseek',
'w3c\-checklink' => 'W3C Link Checker',
'w3c_css_validator_jfouffa' => 'W3C jigsaw CSS Validator',
'w3c_validator' => 'W3C Validator',
'webclipping\.com' => 'WebClipping.com',
'webcompass' => 'webcompass',
'webvulncrawl' => 'WebVulnCrawl',
'wonderer' => 'Web Wombat Redback Spider',
'y!j' => 'Y!J Yahoo Japan', // Must come after keyoshid
'yacy' => 'yacy',
'yahoo\-blogs' => 'Yahoo-Blogs',
'yahoo\-verticalcrawler'=> 'Yahoo Vertical Crawler', // Yahoo
'yahoofeedseeker' => 'Yahoo Feed Seeker',
'yahooseeker\-testing' => 'YahooSeeker-Testing',
'yahooseeker' => 'YahooSeeker Yahoo! Blog crawler',
'yahoo\-mmcrawler' => 'Yahoo-MMCrawler',
'yandex' => 'Yandex bot',
'zealbot' => 'ZealBot',
'zyborg' => 'Zyborg', // Looksmart
'ng\/1\.' => 'NG 1.x (Exalead)', // put at end to avoid false positive
'ng\/2\.' => 'NG 2.x (Exalead)', // put at end to avoid false positive
# UPK Add
'OmniExplorer_Bot' => 'OmniExplorer Bot',
'ichiro\/' => 'goo',
'moewe' => 'doko.jp',
# Generic root ID
'robot' => 'Unknown robot (identified by \'robot\')',
'crawl' => 'Unknown robot (identified by \'crawl\')',
'spider' => 'Unknown robot (identified by \'spider\')',
'\wbot[\/\-]' => 'Unknown robot (identified by \'bot/\' or \'bot-\')',

# Unknown robots identified by hit on robots.txt
'unknown' => 'Unknown robot (identified by hit on \'robots.txt\')',
)
[line 21]

[ Top ]
Methods
get_robots_info  [line 450]

  void get_robots_info( $ua  )

Parameters:
   $ua: 


[ Top ]

Documentation generated on Mon, 09 Jul 2007 21:56:26 +0900 by phpDocumentor 1.3.2