supporting bot user-agents

This commit is contained in:
Ray Lothian 2020-08-04 12:16:03 +02:00
parent d0340982fc
commit bf16cb694a
13 changed files with 44 additions and 16 deletions

1
node/bots.json Normal file
View file

@ -0,0 +1 @@
["Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)", "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)", "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)", "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)", "Sogou Pic Spider/3.0( http://www.sogou.com/docs/help/webmasters.htm#07)", "Sogou head spider/3.0( http://www.sogou.com/docs/help/webmasters.htm#07)", "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)", "Sogou Orion spider/3.0( http://www.sogou.com/docs/help/webmasters.htm#07)", "Sogou-Test-Spider/4.0 (compatible; MSIE 5.5; Windows 98)", "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Exabot-Thumbnails)", "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)", "facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)", "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)", "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)"]

View file

@ -0,0 +1 @@
[{"ua":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; Qwantify/Bleriot/1.1; +https://help.qwant.com/bot)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; SemrushBot/3~bl; +http://www.semrush.com/bot.html)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/2.0; +http://go.mail.ru/help/robots)","browser":{},"engine":{},"os":{"name":"Linux","version":"x86_64"},"device":{},"cpu":{"architecture":"amd64"}},{"ua":"Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; coccocbot-image/1.0; +http://help.coccoc.com/searchengine)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; SeznamBot/3.2; +http://napoveda.seznam.cz/en/seznambot-intro/)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; Googlebot/2.1; startmebot/1.0; +https://start.me/bot)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (TweetmemeBot/4.0; +http://datasift.com/bot.html) Gecko/20100101 Firefox/31.0","browser":{"name":"Firefox","version":"31.0","major":"31"},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; Go-http-client/1.1; +centurybot9@gmail.com)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; SemrushBot/1.0~bm; +http://www.semrush.com/bot.html)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible;contxbot/1.0)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Safari/537.36","browser":{"name":"Safari"},"engine":{"name":"WebKit","version":"537.36"},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}},{"ua":"Mozilla/5.0 (compatible; SemrushBot/6~bl; +http://www.semrush.com/bot.html)","browser":{},"engine":{},"os":{},"device":{},"cpu":{}}]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
[{"ua":"Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; AspiegelBot)","browser":{"name":"Mobile Safari"},"engine":{"name":"WebKit","version":"537.36"},"os":{"name":"Android","version":"7.0"},"device":{"type":"mobile"},"cpu":{}}]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -18,7 +18,6 @@ const write = ({name, content}, callback) => fs.writeFile('./browsers/' + name,
console.log(e);
}
setTimeout(callback, 0);
console.log(name);
});
// reduce total number to < 400 entries while keeping the last 10 percent of uas
@ -42,6 +41,7 @@ fs.readdir('./browsers/', async (err, files) => {
}
//
const list = [
...require('./bots.json'),
...require('./list-1.json'),
...require('./list-2.json'),
...require('./list-3.json'),
@ -51,7 +51,7 @@ fs.readdir('./browsers/', async (err, files) => {
...require('./list-7.json'),
...require('./list-8.json'),
...require('./list-9.json')
].filter((s, i, l) => l.indexOf(s) === i && ['bot', 'fb_iab', 'fbsv', 'w3m', 'elinks'].some(k => s.toLowerCase().indexOf(k) !== -1) === false);
].filter((s, i, l) => l.indexOf(s) === i && ['fb_iab', 'fbsv', 'w3m', 'elinks'].some(k => s.toLowerCase().indexOf(k) !== -1) === false);
for (const ua of list) {
parser.setUA(ua);
const o = parser.getResult();
@ -69,10 +69,19 @@ fs.readdir('./browsers/', async (err, files) => {
map.os[ss] = map.os[ss] || [];
map.os[ss].push(o.os.name);
}
else if (ua.toLowerCase().indexOf('bot') !== -1) {
cache.bot = cache.bot || {
'misc': []
};
cache.bot.misc.push(o);
map.browser.bot = map.browser.bot || ['Bot'];
map.os.misc = map.os.misc || ['Misc'];
}
else {
// console.log(ua);
// console.log('skipped', ua);
}
}
const contents = [];
for (const browser of Object.keys(cache)) {
for (const os of Object.keys(cache[browser])) {
@ -122,7 +131,6 @@ fs.readdir('./browsers/', async (err, files) => {
'chrome'
].some(k => k === s) === false);
if (map.browser[browser].length > 1) {
console.log(map.browser[browser]);
throw Error('Duplicated browser; add the ones that need to be removed to the list');
}
}

View file

@ -1 +1 @@
{"browser":["Opera","Firefox","Chrome","IE","Mobile Safari","IEMobile","Safari","Android Browser","Opera Mobi","Opera Mini","UCBrowser","Puffin","Samsung Browser","Yandex","MIUI Browser","Edge","WebKit","Blazer","BOLT","Fennec","GoBrowser","Maemo Browser","Minimo","Kindle","Opera Mi","Skyfire","Chromium","Avant ","Maxthon","Arora","Mozilla","Epiphany","Camino","Chimera","Comodo Dragon","Conkeror","Firebird","Swiftfox","Netscape","Flock","iCab","Iceape","IceCat","IceWeasel","Iron","K-Meleon","Konqueror","Links","Lunascape","Lynx","Midori","KHTML","Mosaic","NetSurf","OmniWeb","Opera Tablet","PaleMoon","Phoenix","RockMelt","SeaMonkey","Slim","IceDragon","Waterfox","GSA","Vivaldi","Avast Secure Browser","Chrome WebView","QQBrowser","Iridium","AVG Secure Browser","Basilisk","Chrome Headless","baiduboxapp","Silk","Opera Touch"],"os":["Mac OS","Windows","Android","iOS","Windows Phone","BlackBerry","Symbian","Linux","Windows Phone OS","OpenBSD","Unix","Ubuntu","Fedora","Debian","BeOS","Haiku","Solaris","Chromium OS","NetBSD","FreeBSD","Slackware","SUSE","Gentoo","Mageia","CentOS","Mint","DragonFly","Kubuntu","Mandriva","Zenwalk","GNU","OS/2","AIX","QNX","RISC OS","Nintendo","OpenSolaris","AmigaOS","BSD","OpenVMS"],"matching":{"opera":["mac os","windows","android","symbian","linux","ubuntu","debian","mint","freebsd","nintendo","opensolaris","solaris","openbsd","kubuntu","unix"],"firefox":["mac os","windows","android","ios","ubuntu","linux","fedora","openbsd","netbsd","mageia","freebsd","gentoo","suse","centos","slackware","mint","dragonfly","solaris","kubuntu","mandriva","beos","debian"],"chrome":["mac os","windows","android","linux","openbsd","chromium os","netbsd","freebsd","slackware","suse","ubuntu","debian","ios","fedora","unix"],"ie":["windows","mac os","solaris","linux","unix"],"mobile safari":["ios","blackberry","windows","mac os"],"iemobile":["windows phone","windows phone os","windows"],"safari":["mac os","android","symbian","windows","linux","ios","bsd","netbsd"],"android browser":["android","windows"],"opera mobi":["android","symbian","windows","mac os","linux"],"opera mini":["ios","symbian","blackberry","android","windows","mac os","linux","unix"],"ucbrowser":["android","windows"],"puffin":["android"],"samsung browser":["android","linux"],"yandex":["android","windows","mac os","linux"],"miui browser":["android"],"edge":["windows phone","windows","mac os"],"webkit":["blackberry","symbian","mac os","windows","linux","unix","ios"],"blazer":["windows"],"bolt":["windows"],"fennec":["android","windows","linux","mac os"],"gobrowser":["android"],"maemo browser":["linux"],"minimo":["linux","windows","openbsd"],"kindle":["linux","android"],"opera mi":["windows"],"skyfire":["mac os"],"chromium":["ubuntu","linux","netbsd"],"avant ":["windows"],"maxthon":["windows"],"arora":["linux","windows"],"mozilla":["windows","debian","linux","ubuntu","mac os","openbsd","beos","haiku","solaris","suse","fedora","gentoo","slackware","mint","mandriva","kubuntu","centos","freebsd","zenwalk","os/2","aix","qnx","netbsd","android","openvms","unix"],"epiphany":["linux","ubuntu","openbsd","freebsd","suse","fedora","debian","gentoo","solaris"],"camino":["mac os"],"chimera":["mac os"],"comodo dragon":["windows","linux"],"conkeror":["debian","windows","linux"],"firebird":["windows","mac os","solaris","linux"],"swiftfox":["linux"],"netscape":["windows","mac os","linux","solaris","aix"],"flock":["mac os","windows","linux"],"icab":["mac os","windows"],"iceape":["linux"],"icecat":["linux"],"iceweasel":["linux","debian","gentoo","ubuntu","windows"],"iron":["windows","linux","mac os"],"k-meleon":["windows","linux","freebsd"],"konqueror":["linux","freebsd","fedora","kubuntu","slackware","openbsd","dragonfly","windows","solaris","netbsd","suse","debian"],"links":["unix","linux","gentoo","openbsd","netbsd","freebsd","mac os","solaris","debian"],"lunascape":["windows"],"lynx":["gnu"],"midori":["linux","freebsd","windows","netbsd"],"khtml":["windows"],"mosaic":["windows","aix","solaris"],"netsurf":["risc os","linux","netbsd"],"omniweb":["mac os"],"opera tablet":["windows","symbian"],"palemoon":["windows"],"phoenix":["linux","windows","mac os"],"rockmelt":["windows","mac os"],"seamonkey":["windows","linux","os/2","mac os","freebsd","openbsd","fedora","suse","mandriva","gentoo","beos","haiku","amigaos","centos","netbsd"],"slim":["windows","android"],"icedragon":["windows"],"waterfox":["windows","linux","mac os"],"gsa":["ios"],"vivaldi":["windows","linux","netbsd","mac os"],"avast secure browser":["windows"],"chrome webview":["android"],"qqbrowser":["windows"],"iridium":["windows"],"avg secure browser":["windows"],"basilisk":["windows"],"chrome headless":["linux"],"baiduboxapp":["android"],"silk":["mac os","android","linux"],"opera touch":["ios"]}}
{"browser":["Bot","IE","Konqueror","Opera","Firefox","Chrome","Mobile Safari","IEMobile","Safari","Android Browser","Opera Mobi","Opera Mini","UCBrowser","Puffin","Samsung Browser","Yandex","MIUI Browser","Edge","WebKit","Blazer","BOLT","Fennec","GoBrowser","Maemo Browser","Minimo","Kindle","Opera Mi","Skyfire","Chromium","Avant ","Maxthon","Arora","Mozilla","Epiphany","Camino","Chimera","Comodo Dragon","Conkeror","Firebird","Swiftfox","Netscape","Flock","iCab","Iceape","IceCat","IceWeasel","Iron","K-Meleon","Links","Lunascape","Lynx","Midori","KHTML","Mosaic","NetSurf","OmniWeb","Opera Tablet","PaleMoon","Phoenix","RockMelt","SeaMonkey","Slim","IceDragon","Waterfox","GSA","Vivaldi","Avast Secure Browser","Chrome WebView","QQBrowser","Iridium","AVG Secure Browser","Basilisk","Chrome Headless","baiduboxapp","Silk","Opera Touch"],"os":["Misc","Windows","Linux","Mac OS","Android","iOS","Windows Phone","BlackBerry","Symbian","Windows Phone OS","OpenBSD","Unix","Ubuntu","Fedora","Debian","BeOS","Haiku","Solaris","Chromium OS","NetBSD","FreeBSD","Slackware","SUSE","Gentoo","Mageia","CentOS","Mint","DragonFly","Kubuntu","Mandriva","Zenwalk","GNU","OS/2","AIX","QNX","RISC OS","Nintendo","OpenSolaris","AmigaOS","BSD","OpenVMS"],"matching":{"bot":["misc"],"ie":["windows","mac os","solaris","linux","unix"],"konqueror":["linux","freebsd","fedora","kubuntu","slackware","openbsd","dragonfly","windows","solaris","netbsd","suse","debian"],"opera":["mac os","windows","android","symbian","linux","ubuntu","debian","mint","freebsd","nintendo","opensolaris","solaris","openbsd","kubuntu","unix"],"firefox":["mac os","windows","android","ios","ubuntu","linux","fedora","openbsd","netbsd","mageia","freebsd","gentoo","suse","centos","slackware","mint","dragonfly","solaris","kubuntu","mandriva","beos","debian"],"chrome":["mac os","windows","android","linux","openbsd","chromium os","netbsd","freebsd","slackware","suse","ubuntu","debian","ios","fedora","unix"],"mobile safari":["ios","blackberry","windows","android","mac os"],"iemobile":["windows phone","windows phone os","windows"],"safari":["mac os","android","symbian","windows","linux","ios","bsd","netbsd"],"android browser":["android","windows"],"opera mobi":["android","symbian","windows","mac os","linux"],"opera mini":["ios","symbian","blackberry","android","windows","mac os","linux","unix"],"ucbrowser":["android","windows"],"puffin":["android"],"samsung browser":["android","linux"],"yandex":["android","windows","mac os","linux"],"miui browser":["android"],"edge":["windows phone","windows","mac os"],"webkit":["blackberry","symbian","mac os","windows","linux","unix","ios"],"blazer":["windows"],"bolt":["windows"],"fennec":["android","windows","linux","mac os"],"gobrowser":["android"],"maemo browser":["linux"],"minimo":["linux","windows","openbsd"],"kindle":["linux","android"],"opera mi":["windows"],"skyfire":["mac os"],"chromium":["ubuntu","linux","netbsd"],"avant ":["windows"],"maxthon":["windows"],"arora":["linux","windows"],"mozilla":["windows","debian","linux","ubuntu","mac os","openbsd","beos","haiku","solaris","suse","fedora","gentoo","slackware","mint","mandriva","kubuntu","centos","freebsd","zenwalk","os/2","aix","qnx","netbsd","android","openvms","unix"],"epiphany":["linux","ubuntu","openbsd","freebsd","suse","fedora","debian","gentoo","solaris"],"camino":["mac os"],"chimera":["mac os"],"comodo dragon":["windows","linux"],"conkeror":["debian","windows","linux"],"firebird":["windows","mac os","solaris","linux"],"swiftfox":["linux"],"netscape":["windows","mac os","linux","solaris","aix"],"flock":["mac os","windows","linux"],"icab":["mac os","windows"],"iceape":["linux"],"icecat":["linux"],"iceweasel":["linux","debian","gentoo","ubuntu","windows"],"iron":["windows","linux","mac os"],"k-meleon":["windows","linux","freebsd"],"links":["unix","linux","gentoo","openbsd","netbsd","freebsd","mac os","solaris","debian"],"lunascape":["windows"],"lynx":["gnu"],"midori":["linux","freebsd","windows","netbsd"],"khtml":["windows"],"mosaic":["windows","aix","solaris"],"netsurf":["risc os","linux","netbsd"],"omniweb":["mac os"],"opera tablet":["windows","symbian"],"palemoon":["windows"],"phoenix":["linux","windows","mac os"],"rockmelt":["windows","mac os"],"seamonkey":["windows","linux","os/2","mac os","freebsd","openbsd","fedora","suse","mandriva","gentoo","beos","haiku","amigaos","centos","netbsd"],"slim":["windows","android"],"icedragon":["windows"],"waterfox":["windows","linux","mac os"],"gsa":["ios"],"vivaldi":["windows","linux","netbsd","mac os"],"avast secure browser":["windows"],"chrome webview":["android"],"qqbrowser":["windows"],"iridium":["windows"],"avg secure browser":["windows"],"basilisk":["windows"],"chrome headless":["linux"],"baiduboxapp":["android"],"silk":["mac os","android","linux"],"opera touch":["ios"]}}