2019-03-12 02:50:52 -07:00
|
|
|
'use strict';
|
|
|
|
|
2020-03-15 01:01:29 -07:00
|
|
|
const fs = require('fs');
|
|
|
|
const path = require('path');
|
|
|
|
const UAParser = require('./ua-parser.min.js');
|
2019-03-12 02:50:52 -07:00
|
|
|
|
2020-03-15 01:01:29 -07:00
|
|
|
// Parsed user-agent results, grouped as cache[browser][os] -> array of
// parser results (both keys are the lower-cased names).
const cache = {};

// Source data for map.json:
//   browser[key]  -> every spelling of that browser name seen so far
//   os[key]       -> every spelling of that OS name seen so far
//   matching[key] -> the OS keys for which that browser has entries
const map = {
  browser: {},
  os: {},
  matching: {}
};

// One parser instance is shared; it is re-targeted with setUA() per agent.
const parser = new UAParser();
|
2019-03-12 02:50:52 -07:00
|
|
|
|
2020-03-15 01:01:29 -07:00
|
|
|
/**
 * Store one generated file under ./browsers/.
 *
 * @param {{name: string, content: string}} file - file name (relative to
 *   ./browsers/) and the text to store, written as UTF-8.
 * @param {Function} callback - invoked after the write attempt finishes,
 *   whether it succeeded or not.
 */
const write = ({name, content}, callback) => {
  const destination = './browsers/' + name;

  const finished = error => {
    if (error) {
      // a failed write is only reported; the caller still gets its callback
      console.log(error);
    }
    // the callback is scheduled through a zero-delay timer, not called directly
    setTimeout(callback, 0);
  };

  return fs.writeFile(destination, content, 'utf8', finished);
};
|
|
|
|
|
2020-08-03 23:22:37 -07:00
|
|
|
/**
 * Thin out an array IN PLACE until it holds at most `length` entries.
 *
 * Entries are dropped one at a time at a moving cursor. The cursor wraps
 * around before it can reach the trailing Math.round(length / 10) entries,
 * so those are always kept.
 *
 * @param {Array} arr - array to shrink; it is mutated.
 * @param {number} [length=400] - largest size the array may keep.
 * @returns {Array} the same `arr` instance.
 */
const reduce = (arr, length = 400) => {
  // number of entries at the end of the array that are never removed
  const protectedTail = Math.round(length / 10);

  for (
    let cursor = 1;
    arr.length > length;
    // advance past the entry that slid into the freed slot, then wrap
    cursor = (cursor + 1) % (arr.length - protectedTail)
  ) {
    arr.splice(cursor, 1);
  }

  return arr;
};
|
|
|
|
|
2019-03-14 07:20:52 -07:00
|
|
|
// Rebuild ./browsers/*.json and ./map.json from the user-agent lists in ./assets/.
//
// Steps:
//   1. empty ./browsers/
//   2. load, de-duplicate and filter the user-agent strings
//   3. parse each one and group the results by browser and OS
//   4. write one JSON file per browser/OS pair, one after another
//   5. check that every browser/OS key has a single spelling and write map.json
//
// The callback is deliberately not `async`: nothing is awaited, and a plain
// function lets the `throw` statements below surface as ordinary exceptions
// instead of promise rejections.
fs.readdir('./browsers/', (err, files) => {
  if (err) {
    throw err;
  }

  // unlinkSync takes no callback; it throws by itself when removal fails
  for (const file of files) {
    fs.unlinkSync(path.join('./browsers/', file));
  }

  // agents containing any of these (case-insensitive) fragments are dropped
  const blocked = ['fb_iab', 'fbsv', 'w3m', 'elinks'];

  // a Set keeps the first occurrence of every string, in order
  const list = [...new Set([
    ...require('./assets/list-01.json'),
    ...require('./assets/list-02.json'),
    ...require('./assets/list-03.json'),
    ...require('./assets/list-04.json'),
    ...require('./assets/list-05.json'),
    ...require('./assets/list-06.json'),
    ...require('./assets/list-07.json'),
    ...require('./assets/list-08.json'),
    ...require('./assets/list-09.json'),
    ...require('./assets/list-10.json'),
    ...require('./assets/list-11.json')
  ])].filter(s => {
    const lower = s.toLowerCase();
    return blocked.some(k => lower.includes(k)) === false;
  });

  for (const ua of list) {
    if (ua.startsWith('Mozilla/5.0 ') === false) {
      continue;
    }
    // A "[short agent]" (< 10 characters) report used to follow here. It could
    // never fire: the prefix required above is already 12 characters long.
    const lower = ua.toLowerCase();

    if (ua.length > 200) {
      console.log('[long agent]\t', ua);
    }
    // report agents that embed a URL unless they look like QtWeb or a crawler
    if (ua.includes('http')) {
      const expected = ua.includes('QtWeb') || ['crawler', 'bot', 'spider'].some(k => lower.includes(k));
      if (expected === false) {
        console.log('[contains HTTP]\t', ua);
      }
    }

    parser.setUA(ua);
    const o = parser.getResult();

    if (o.browser.name && o.os.name) {
      const bb = o.browser.name.toLowerCase();
      const ss = o.os.name.toLowerCase();

      cache[bb] = cache[bb] || {};
      cache[bb][ss] = cache[bb][ss] || [];
      cache[bb][ss].push(o);

      // remember every spelling; they are reconciled before map.json is written
      map.browser[bb] = map.browser[bb] || [];
      map.browser[bb].push(o.browser.name);

      map.os[ss] = map.os[ss] || [];
      map.os[ss].push(o.os.name);
    }
    else if (lower.includes('bot')) {
      // no browser/OS pair was detected, but the agent mentions "bot"
      cache.bot = cache.bot || {
        'misc': []
      };
      cache.bot.misc.push(o);
      map.browser.bot = map.browser.bot || ['Bot'];
      map.os.misc = map.os.misc || ['Misc'];
    }
    // anything else is skipped
  }

  // one output file per browser/OS pair
  const contents = [];
  for (const browser of Object.keys(cache)) {
    for (const os of Object.keys(cache[browser])) {
      // "/" in an OS key is replaced so it cannot act as a path separator
      const name = browser + '-' + os.replace(/\//g, '-') + '.json';
      const uas = cache[browser][os];
      // reduce() trims the array in place before it is serialized
      const content = JSON.stringify(reduce(uas));
      contents.push({
        name,
        content
      });

      map.matching[browser] = map.matching[browser] || [];
      if (map.matching[browser].includes(os) === false) {
        map.matching[browser].push(os);
      }
    }
  }

  // spellings that are discarded when one key was seen under several names
  const osAliases = ['UNIX', 'debian', 'gentoo', 'ubuntu', 'WIndows', 'kubuntu'];
  const browserAliases = [
    'Webkit', 'MAXTHON', 'conkeror', 'icecat', 'Iceweasel', 'iceweasel', 'midori', 'Palemoon', 'Seamonkey', 'chrome'
  ];
  // unique names, first occurrence first, without the listed aliases
  const canonical = (names, aliases) => names.filter((s, i, l) => l.indexOf(s) === i && aliases.includes(s) === false);

  // writes the queued files one at a time, then finishes with map.json
  const once = () => {
    const obj = contents.shift();
    if (obj) {
      write(obj, once);
      return;
    }

    for (const os of Object.keys(map.os)) {
      map.os[os] = canonical(map.os[os], osAliases);
      if (map.os[os].length > 1) {
        // the names go into the message itself; a second Error() argument is not appended to it
        throw new Error('Duplicated OS; add the ones that need to be removed to the list: ' + map.os[os].join(', '));
      }
    }
    for (const browser of Object.keys(map.browser)) {
      map.browser[browser] = canonical(map.browser[browser], browserAliases);
      if (map.browser[browser].length > 1) {
        throw new Error('Duplicated browser; add the ones that need to be removed to the list: ' + map.browser[browser].join(', '));
      }
    }

    fs.writeFile('./map.json', JSON.stringify({
      browser: Object.values(map.browser).map(k => k[0]),
      os: Object.values(map.os).map(k => k[0]),
      matching: map.matching
    }), e => {
      if (e) {
        // report a failed write instead of dropping it silently
        console.log(e);
      }
    });
  };
  once();
});
|
2019-03-14 07:20:52 -07:00
|
|
|
|