Created
July 2, 2020 22:20
-
-
Save IvanZelenskyy/284cd77734f2f542f04acdbc4bcce1be to your computer and use it in GitHub Desktop.
izberkom parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Downloads a page and parses its HTML into a DOM document.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<Document>} Resolves with the document produced by DOMParser.
 *   Rejects when the parsed page contains a `.captcha` element, or when the
 *   download itself fails; every rejection is logged and then re-thrown.
 */
function getDocument(url) {
  // The chain has to be returned: without it callers receive `undefined`
  // and can never get hold of the parsed document.
  return fetch(url)
    .then(function (response) {
      return response.text();
    })
    .then(function (html) {
      // Convert the HTML string into a document object.
      const parser = new DOMParser();
      const doc = parser.parseFromString(html, 'text/html');
      if (doc.querySelector('.captcha')) {
        console.log("captcha", doc.querySelectorAll('.captcha'));
        // Throw a real Error (not a bare string) so a stack trace is kept.
        throw new Error("сессия протухла, надо капчу вводить (((");
      }
      return doc;
    })
    .catch(function (err) {
      // Log and re-throw so the caller still sees the failure.
      console.warn('не удалось загрузить документ. Ошибка такая: ', err);
      throw err;
    });
}
/**
 * Checks whether the first header cell of the results table mentions "УИК".
 *
 * @param {Document} doc3 - Parsed results page.
 * @returns {boolean} True when the text of the first cell in the first table
 *   row contains "УИК"; false when it does not or when the cell is absent.
 * @throws {Error} When no document is supplied.
 */
function isUikLevel(doc3) {
  if (!doc3) throw new Error("doc3 not exists!");
  const headerCell = doc3.querySelector('body > table:nth-child(5) > tbody > tr:nth-child(4) > td > div > table > tbody > tr > td:nth-child(2) > div > table > tbody > tr:nth-child(1) > td:nth-child(1) > nobr');
  // querySelector yields null when the page has no such cell; report "not this
  // level" instead of failing with an opaque TypeError.
  if (!headerCell) return false;
  return headerCell.innerText.includes("УИК");
}
/**
 * Extracts one record per linked column of the results table.
 *
 * Every link found in the first table row becomes a record with `name` and
 * `href`. The record for the i-th link is then filled from the bold cell in
 * column i of table rows 2, 3, 4, 5, 7 and 8 (`registered`, `issued`,
 * `received`, `invalid`, `yes_voice`, `no_voice` respectively).
 *
 * @param {Document} doc2 - Parsed results page.
 * @returns {Array<Object>} Records in the order the links appear.
 */
function parseDoc(doc2) {
  const localArr = [];
  console.log("doc2:", doc2);

  // Path to the <tbody> of the results table; shared by every selector below,
  // so it is built once instead of on each loop iteration.
  const tableBody = "body > table:nth-child(5) > tbody > tr:nth-child(4) > td > div > table > tbody > tr > td:nth-child(2)" +
    " > div > table > tbody";

  doc2.querySelectorAll(tableBody + " > tr:nth-child(1) > td > nobr > a")
    .forEach((a) => localArr.push({ 'name': a.innerText, 'href': a.href }));

  // Text of the bold value at the given 1-based row and column.
  const cellText = (row, column) =>
    doc2.querySelector(tableBody + " > tr:nth-child(" + row + ") > td:nth-child(" + column + ") > nobr > b").innerText;

  // nth-child is 1-based, hence the loop starts at 1.
  for (let i = 1; i <= localArr.length; i++) {
    const entry = localArr[i - 1];
    entry.registered = cellText(2, i);
    entry.issued = cellText(3, i);
    entry.received = cellText(4, i);
    entry.invalid = cellText(5, i);
    entry.yes_voice = cellText(7, i);
    entry.no_voice = cellText(8, i);
  }
  return localArr;
}
// Entry point: read the records of the current page, then follow each record's
// link. A linked page that is already at the "УИК" level is parsed straight
// into `uiks`; otherwise its records go into `tiks` and each of their links is
// followed once more to collect `uiks`.
var regions = parseDoc(document);

// getDocument is asynchronous, so pages are loaded one after another through a
// promise chain; the original code used its return value as if it were a
// document.
regions
  .reduce(function (queue, r) {
    return queue.then(function () {
      if (!r.tiks) r.tiks = [];
      if (!r.uiks) r.uiks = [];
      return getDocument(r.href).then(function (d) {
        console.log("d", r.href, d);
        if (isUikLevel(d)) {
          r.uiks = r.uiks.concat(parseDoc(d));
          return undefined;
        }
        // Keep the freshly parsed list in a local: the original iterated an
        // undeclared `tiks` identifier here.
        var tiks = parseDoc(d);
        r.tiks = r.tiks.concat(tiks);
        return tiks.reduce(function (tikQueue, t) {
          return tikQueue
            .then(function () {
              return getDocument(t.href);
            })
            .then(function (dd) {
              r.uiks = r.uiks.concat(parseDoc(dd));
            });
        }, Promise.resolve());
      });
    });
  }, Promise.resolve())
  .then(function () {
    // Only report once every page has been loaded and parsed.
    console.log("RESULT!!!!", regions);
  })
  .catch(function (err) {
    console.warn("parsing aborted: ", err);
  });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
проверенный вариант
/**
 * Downloads a page and parses its HTML into a DOM document.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<Document>} Resolves with the document produced by DOMParser.
 *   Rejects when the parsed page contains a `.captcha` element or when the
 *   download fails.
 */
function getDocument(url) {
  return fetch(url)
    .then(function (response) {
      return response.text();
    })
    .then(function (html) {
      // Convert the HTML string into a document object.
      const parser = new DOMParser();
      const doc = parser.parseFromString(html, 'text/html');
      if (doc.querySelector('.captcha')) {
        console.log('captcha', doc.querySelectorAll('.captcha'));
        // Throw a real Error (not a bare string) so a stack trace is kept.
        throw new Error('сессия протухла, надо капчу вводить (((');
      }
      console.log("doc >>>", doc);
      // Hand the document to the caller; without this the promise resolves
      // with `undefined`.
      return doc;
    });
}
/**
 * Checks whether the first header cell of the results table mentions "УИК".
 *
 * @param {Document} doc3 - Parsed results page.
 * @returns {boolean} True when the text of the first cell in the first table
 *   row contains "УИК"; false when it does not or when the cell is absent.
 * @throws {Error} When no document is supplied.
 */
function isUikLevel(doc3) {
  if (!doc3) throw new Error('doc3 not exists!');
  const headerCell = doc3.querySelector(
    'body > table:nth-child(5) > tbody > tr:nth-child(4) > td > div > table > tbody > tr > td:nth-child(2) > div > table > tbody > tr:nth-child(1) > td:nth-child(1) > nobr'
  );
  // querySelector yields null when the page has no such cell; report "not this
  // level" instead of failing with an opaque TypeError.
  if (!headerCell) return false;
  return headerCell.innerText.includes('УИК');
}
/**
 * Builds one record per link found in the first row of the results table.
 *
 * Each record starts with the link's `name` and `href`; afterwards the record
 * for the i-th link receives the text of the bold cell in column i of table
 * rows 2, 3, 4, 5, 7 and 8 as `registered`, `issued`, `received`, `invalid`,
 * `yes_voice` and `no_voice`.
 *
 * @param {Document} doc2 - Parsed results page.
 * @returns {Array<Object>} Records in the order the links appear.
 */
function parseDoc(doc2) {
  // Selector of the results table body that every lookup below starts from.
  const grid =
    'body > table:nth-child(5) > tbody > tr:nth-child(4) > td > div > table > tbody > tr > td:nth-child(2) > div > table > tbody';
  // Record field -> 1-based table row that holds its value.
  const rowOfField = [
    ['registered', 2],
    ['issued', 3],
    ['received', 4],
    ['invalid', 5],
    ['yes_voice', 7],
    ['no_voice', 8],
  ];

  const records = [];
  console.log('doc2:', doc2);

  // First pass: one record per header link.
  doc2.querySelectorAll(`${grid} > tr:nth-child(1) > td > nobr > a`).forEach((link) => {
    records.push({ name: link.innerText, href: link.href });
  });

  // Second pass: fill every record from its own column (nth-child is 1-based).
  records.forEach((record, index) => {
    const column = index + 1;
    for (const [field, row] of rowOfField) {
      record[field] = doc2.querySelector(
        `${grid} > tr:nth-child(${row}) > td:nth-child(${column}) > nobr > b`
      ).innerText;
    }
  });

  return records;
}
// Entry point: read the records of the current page, then follow each record's
// link. A linked page that is already at the "УИК" level is parsed straight
// into `uiks`; otherwise its records go into `tiks` and each of their links is
// followed once more to collect `uiks`.
var regions = parseDoc(document);

Promise.all(
  regions.map((r) => {
    if (!r.tiks) r.tiks = [];
    if (!r.uiks) r.uiks = [];
    return getDocument(r.href).then((d) => {
      console.log('d', r.href, d);
      if (isUikLevel(d)) {
        r.uiks = r.uiks.concat(parseDoc(d));
        return undefined;
      }
      // Keep the freshly parsed list in a local: the original iterated an
      // undeclared `tiks` identifier, which throws a ReferenceError.
      const tiks = parseDoc(d);
      r.tiks = r.tiks.concat(tiks);
      // getDocument returns a promise, so each page must be awaited before it
      // is parsed; Promise.all keeps the results in link order.
      return Promise.all(
        tiks.map((t) => getDocument(t.href).then((dd) => parseDoc(dd)))
      ).then((uikLists) => {
        uikLists.forEach((list) => {
          r.uiks = r.uiks.concat(list);
        });
      });
    });
  })
)
  .then(() => {
    // Only report once every page has been loaded and parsed.
    console.log('RESULT!!!!', regions);
  })
  .catch((err) => {
    console.warn('parsing aborted: ', err);
  });