Skip to content

Instantly share code, notes, and snippets.

@IvanZelenskyy
Created July 2, 2020 22:20
Show Gist options
  • Save IvanZelenskyy/284cd77734f2f542f04acdbc4bcce1be to your computer and use it in GitHub Desktop.
Save IvanZelenskyy/284cd77734f2f542f04acdbc4bcce1be to your computer and use it in GitHub Desktop.
izbirkom parser
/**
 * Downloads a page and parses its body into an HTML document.
 *
 * @param {string} url - Address of the page to load.
 * @returns {Promise<Document>} Resolves with the document produced by DOMParser.
 *   Rejects when the request fails, when the server answers with a non-2xx
 *   status, or when the parsed page contains a `.captcha` element.
 */
function getDocument(url) {
  // The chain has to be returned: without it every caller receives `undefined`
  // and has no way to wait for (or even see) the downloaded document.
  return fetch(url)
    .then(function (response) {
      // fetch() only rejects on network failures, so HTTP errors are rejected here.
      if (!response.ok) {
        throw new Error('HTTP ' + response.status + ' for ' + url);
      }
      return response.text();
    })
    .then(function (html) {
      // Convert the HTML string into a document object
      var parser = new DOMParser();
      var doc = parser.parseFromString(html, 'text/html');
      if (doc.querySelector('.captcha')) {
        console.log("captcha", doc.querySelectorAll('.captcha'));
        // A real Error (rather than a bare string) keeps the stack trace.
        throw new Error("сессия протухла, надо капчу вводить (((");
      }
      return doc;
    })
    .catch(function (err) {
      // Report the failure, then re-throw so the caller's chain is rejected too.
      console.warn('не удалось загрузить документ. Ошибка такая: ', err);
      throw err;
    });
}
/**
 * Reports whether the first header cell reached by the hard-coded table path
 * contains the substring "УИК" (presumably marking a precinct-level page;
 * confirm against the site markup).
 *
 * @param {Document} doc3 - Parsed page to inspect.
 * @returns {boolean} True when the cell text includes "УИК".
 * @throws {string} "doc3 not exists!" when no document is passed.
 */
function isUikLevel(doc3) {
  if (!doc3) {
    throw "doc3 not exists!";
  }
  const firstHeaderPath = [
    'body > table:nth-child(5) > tbody > tr:nth-child(4) > td > div > table > tbody > tr > td:nth-child(2)',
    'div > table > tbody > tr:nth-child(1) > td:nth-child(1) > nobr',
  ].join(' > ');
  const headerCell = doc3.querySelector(firstHeaderPath);
  return headerCell.innerText.includes("УИК");
}
/**
 * Reads the table reached by the hard-coded path: one entry per link in the
 * header row, filled with the text of rows 2, 3, 4, 5, 7 and 8 of the matching
 * column (row 6 is not read).
 *
 * @param {Document} doc2 - Parsed page to read.
 * @returns {Array<Object>} Entries with `name`, `href`, `registered`, `issued`,
 *   `received`, `invalid`, `yes_voice` and `no_voice` (all cell text as-is).
 */
function parseDoc(doc2) {
  const tableBody =
    'body > table:nth-child(5) > tbody > tr:nth-child(4) > td > div > table > tbody > tr > td:nth-child(2) > div > table > tbody';
  // Field name -> 1-based table row that holds its value.
  const rowByField = [
    ['registered', 2],
    ['issued', 3],
    ['received', 4],
    ['invalid', 5],
    ['yes_voice', 7],
    ['no_voice', 8],
  ];

  console.log("doc2:", doc2);

  const entries = [];
  doc2.querySelectorAll(`${tableBody} > tr:nth-child(1) > td > nobr > a`).forEach((link) => {
    entries.push({ name: link.innerText, href: link.href });
  });

  entries.forEach((entry, position) => {
    // Columns are addressed by the link's 1-based position in the header row.
    const column = position + 1;
    for (const [field, row] of rowByField) {
      const cell = doc2.querySelector(
        `${tableBody} > tr:nth-child(${row}) > td:nth-child(${column}) > nobr > b`
      );
      entry[field] = cell.innerText;
    }
  });

  return entries;
}
// Entries parsed from the page the script is started on. Declared with `var`
// as in the original, presumably so the data can be inspected from the console.
var regions = parseDoc(document);

// Collects the nested levels for every region, one request at a time.
// This relies on getDocument() returning a promise of the parsed document;
// each download is awaited before its page is inspected.
(async function collectAll() {
  for (const r of regions) {
    if (!r.tiks) r.tiks = [];
    if (!r.uiks) r.uiks = [];

    const d = await getDocument(r.href);
    console.log("d", r.href, d);

    if (isUikLevel(d)) {
      // The region page already lists the lowest level.
      r.uiks = r.uiks.concat(parseDoc(d));
      continue;
    }

    r.tiks = r.tiks.concat(parseDoc(d));
    // Iterate `r.tiks`: a bare `tiks` is an undeclared name and throws.
    for (const t of r.tiks) {
      const dd = await getDocument(t.href);
      r.uiks = r.uiks.concat(parseDoc(dd));
    }
  }

  // Logged only after every page has been processed.
  console.log("RESULT!!!!", regions);
})().catch(function (err) {
  // Whatever was gathered before the failure is still in `regions`.
  console.error('collection aborted, partial data follows:', err, regions);
});
@thelensky
Copy link

проверенный вариант
/**
 * Fetches a page and turns its body into an HTML document.
 *
 * @param {string} url - Address of the page to load.
 * @returns {Promise<Document>} Resolves with the document built by DOMParser.
 *   Rejects when the request fails or when the parsed page contains a
 *   `.captcha` element; every failure is logged with console.warn first.
 */
function getDocument(url) {
  // Step 1: read the response body as text.
  const readBody = (response) => response.text();

  // Step 2: parse the markup and refuse pages that show a captcha.
  const toDocument = (markup) => {
    const parsed = new DOMParser().parseFromString(markup, 'text/html');
    if (parsed.querySelector('.captcha')) {
      console.log('captcha', parsed.querySelectorAll('.captcha'));
      throw 'сессия протухла, надо капчу вводить (((';
    }
    console.log("doc >>>", parsed);
    return parsed;
  };

  // Any failure is reported and then passed on to the caller.
  const reportAndRethrow = (err) => {
    console.warn('не удалось загрузить документ. Ошибка такая: ', err);
    throw err;
  };

  return fetch(url).then(readBody).then(toDocument).catch(reportAndRethrow);
}

/**
 * Checks the first header cell reached by the hard-coded table path for the
 * substring "УИК" (presumably the marker of a precinct-level page; confirm
 * against the site markup).
 *
 * @param {Document} doc3 - Parsed page to inspect.
 * @returns {boolean} True when the cell text includes "УИК".
 * @throws {string} 'doc3 not exists!' when no document is passed.
 */
function isUikLevel(doc3) {
  if (!doc3) {
    throw 'doc3 not exists!';
  }
  const outerCell =
    'body > table:nth-child(5) > tbody > tr:nth-child(4) > td > div > table > tbody > tr > td:nth-child(2)';
  const firstHeader = `${outerCell} > div > table > tbody > tr:nth-child(1) > td:nth-child(1) > nobr`;
  const { innerText } = doc3.querySelector(firstHeader);
  return innerText.includes('УИК');
}

/**
 * Extracts one entry per link in the header row of the table reached by the
 * hard-coded path, then fills each entry from rows 2, 3, 4, 5, 7 and 8 of the
 * same column (row 6 is skipped).
 *
 * @param {Document} doc2 - Parsed page to read.
 * @returns {Array<Object>} Entries with `name`, `href`, `registered`, `issued`,
 *   `received`, `invalid`, `yes_voice` and `no_voice` (cell text, unconverted).
 */
function parseDoc(doc2) {
  const rowsRoot =
    'body > table:nth-child(5) > tbody > tr:nth-child(4) > td > div > table > tbody > tr > td:nth-child(2) > div > table > tbody';
  // Builds the selector of the bold value in a given row/column (both 1-based).
  const valueAt = (row, column) =>
    `${rowsRoot} > tr:nth-child(${row}) > td:nth-child(${column}) > nobr > b`;
  // Field name -> table row that carries it.
  const fieldRows = { registered: 2, issued: 3, received: 4, invalid: 5, yes_voice: 7, no_voice: 8 };

  console.log('doc2:', doc2);

  const collected = [];
  doc2
    .querySelectorAll(`${rowsRoot} > tr:nth-child(1) > td > nobr > a`)
    .forEach((anchor) => {
      collected.push({ name: anchor.innerText, href: anchor.href });
    });

  let column = 0;
  for (const item of collected) {
    column += 1;
    for (const field of Object.keys(fieldRows)) {
      item[field] = doc2.querySelector(valueAt(fieldRows[field], column)).innerText;
    }
  }
  return collected;
}

// Entries parsed from the page the script is started on. Declared with `var`
// as in the original, presumably so the data can be inspected from the console.
var regions = parseDoc(document);

// Region pages are requested concurrently, as in the original forEach/then.
// Each region's promise is returned so that completion and failures can be
// observed instead of being left floating.
Promise.all(
  regions.map(async (r) => {
    if (!r.tiks) r.tiks = [];
    if (!r.uiks) r.uiks = [];

    const d = await getDocument(r.href);
    console.log('d', r.href, d);

    if (isUikLevel(d)) {
      // The region page already lists the lowest level.
      r.uiks = r.uiks.concat(parseDoc(d));
      return;
    }

    r.tiks = r.tiks.concat(parseDoc(d));
    // Iterate `r.tiks` (a bare `tiks` is an undeclared name) and await every
    // download: parseDoc needs the parsed document, not the pending promise.
    // Going one by one also keeps `r.uiks` in the same order as `r.tiks`.
    for (const t of r.tiks) {
      const dd = await getDocument(t.href);
      r.uiks = r.uiks.concat(parseDoc(dd));
    }
  })
)
  .then(() => {
    // Logged only once every region has been processed.
    console.log('RESULT!!!!', regions);
  })
  .catch((err) => {
    // Whatever was gathered before the failure is still in `regions`.
    console.error('collection aborted, partial data follows:', err, regions);
  });

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment