Last active
November 23, 2022 15:41
-
-
Save sergiks/320ba840028801a114bdd83a45634835 to your computer and use it in GitHub Desktop.
Fetch and parse XML in windows-1251 encoding from vk.com - getting basic user information
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Quick experiment:
 * retrieve a user's data as XML from the VK "secret" /foaf.php endpoint
 * and extract some values, using 2022 browser APIs.
 *
 * This can be run in the browser console while any page on vk.com is open;
 * otherwise the cross-domain request is disallowed.
 *
 * By Sergei Sokolov,
 * Moscow, Russia, November 2022.
 */
/**
 * Given an integer VK user ID, fetches that user's FOAF data and parses it
 * into an XMLDocument.
 *
 * The response body is decoded as windows-1251 before parsing. Any failure
 * (network error, non-2xx HTTP status, malformed XML) is logged to the
 * console and the returned promise resolves to `undefined` instead of
 * rejecting.
 *
 * @param {number} id - VK user ID
 * @returns {Promise<XMLDocument|undefined>} the parsed document, or
 *   `undefined` when the request or the parsing failed
 */
const getXMLDocument = async id => {
  try {
    const url = new URL('https://vk.com/foaf.php');
    url.searchParams.set('id', id);

    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Request failed with HTTP status ${response.status}`);
    }

    const buffer = await response.arrayBuffer();
    const decoder = new TextDecoder("windows-1251");
    const rawXML = decoder.decode(buffer);

    // Fix VK image URLs: escape bare ampersands (such as query-string
    // separators) so the XML is well-formed, while leaving existing entity
    // and character references (&amp;, &lt;, &#38;, &#x26;, ...) untouched.
    const xml = rawXML.replace(
      /&(?!(?:amp|lt|gt|quot|apos|#\d+|#x[0-9a-f]+);)/gi,
      '&amp;',
    );

    const parser = new DOMParser();
    const doc = parser.parseFromString(xml, 'text/xml');

    // DOMParser does not throw on malformed XML; it returns a document
    // containing a <parsererror> element instead.
    const parseError = doc.querySelector('parsererror');
    if (parseError) {
      throw new Error(`XML parsing failed: ${parseError.textContent}`);
    }

    return doc;
  } catch (error) {
    console.error("Caught error:", error);
  }
};
/**
 * Find some nodes in the FOAF document and extract their data.
 *
 * For each field the first element with the matching tag name is used.
 * A field whose element (or, for dates, whose `dc:date` attribute) is absent
 * from the document is reported as `null` rather than throwing, so one
 * missing tag does not abort the whole extraction.
 *
 * @param {XMLDocument} xmlDocument
 * @returns {{
 *   lastLoggedIn: ?string,
 *   created: ?string,
 *   firstName: ?string,
 *   secondName: ?string,
 *   gender: ?string
 * }} data
 */
const extractData = xmlDocument => {
  const firstElement = tagName => xmlDocument.getElementsByTagName(tagName)[0];
  const getDate = tagName => firstElement(tagName)?.getAttribute('dc:date') ?? null;
  const getText = tagName => firstElement(tagName)?.textContent ?? null;

  return {
    lastLoggedIn: getDate('ya:lastLoggedIn'),
    created: getDate('ya:created'),
    firstName: getText('ya:firstName'),
    secondName: getText('ya:secondName'),
    gender: getText('foaf:gender'),
  };
};
/**
 * End-to-end run: load the FOAF document for a VK user and print the
 * extracted fields to the console. Nothing is printed when no document
 * could be obtained.
 *
 * @param {number} id - VK user ID
 */
const foaf = async id => {
  const xmlDocument = await getXMLDocument(id);
  if (xmlDocument) {
    const userData = extractData(xmlDocument);
    console.log(userData);
  }
};
// Demo run; 1 is Pavel Durov's VK id.
// Attach a handler so a rejection is logged instead of being left as an
// unhandled floating promise.
foaf(1).catch(error => console.error("Caught error:", error));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment