Skip to content

Instantly share code, notes, and snippets.

@valoricDe
Last active April 2, 2023 17:37

Revisions

  1. valoricDe revised this gist Apr 2, 2023. 1 changed file with 6 additions and 3 deletions.
    9 changes: 6 additions & 3 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -11,8 +11,7 @@ async function getHtmlDocumentById(id) {
    }

    function JSDOM(html) {
    document.documentElement.innerHTML = ''
    document.write(html)
    document.documentElement.innerHTML = html
    return { window: { document: window.document }};
    }

    @@ -52,13 +51,17 @@ function extractData(document) {

    const waitMs = ms => new Promise(res => setTimeout(res, ms))

    window.collection = []

    /**
     * Visits `max` consecutive ids starting at `offset`, extracts one record per
     * id, appends it to `window.collection`, and finally replaces the page
     * content with the whole collection serialized as JSON.
     *
     * A failure for a single id is logged and skipped so that one bad record
     * does not abort the remaining ids.
     *
     * @param max    Number of ids to visit (default 50).
     * @param offset First id to visit (default 0).
     * @returns A promise that resolves once every id has been processed.
     */
    async function exportDatabase(max = 50, offset = 0) {
      for (let id = offset; id < offset + max; id++) {
        try {
          const doc = await getHtmlDocumentById(id)
          const data = extractData(doc)
          console.log(data)
          window.collection.push(data)
          // Pause 100 ms between consecutive requests.
          await waitMs(100)
        } catch (e) {
          // Report which id failed so it can be identified and retried.
          console.warn(`id ${id}:`, e)
        }
      }

      // Use textContent rather than innerHTML: the JSON has to be shown
      // verbatim, and innerHTML would parse any '<' or '&' inside the scraped
      // values as markup and corrupt the export.
      document.documentElement.textContent = JSON.stringify(window.collection)
    }
  2. valoricDe revised this gist Apr 2, 2023. 1 changed file with 5 additions and 0 deletions.
    5 changes: 5 additions & 0 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -11,6 +11,7 @@ async function getHtmlDocumentById(id) {
    }

    function JSDOM(html) {
    document.documentElement.innerHTML = ''
    document.write(html)
    return { window: { document: window.document }};
    }
    @@ -49,11 +50,15 @@ function extractData(document) {
    }
    }

    const waitMs = ms => new Promise(res => setTimeout(res, ms))

    /**
     * Walks the ids `offset` .. `offset + max - 1`, fetching each document,
     * extracting its data and logging the result. An error for one id is
     * reported with `console.warn` and the loop moves on to the next id.
     *
     * @param max    How many ids to process (default 50).
     * @param offset Id to start from (default 0).
     */
    async function exportDatabase(max = 50, offset = 0) {
      const end = offset + max
      let id = offset
      while (id < end) {
        try {
          const record = extractData(await getHtmlDocumentById(id))
          console.log(record)
          // Wait 100 ms before the next request.
          await waitMs(100)
        } catch (err) {
          console.warn(err)
        }
        id += 1
      }
    }
  3. valoricDe revised this gist Apr 2, 2023. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion main.js
    Original file line number Diff line number Diff line change
    @@ -51,7 +51,7 @@ function extractData(document) {


    async function exportDatabase(max = 50, offset = 0) {
    for(let id = offset; id < max; id++) {
    for(let id = offset; id < offset+max; id++) {
    const doc = await getHtmlDocumentById(id)
    const data = extractData(doc)
    console.log(data)
  4. valoricDe revised this gist Apr 2, 2023. 1 changed file with 7 additions and 1 deletion.
    8 changes: 7 additions & 1 deletion main.js
    Original file line number Diff line number Diff line change
    @@ -37,8 +37,13 @@ function extractData(document) {
    Stadt: getInputValue(document, '[name=City]'),
    Stadtteil: getInputValue(document, '[name=District]'),
    Ansprechpartner: getInputValue(document, '[name=MainContactName]'),
    AnsprechpartnerAnrede: getSelectOptionLabel(document, '[name=MainContactSalutation]'),
    Leitung: getInputValue(document, '[name=DirectorName]'),
    LeitungAnrede: getSelectOptionLabel(document, '[name=DirectorSalutation]'),
    StellvLeitung: getInputValue(document, '[name=DevDirectorName]'),
    StellvLeitungAnrede: getSelectOptionLabel(document, '[name=DevDirectorSalutation]'),
    Vorwahl: getInputValue(document, '[name=DialingCode]'),
    Telefon: getInputValue(document, '[name=PhoneNumber]'),
    Telefon: getInputValue(document, '[name=Phonenumber]'),
    Email: getInputValue(document, '[name=EmailAddress]'),
    Website: getInputValue(document, '[name=Website]'),
    }
    @@ -49,5 +54,6 @@ async function exportDatabase(max = 50, offset = 0) {
    for(let id = offset; id < max; id++) {
    const doc = await getHtmlDocumentById(id)
    const data = extractData(doc)
    console.log(data)
    }
    }
  5. valoricDe revised this gist Apr 2, 2023. 1 changed file with 2 additions and 4 deletions.
    6 changes: 2 additions & 4 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -11,10 +11,8 @@ async function getHtmlDocumentById(id) {
    }

    function JSDOM(html) {
    var template = document.createElement('template');
    html = html.trim(); // Never return a text node of whitespace as the result
    template.innerHTML = html;
    return { window: { document: template.content.firstChild }};
    document.write(html)
    return { window: { document: window.document }};
    }

    function getInputValue(document, selector) {
  6. valoricDe revised this gist Apr 2, 2023. 1 changed file with 1 addition and 13 deletions.
    14 changes: 1 addition & 13 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -1,18 +1,6 @@
    async function getHtmlDocumentById(id) {
    const baseUrl = 'http://portal.fotoraabe.de'
    const response = await fetch(
    `${baseUrl}/EditCustomer?id=${id}`,
    {
    headers: {
    'User-Agent':
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:87.0) Gecko/20100101 Firefox/87.0',
    Accept: '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    },
    referrer: `${baseUrl}`,
    method: 'GET'
    }
    )
    const response = await fetch(`${baseUrl}/EditCustomer?id=${id}`)
    if (response.status >= 400) throw Error(await response.text())

    const html = await response.text()
  7. valoricDe revised this gist Apr 2, 2023. 1 changed file with 1 addition and 2 deletions.
    3 changes: 1 addition & 2 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -10,8 +10,7 @@ async function getHtmlDocumentById(id) {
    'Accept-Language': 'en-US,en;q=0.5',
    },
    referrer: `${baseUrl}`,
    method: 'GET',
    mode: 'cors',
    method: 'GET'
    }
    )
    if (response.status >= 400) throw Error(await response.text())
  8. valoricDe revised this gist Apr 2, 2023. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion main.js
    Original file line number Diff line number Diff line change
    @@ -3,7 +3,6 @@ async function getHtmlDocumentById(id) {
    const response = await fetch(
    `${baseUrl}/EditCustomer?id=${id}`,
    {
    credentials: 'omit',
    headers: {
    'User-Agent':
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:87.0) Gecko/20100101 Firefox/87.0',
  9. valoricDe revised this gist Apr 2, 2023. 1 changed file with 2 additions and 8 deletions.
    10 changes: 2 additions & 8 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -61,15 +61,9 @@ function extractData(document) {
    }


    await function exportDatabase() {
    const max = 50;

    for(let id = 0; id < max; id++) {
    async function exportDatabase(max = 50, offset = 0) {
    for(let id = offset; id < max; id++) {
    const doc = await getHtmlDocumentById(id)
    const data = extractData(doc)
    }
    }

    async function main() {
    await exportDatabase(5, 33)
    }
  10. valoricDe revised this gist Apr 2, 2023. 1 changed file with 6 additions and 2 deletions.
    8 changes: 6 additions & 2 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -61,11 +61,15 @@ function extractData(document) {
    }


    function exportDatabase() {
    await function exportDatabase() {
    const max = 50;

    for(let id = 0; id < max; id++) {
    const doc = getHtmlDocumentById(id)
    const doc = await getHtmlDocumentById(id)
    const data = extractData(doc)
    }
    }

    async function main() {
    await exportDatabase(5, 33)
    }
  11. valoricDe revised this gist Apr 2, 2023. 1 changed file with 6 additions and 0 deletions.
    6 changes: 6 additions & 0 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -24,6 +24,12 @@ async function getHtmlDocumentById(id) {
    return document
    }

    function JSDOM(html) {
    var template = document.createElement('template');
    html = html.trim(); // Never return a text node of whitespace as the result
    template.innerHTML = html;
    return { window: { document: template.content.firstChild }};
    }

    function getInputValue(document, selector) {
    const el = document.querySelector(selector)
  12. valoricDe revised this gist Apr 2, 2023. 1 changed file with 14 additions and 6 deletions.
    20 changes: 14 additions & 6 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,4 @@
    async function getHtmlDocumentById(id: number | string) {
    async function getHtmlDocumentById(id) {
    const baseUrl = 'http://portal.fotoraabe.de'
    const response = await fetch(
    `${baseUrl}/EditCustomer?id=${id}`,
    @@ -25,8 +25,15 @@ async function getHtmlDocumentById(id: number | string) {
    }


    const getInputValue = (document, selector) => document.querySelector(selector) as HTMLInputElement)?.value
    const getSelectOptionLabel = (document, selector) => (document.querySelector(selector) as HTMLSelectElement)?.selectedOptions?.[0].label
    function getInputValue(document, selector) {
    const el = document.querySelector(selector)
    return el && el.value || null
    }

    function getSelectOptionLabel(document, selector) {
    const el = document.querySelector(selector)
    return el && el.selectedOptions[0].label || null
    }

    function extractData(document) {
    return {
    @@ -48,10 +55,11 @@ function extractData(document) {
    }


    function exportDatabase(max = 50, offset = 0) {
    for(let id = offset; id < max; id++) {
    function exportDatabase() {
    const max = 50;

    for(let id = 0; id < max; id++) {
    const doc = getHtmlDocumentById(id)
    const data = extractData(doc)
    console.log(data)
    }
    }
  13. valoricDe revised this gist Apr 2, 2023. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -48,8 +48,8 @@ function extractData(document) {
    }


    function exportDatabase(offset, max) {
    for(let id = 0; id < max; id++) {
    function exportDatabase(max = 50, offset = 0) {
    for(let id = offset; id < max; id++) {
    const doc = getHtmlDocumentById(id)
    const data = extractData(doc)
    console.log(data)
  14. valoricDe revised this gist Apr 2, 2023. 1 changed file with 2 additions and 3 deletions.
    5 changes: 2 additions & 3 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -48,11 +48,10 @@ function extractData(document) {
    }


    function exportDatabase() {
    const max = 50;

    function exportDatabase(offset, max) {
    for(let id = 0; id < max; id++) {
    const doc = getHtmlDocumentById(id)
    const data = extractData(doc)
    console.log(data)
    }
    }
  15. valoricDe revised this gist Apr 2, 2023. 1 changed file with 48 additions and 31 deletions.
    79 changes: 48 additions & 31 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -1,41 +1,58 @@
    async function getHtmlDocumentById(id: number | string) {
    const baseUrl = 'https://www.whiskybase.com'
    const response = await fetch(
    `${baseUrl}/EditCustomer?id=${id}`,
    {
    credentials: 'omit',
    headers: {
    'User-Agent':
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:87.0) Gecko/20100101 Firefox/87.0',
    Accept: '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    },
    referrer: `${baseUrl}`,
    method: 'GET',
    mode: 'cors',
    }
    )
    if (response.status >= 400) throw Error(await response.text())
    const baseUrl = 'http://portal.fotoraabe.de'
    const response = await fetch(
    `${baseUrl}/EditCustomer?id=${id}`,
    {
    credentials: 'omit',
    headers: {
    'User-Agent':
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:87.0) Gecko/20100101 Firefox/87.0',
    Accept: '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    },
    referrer: `${baseUrl}`,
    method: 'GET',
    mode: 'cors',
    }
    )
    if (response.status >= 400) throw Error(await response.text())

    const html = await response.text()
    const {
    window: { document },
    } = new JSDOM(html)
    return document
    const html = await response.text()
    const {
    window: { document },
    } = new JSDOM(html)
    return document
    }


    /** Returns the `value` of the first element matching `selector`, or undefined when nothing matches. */
    const getInputValue = (document, selector) => (document.querySelector(selector) as HTMLInputElement)?.value
    /**
     * Returns the label of the first selected option of the element matching `selector`,
     * or undefined when nothing matches or no option is selected.
     */
    const getSelectOptionLabel = (document, selector) => (document.querySelector(selector) as HTMLSelectElement)?.selectedOptions?.[0]?.label

    function extractData(document) {
    const currentPrice = document.querySelector(
    '.block-price p:last-child'
    )?.textContent
    return {
    Name: getInputValue(document, '[name=Name]'),
    Art: getSelectOptionLabel(document, '[name=CustomerType]'),
    Groesse: getInputValue(document, '[name=Dimension]'),
    Nation: getInputValue(document, '[name=Nation]'),
    Bundesland: getInputValue(document, '[name=State]'),
    Strasse: getInputValue(document, '[name=Street]'),
    PLZ: getInputValue(document, '[name=ZipCode]'),
    Stadt: getInputValue(document, '[name=City]'),
    Stadtteil: getInputValue(document, '[name=District]'),
    Ansprechpartner: getInputValue(document, '[name=MainContactName]'),
    Vorwahl: getInputValue(document, '[name=DialingCode]'),
    Telefon: getInputValue(document, '[name=PhoneNumber]'),
    Email: getInputValue(document, '[name=EmailAddress]'),
    Website: getInputValue(document, '[name=Website]'),
    }
    }


    function exportDatabase() {
    const max = 5000;
    for(let id = 0; id < max; id++) {
    const doc = getHtmlDocumentById(id)
    const data = extractData(doc)
    }
    const max = 50;

    for(let id = 0; id < max; id++) {
    const doc = getHtmlDocumentById(id)
    const data = extractData(doc)
    }
    }
  16. valoricDe created this gist Apr 2, 2023.
    41 changes: 41 additions & 0 deletions main.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,41 @@
    /**
     * Requests the `EditCustomer` page for `id` and returns it as a document.
     *
     * @param id Value placed in the `id` query parameter.
     * @returns The `window.document` of a `JSDOM` built from the response body.
     * @throws Error carrying the response body when the HTTP status is 400 or above.
     */
    async function getHtmlDocumentById(id: number | string) {
      const baseUrl = 'https://www.whiskybase.com'
      const requestHeaders = {
        'User-Agent':
          'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:87.0) Gecko/20100101 Firefox/87.0',
        Accept: '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
      }

      const response = await fetch(`${baseUrl}/EditCustomer?id=${id}`, {
        credentials: 'omit',
        headers: requestHeaders,
        referrer: `${baseUrl}`,
        method: 'GET',
        mode: 'cors',
      })

      // Error statuses surface as an exception whose message is the response body.
      if (response.status >= 400) {
        throw Error(await response.text())
      }

      const dom = new JSDOM(await response.text())
      return dom.window.document
    }

    /**
     * Reads the text of the last paragraph inside `.block-price` from `document`.
     *
     * The value used to be computed and then discarded, so callers always
     * received `undefined`; it is now returned.
     *
     * @param document Object exposing `querySelector`.
     * @returns `{ currentPrice }`, where `currentPrice` is undefined when the element is missing.
     */
    function extractData(document) {
      const currentPrice = document.querySelector(
        '.block-price p:last-child'
      )?.textContent
      return { currentPrice }
    }


    /**
     * Fetches the documents for ids 0 .. `max - 1` one after another and runs
     * `extractData` on each of them.
     *
     * `getHtmlDocumentById` is async, so its result has to be awaited;
     * otherwise `extractData` would be handed a Promise instead of a document.
     *
     * @param max Number of ids to visit (default 5000, the previously hard-coded limit).
     * @returns A promise that resolves once every id has been processed.
     */
    async function exportDatabase(max = 5000) {
      for (let id = 0; id < max; id++) {
        const doc = await getHtmlDocumentById(id)
        extractData(doc)
      }
    }