Created
March 14, 2024 10:54
-
-
Save irsdl/cf57eb773344ffb0c1b20a417d069bd3 to your computer and use it in GitHub Desktop.
To evaluate how `URL(url).hostname` in JS handles discarded characters and character conversions in domain names.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// by @irsdl
// This script identifies anomalies in how JS parses a URL using `new URL(url).hostname`:
// 1- Characters that are ignored (silently dropped) when present in the domain name.
// 2- Characters that can replace ASCII characters in a domain name and still be parsed to the same hostname. Here we look for replacements of the letter S in `soroush.me`.
// You can try running this script in your browser's dev console or at https://www.jdoodle.com/execute-nodejs-online/
// I am sure this must have been looked at before, but I cannot find a reference.
/**
 * Finds the UTF-16 code units that `new URL()` silently drops from a hostname.
 *
 * Each candidate is inserted into the middle of the host (`sorous<char>h.me`);
 * it counts as ignored when the parsed hostname is still exactly `soroush.me`.
 *
 * @param {number} [maxCodeUnit=0xFFFF] - Highest code unit to probe (inclusive).
 * @returns {number[]} Ignored code units, in ascending order.
 */
function findIgnoredCodeUnits(maxCodeUnit = 0xFFFF) {
  const ignored = [];
  for (let codeUnit = 0; codeUnit <= maxCodeUnit; codeUnit++) {
    const urlString = `http://sorous${String.fromCharCode(codeUnit)}h.me/blog/`;
    try {
      if (new URL(urlString).hostname === 'soroush.me') {
        ignored.push(codeUnit);
      }
    } catch (error) {
      // `new URL()` signals an unparsable URL with a TypeError, which is the
      // expected outcome for many candidates; anything else is a real failure.
      if (!(error instanceof TypeError)) {
        throw error;
      }
    }
  }
  return ignored;
}

for (const codeUnit of findIgnoredCodeUnits()) {
  console.log(`Ignored Unicode: ${codeUnit.toString(16).padStart(4, '0').toUpperCase()}`);
}
/**
 * Finds the UTF-16 code units that `new URL()` treats as the letter "s" in a hostname.
 *
 * Each candidate replaces the first character of the host (`<char>oroush.me`);
 * it counts as a substitute when the parsed hostname is exactly `soroush.me`.
 *
 * @param {number} [maxCodeUnit=0xFFFF] - Highest code unit to probe (inclusive).
 * @returns {number[]} Substitute code units, in ascending order.
 */
function findLetterSCodeUnits(maxCodeUnit = 0xFFFF) {
  const substitutes = [];
  for (let codeUnit = 0; codeUnit <= maxCodeUnit; codeUnit++) {
    const urlString = `http://${String.fromCharCode(codeUnit)}oroush.me/blog/`;
    try {
      if (new URL(urlString).hostname === 'soroush.me') {
        substitutes.push(codeUnit);
      }
    } catch (error) {
      // `new URL()` signals an unparsable URL with a TypeError, which is the
      // expected outcome for many candidates; anything else is a real failure.
      if (!(error instanceof TypeError)) {
        throw error;
      }
    }
  }
  return substitutes;
}

for (const codeUnit of findLetterSCodeUnits()) {
  console.log(`Unicode for letter S: ${codeUnit.toString(16).padStart(4, '0').toUpperCase()}`);
}
/*
Expected results:
Ignored Unicode: 0009
Ignored Unicode: 000A
Ignored Unicode: 000D
Ignored Unicode: 00AD
Ignored Unicode: 034F
Ignored Unicode: 180B
Ignored Unicode: 180C
Ignored Unicode: 180D
Ignored Unicode: 180F
Ignored Unicode: 200B
Ignored Unicode: 2060
Ignored Unicode: 2064
Ignored Unicode: FE00
Ignored Unicode: FE01
Ignored Unicode: FE02
Ignored Unicode: FE03
Ignored Unicode: FE04
Ignored Unicode: FE05
Ignored Unicode: FE06
Ignored Unicode: FE07
Ignored Unicode: FE08
Ignored Unicode: FE09
Ignored Unicode: FE0A
Ignored Unicode: FE0B
Ignored Unicode: FE0C
Ignored Unicode: FE0D
Ignored Unicode: FE0E
Ignored Unicode: FE0F
Ignored Unicode: FEFF
Unicode for letter S: 0053
Unicode for letter S: 0073
Unicode for letter S: 017F
Unicode for letter S: 02E2
Unicode for letter S: 209B
Unicode for letter S: 24C8
Unicode for letter S: 24E2
Unicode for letter S: FF33
Unicode for letter S: FF53
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Better output:
Result: