Last active
February 14, 2025 07:00
-
-
Save Arkanic/aabf6324075a1534e785729e85baf8d3 to your computer and use it in GitHub Desktop.
Store arbitrary data in a Unicode emoji; see https://paulbutler.org/2025/smuggling-arbitrary-data-through-an-emoji/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <stdint.h> | |
#include <wchar.h> | |
#include <locale.h> | |
// Map one data byte to the Unicode variation selector that carries it.
//
// Byte values 0..15 map to U+FE00..U+FE0F; byte values 16..255 map to
// U+E0100..U+E01EF. Together these cover all 256 possible byte values.
wchar_t byte_to_variant(char byte) {
    // Plain char may be signed: go through unsigned char so that bytes
    // 0x80..0xFF are treated as 128..255 rather than negative numbers.
    uint32_t value = (unsigned char)byte;

    if(value < 16) {
        return (wchar_t)(0xfe00 + value);
    }
    return (wchar_t)(0xe0100 + (value - 16));
}
char variant_to_byte(wchar_t wc) { | |
uint32_t variant = (uint32_t)wc; | |
if(variant >= 0xfe00 && variant <= 0xfe0f) { | |
return (char)(variant - 0xfe00); | |
} else if(variant >= 0xe0100 && variant <= 0xe01ef) { | |
return (char)(variant - 0xe0100 + 16); | |
} else { | |
return 'X'; | |
} | |
} | |
// Build a wide string made of `base` followed by one variation selector
// for each byte of the NUL-terminated string `bytes`.
//
// Returns a newly malloc'd, NUL-terminated wide string that the caller
// must free(), or NULL if the buffer could not be allocated.
wchar_t *encode(wchar_t base, char *bytes) {
    size_t length = strlen(bytes);

    // Need room for: base + one selector per byte + terminating L'\0'.
    if(length > SIZE_MAX / sizeof(wchar_t) - 2) {
        return NULL;
    }
    wchar_t *result = malloc(sizeof *result * (length + 2));
    if(result == NULL) {
        return NULL;
    }

    result[0] = base;
    for(size_t i = 0; i < length; i++) {
        result[i + 1] = byte_to_variant(bytes[i]);
    }
    // The terminator of `bytes` is not copied by the loop, so add one here.
    result[length + 1] = L'\0';

    return result;
}
// Extract the bytes hidden in the variation selectors that follow the
// first wide character of `emoji`.
//
// The first wide character is treated as the base and skipped; every
// following wide character is converted with variant_to_byte(). An empty
// input decodes to an empty string.
//
// Returns a newly malloc'd, NUL-terminated string that the caller must
// free(), or NULL if the buffer could not be allocated.
char *decode(wchar_t *emoji) {
    size_t length = wcslen(emoji);
    // Number of data bytes: everything after the base character.
    size_t payload = length > 0 ? length - 1 : 0;

    // One extra byte for the terminating '\0'.
    char *result = malloc(payload + 1);
    if(result == NULL) {
        return NULL;
    }

    for(size_t i = 0; i < payload; i++) {
        result[i] = variant_to_byte(emoji[i + 1]);
    }
    result[payload] = '\0';

    return result;
}
// Print the two supported command lines to stderr, using `pname` as the
// program name, then terminate the process with exit status 1.
// This function does not return.
void help_and_exit(char *pname) {
    fprintf(stderr,
            "usage: %s encode blahblahblah\n"
            " %s decode *\n",
            pname,
            pname);
    exit(1);
}
// Entry point.
//
//   <prog> encode <text>   prints a smiley emoji followed by one variation
//                          selector per byte of <text>
//   <prog> decode <emoji>  prints the bytes recovered from the variation
//                          selectors that follow the first character
//
// Returns 0 on success and 1 on a conversion or allocation failure;
// usage errors exit with status 1 via help_and_exit().
int main(int argc, char *argv[]) {
    // Use the environment's locale so multibyte <-> wide conversion and
    // wide output follow the terminal's encoding.
    setlocale(LC_CTYPE, "");

    if(argc < 2) help_and_exit(argv[0]);

    if(strcmp(argv[1], "encode") == 0) {
        if(argc < 3) help_and_exit(argv[0]);

        // U+1F60A SMILING FACE WITH SMILING EYES, spelled as a universal
        // character name so it does not depend on the source file encoding.
        wchar_t smiley = L'\U0001F60A';
        wchar_t *encoded = encode(smiley, argv[2]);
        if(encoded == NULL) {
            fprintf(stderr, "%s: out of memory\n", argv[0]);
            return 1;
        }

        wprintf(L"%ls\n", encoded);

        free(encoded);
        return 0;
    } else if(strcmp(argv[1], "decode") == 0) {
        if(argc < 3) help_and_exit(argv[0]);

        // mbstowcs() returns (size_t)-1 when the argument is not a valid
        // multibyte string in the current locale; adding 1 to that would
        // wrap to 0, so it has to be rejected before sizing the buffer.
        size_t mbslen = mbstowcs(NULL, argv[2], 0);
        if(mbslen == (size_t)-1) {
            fprintf(stderr, "%s: input is not valid text in the current locale\n", argv[0]);
            return 1;
        }

        wchar_t *encoded = malloc(sizeof *encoded * (mbslen + 1));
        if(encoded == NULL) {
            fprintf(stderr, "%s: out of memory\n", argv[0]);
            return 1;
        }
        if(mbstowcs(encoded, argv[2], mbslen + 1) == (size_t)-1) {
            fprintf(stderr, "%s: input is not valid text in the current locale\n", argv[0]);
            free(encoded);
            return 1;
        }

        char *decoded = decode(encoded);
        if(decoded == NULL) {
            fprintf(stderr, "%s: out of memory\n", argv[0]);
            free(encoded);
            return 1;
        }

        printf("%s\n", decoded);

        free(encoded);
        free(decoded);
        return 0;
    } else {
        help_and_exit(argv[0]);
    }

    return 1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment