Skip to content

Instantly share code, notes, and snippets.

@jsonzilla
Created May 12, 2025 13:18
Show Gist options
  • Save jsonzilla/881c012a0fce70c579d426c30a3a36ff to your computer and use it in GitHub Desktop.
Save jsonzilla/881c012a0fce70c579d426c30a3a36ff to your computer and use it in GitHub Desktop.
What this can't do: it can't guarantee that a string is "ANSI" — it only tells you that the string is not valid UTF-8, in which case we assume it is ANSI. If the string contains only ASCII characters, it is valid as both UTF-8 and ANSI, so the two cannot be distinguished.
#include <iostream>
#include <string>
#include <locale>
#include <codecvt>
/// Converts a single-byte "ANSI" string to UTF-8.
///
/// The input is interpreted as Windows-1252, which agrees with
/// ISO-8859-1 (Latin-1) for every byte except 0x80..0x9F. Bytes in that range
/// that Windows-1252 leaves unassigned (0x81, 0x8D, 0x8F, 0x90, 0x9D) are
/// mapped to the C1 control code point with the same value, so no input byte
/// is ever dropped.
///
/// The conversion is done by hand instead of through std::wstring_convert so
/// that the result does not depend on the process locale or on the width of
/// wchar_t.
///
/// @param ansiStr Bytes to convert; embedded NUL bytes are preserved.
/// @return The UTF-8 encoding of the input. ASCII input is returned unchanged.
std::string ansiToUtf8(const std::string& ansiStr) {
    // Unicode code points for Windows-1252 bytes 0x80..0x9F (index = byte - 0x80).
    static constexpr char16_t kCp1252High[32] = {
        0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
        0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
        0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
        0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178,
    };

    std::string utf8;
    utf8.reserve(ansiStr.size());  // exact for ASCII; grows for anything else

    for (const char ch : ansiStr) {
        const unsigned char byte = static_cast<unsigned char>(ch);
        if (byte < 0x80) {
            utf8.push_back(ch);  // ASCII is encoded identically in UTF-8
            continue;
        }

        // 0xA0..0xFF map 1:1 onto U+00A0..U+00FF; 0x80..0x9F need the table.
        const unsigned codePoint =
            (byte < 0xA0) ? static_cast<unsigned>(kCp1252High[byte - 0x80])
                          : static_cast<unsigned>(byte);

        if (codePoint < 0x800) {
            // Two-byte sequence: 110xxxxx 10xxxxxx
            utf8.push_back(static_cast<char>(0xC0 | (codePoint >> 6)));
            utf8.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
        } else {
            // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
            // (every table entry is below U+10000, so three bytes suffice).
            utf8.push_back(static_cast<char>(0xE0 | (codePoint >> 12)));
            utf8.push_back(static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)));
            utf8.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
        }
    }
    return utf8;
}
#include <iostream>
#include <string>
#include <locale>
#include <codecvt>
/// Reports whether `str` is a well-formed UTF-8 byte sequence.
///
/// Besides the lead/continuation bit patterns, this rejects the sequences
/// that RFC 3629 forbids even though they look structurally plausible:
///   - overlong encodings (lead bytes 0xC0/0xC1, 0xE0 followed by < 0xA0,
///     0xF0 followed by < 0x90),
///   - UTF-16 surrogates U+D800..U+DFFF (0xED followed by > 0x9F),
///   - code points above U+10FFFF (0xF4 followed by > 0x8F, lead bytes
///     0xF5..0xFF).
///
/// @param str Bytes to inspect; may contain embedded NUL bytes.
/// @return true if every byte belongs to a well-formed sequence (an empty
///         string is valid), false otherwise, including when the string ends
///         in the middle of a multi-byte sequence.
bool isValidUtf8(const std::string& str) {
    const std::string::size_type size = str.size();
    std::string::size_type pos = 0;

    while (pos < size) {
        const unsigned char lead = static_cast<unsigned char>(str[pos]);

        if (lead < 0x80) {  // ASCII
            ++pos;
            continue;
        }

        // Sequence length plus the permitted range of the SECOND byte, which
        // is where overlongs, surrogates and out-of-range values are caught.
        std::string::size_type length = 0;
        unsigned char secondMin = 0x80;
        unsigned char secondMax = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF) {
            length = 2;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            length = 3;
            if (lead == 0xE0) {
                secondMin = 0xA0;  // below this would be an overlong encoding
            } else if (lead == 0xED) {
                secondMax = 0x9F;  // above this would be a surrogate
            }
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            length = 4;
            if (lead == 0xF0) {
                secondMin = 0x90;  // below this would be an overlong encoding
            } else if (lead == 0xF4) {
                secondMax = 0x8F;  // above this would exceed U+10FFFF
            }
        } else {
            // Stray continuation byte (0x80..0xBF), overlong lead (0xC0, 0xC1)
            // or a lead byte that can never appear in UTF-8 (0xF5..0xFF).
            return false;
        }

        if (length > size - pos) {
            return false;  // truncated sequence
        }

        const unsigned char second = static_cast<unsigned char>(str[pos + 1]);
        if (second < secondMin || second > secondMax) {
            return false;
        }

        for (std::string::size_type k = 2; k < length; ++k) {
            const unsigned char cont = static_cast<unsigned char>(str[pos + k]);
            if ((cont & 0xC0) != 0x80) {
                return false;  // invalid continuation byte
            }
        }

        pos += length;
    }
    return true;
}
int main() {
std::string maybeAnsi = "\xE9"; // ANSI 'é'
std::string converted = ansiToUtf8(maybeAnsi);
std::cout << "UTF-8: " << converted << std::endl;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment