dutchLuck · February 22, 2025 06:12
diff --git a/try_1_utf8_char.rs b/try_1_utf8_char.rs
 //
 // T R Y _ 1 _ U T F 8 _ C H A R
 //
 // try_1_utf8_char.rs last edited on Sat Feb 22 16:09:31 2025
 //
 // Display a character from the UTF-8 bytes (or Unicode code point) specified
 // on the command line.
 //
 // N.B. 1. The quickest and easiest access to similar, and probably
 //      better, functionality as this rust code is to use the following
 //      webpage: https://www.cogsci.ed.ac.uk/~richard/utf-8.html
 //      with the disclaimer that I only tried out the webpage briefly
 //      and did that on a Mac using Safari.
 //
 //      2. If you know the Unicode Code Point instead of the UTF-8 byte
 //      sequence and you are on a mac then the "Programmer" mode of the
 //      calculator app will take the code point in hex and display the
 //      character. For example selecting the "Unicode" button and entering
 //      "0x1F980" will display the crab emoji over on the left hand side
 //      of the calculator display.
 //
 // Warning: try_1_utf8_char lacks adequate command line parameter checking
 //
 // Copilot AI generated code Oct-Nov 2024
 //
 // This is the outcome of quite a few query iterations
 // to get a quick test utility through Copilot AI
 //
 // Please heed the warning shown at the bottom of the Copilot
 // response to my code generation requests; -
 // "AI-generated code. Review and use carefully. More info on FAQ."
 //
 // Else-where are warning statements such as; -
 // "Copilot uses AI. Check for mistakes."
 //
 // Please consider yourself duly warned.
 //
 //
 // Recipe to build this code - assuming Rust is already installed
 // 1. Change into a convenient folder. i.e. cd rust
 // 2. Initialize the sub-folder to place this code in.
 //    i.e. cargo init try_1_utf8_char
 //    where try_1_utf8_char is the name chosen for this quick test utility
 // 3. Change into the try_1_utf8_char folder. i.e. cd try_1_utf8_char
 // 4. Build and run the Hello, world! sample already available. i.e. cargo run
 // 5. Copy this source file over the top of src/main.rs
 //    i.e. cp path_to_wherever_this_source_is/try_1_utf8_char.rs src/main.rs
 // 6. A Non-essential nicety is to edit the Cargo.toml file to set the version
 //    number, so the Cargo.toml becomes; -
 // [package]
 // name = "try_1_utf8_char"
 // version = "0.1.2"
 // edition = "2021"
 //
 // [dependencies]
 // 7. Build this try_1_utf8_char utility. i.e. cargo build
 // 8. Try out UTF-8 mode by running the code to display a Crab Emoji; -
 //    e.g. cargo run -- 0xf0 0x9f 0xa6 0x80
 //    On a mac the output for the above command is; -
 // UTF-32 Code Point: U+0001F980 (equivalent decimal is: 129408)
 // UTF-8 Hex Byte string: [F0, 9F, A6, 80] (equivalent decimal string is: [240, 159, 166, 128])
 // Unicode Character: 🦀
 //
 //    N.B. This utility probably will not appear to work quite so
 //    well on linux (Ubuntu 24.4 on WSL) or MS Windows (cmd or
 //    powershell) where the emoji will just be shown as a little
 //    box. However cut and paste the results into a Unicode aware
 //    editor like Visual Studio Code and the crab emoji will show
 //    up in the editor.
 //
 // 9. Try out the Unicode code point mode to display the Greek
 //    letter Pi, with the following command; -
 //    cargo run -- 0x3c0
 //    The output is; -
 // UTF-32 Code Point: U+000003C0 (equivalent decimal is: 960)
 // UTF-8 Hex Byte string: [CF, 80] (equivalent decimal string is: [207, 128])
 // Unicode Character: π
 //

 use std::env;
 use std::str;

 /// Parses one command line argument into a `u32`.
 ///
 /// An argument beginning with `0x` or `0X` has that prefix removed and the
 /// remainder is read as hexadecimal; any other argument is read as decimal.
 ///
 /// # Errors
 ///
 /// Returns the `ParseIntError` produced by the underlying integer parser
 /// when the digits are missing, malformed, or too large for a `u32`.
 fn parse_argument(arg: &str) -> Result<u32, std::num::ParseIntError> {
    // Try the lower-case prefix first, then the upper-case one.
    let hex_digits = arg
        .strip_prefix("0x")
        .or_else(|| arg.strip_prefix("0X"));

    match hex_digits {
        Some(digits) => u32::from_str_radix(digits, 16),
        None => arg.parse::<u32>(),
    }
 }

 /// Renders the characters encoded by a UTF-8 byte slice as code points.
 ///
 /// Each decoded character becomes `U+` followed by its scalar value as
 /// eight upper-case hexadecimal digits; the entries are separated by a
 /// single space. An empty slice yields an empty string.
 ///
 /// # Panics
 ///
 /// Panics with the message "Invalid UTF-8" when `input` is not valid UTF-8.
 fn u8_array_to_utf32_hex(input: &[u8]) -> String {
    let decoded = String::from_utf8(input.to_vec()).expect("Invalid UTF-8");

    let mut rendered = String::new();
    for (position, ch) in decoded.chars().enumerate() {
        // A separator goes between entries, never before the first one.
        if position > 0 {
            rendered.push(' ');
        }
        rendered.push_str(&format!("U+{:08X}", u32::from(ch)));
    }
    rendered
 }

 /// Prints a report describing the character(s) encoded by `bytes`.
 ///
 /// For valid UTF-8 the report has three lines: the code point(s) in
 /// `U+XXXXXXXX` form with the decimal equivalent, the bytes in hex and
 /// decimal, and the decoded text itself. For a single character the output
 /// is identical to what this function has always printed.
 ///
 /// The bytes are validated before anything else is computed. Previously the
 /// code point helper panicked on invalid UTF-8 before the error branch could
 /// run, and empty or multi-character input panicked while slicing/parsing
 /// the formatted code point string. Those cases are now reported instead:
 /// * invalid UTF-8: the byte line is printed, followed by the decode error;
 /// * no bytes: a short message is written to stderr;
 /// * several characters: every code point and its decimal value is listed.
 fn display_bytes(bytes: Vec<u8>) {
    let utf8_str = match str::from_utf8(&bytes) {
        Ok(text) => text,
        Err(e) => {
            // There is no code point to show, but the raw bytes are still useful.
            print!("UTF-8 Hex Byte string: {:02X?}", &bytes);
            println!(" (equivalent decimal string is: {:?})", &bytes);
            eprintln!("Code point {:?}", e);
            println!("Couldn't convert code point {:?} to a character", bytes);
            return;
        }
    };

    if utf8_str.is_empty() {
        eprintln!("No UTF-8 bytes were supplied, so there is no character to display");
        return;
    }

    // Cannot panic here: the bytes were validated above.
    let utf32_hex_string = u8_array_to_utf32_hex(&bytes);
    // Take the decimal values from the characters themselves rather than
    // re-parsing the formatted string, which only worked for one character.
    let decimal_values = utf8_str
        .chars()
        .map(|c| (c as u32).to_string())
        .collect::<Vec<String>>()
        .join(" ");

    print!("UTF-32 Code Point: {}", utf32_hex_string);
    println!(" (equivalent decimal is: {})", decimal_values);
    print!("UTF-8 Hex Byte string: {:02X?}", &bytes);
    println!(" (equivalent decimal string is: {:?})", &bytes);
    println!("Unicode Character: {}", utf8_str);
 }

 /// Entry point: interprets the command line and displays the character.
 ///
 /// * One argument greater than 127 is treated as a Unicode code point and
 ///   converted to its UTF-8 bytes before being displayed.
 /// * Otherwise every argument is treated as one UTF-8 byte (0 to 255).
 ///
 /// Arguments may be decimal or `0x`/`0X`-prefixed hexadecimal. Bad input
 /// (no arguments, an unparsable number, a value that is not a Unicode scalar
 /// value, or a byte value above 255) is reported on stderr and the process
 /// exits with status 1, instead of panicking or silently truncating.
 fn main() {
    let args: Vec<String> = env::args().skip(1).collect();

    if args.is_empty() {
        eprintln!("Usage: try_1_utf8_char <code point> | <UTF-8 byte> [<UTF-8 byte> ...]");
        eprintln!("  e.g. try_1_utf8_char 0x3c0    or    try_1_utf8_char 0xf0 0x9f 0xa6 0x80");
        std::process::exit(1);
    }

    if args.len() == 1 {
        if let Ok(code_point) = parse_argument(&args[0]) {
            if code_point > 127 {
                match char::from_u32(code_point) {
                    Some(ch) => {
                        display_bytes(ch.to_string().into_bytes());
                        return;
                    }
                    None => {
                        // Surrogates (0xD800-0xDFFF) and values above 0x10FFFF
                        // are not Unicode scalar values.
                        eprintln!("{:?} is not a valid Unicode code point", args[0]);
                        std::process::exit(1);
                    }
                }
            }
        }
    }

    let mut bytes: Vec<u8> = Vec::with_capacity(args.len());
    for arg in &args {
        match parse_argument(arg) {
            // The guard guarantees the cast below cannot truncate.
            Ok(value) if value <= u32::from(u8::MAX) => bytes.push(value as u8),
            Ok(value) => {
                eprintln!("Value {} (from {:?}) does not fit in one byte (0 to 255)", value, arg);
                std::process::exit(1);
            }
            Err(e) => {
                eprintln!("Invalid number {:?}: {}", arg, e);
                std::process::exit(1);
            }
        }
    }
    display_bytes(bytes);
 }
	//
	// T R Y _ 1 _ U T F 8 _ C H A R
	//
	// try_1_utf8_char.rs last edited on Sat Feb 22 16:09:31 2025
	//
	// Display a character from the UTF-8 bytes (or Unicode code point) specified
	// on the command line.
	//
	// N.B. 1. The quickest and easiest access to similar, and probably
	// better, functionality as this rust code is to use the following
	// webpage: https://www.cogsci.ed.ac.uk/~richard/utf-8.html
	// with the disclaimer that I only tried out the webpage briefly
	// and did that on a Mac using Safari.
	//
	// 2. If you know the Unicode Code Point instead of the UTF-8 byte
	// sequence and you are on a mac then the "Programmer" mode of the
	// calculator app will take the code point in hex and display the
	// character. For example selecting the "Unicode" button and entering
	// "0x1F980" will display the crab emoji over on the left hand side
	// of the calculator display.
	//
	// Warning: try_1_utf8_char lacks adequate command line parameter checking
	//
	// Copilot AI generated code Oct-Nov 2024
	//
	// This is the outcome of quite a few query iterations
	// to get a quick test utility through Copilot AI
	//
	// Please heed the warning shown at the bottom of the Copilot
	// response to my code generation requests; -
	// "AI-generated code. Review and use carefully. More info on FAQ."
	//
	// Else-where are warning statements such as; -
	// "Copilot uses AI. Check for mistakes."
	//
	// Please consider yourself duly warned.
	//
	//
	// Recipe to build this code - assuming Rust is already installed
	// 1. Change into a convenient folder. i.e. cd rust
	// 2. Initialize the sub-folder to place this code in.
	// i.e. cargo init try_1_utf8_char
	// where try_1_utf8_char is the name chosen for this quick test utility
	// 3. Change into the try_1_utf8_char folder. i.e. cd try_1_utf8_char
	// 4. Build and run the Hello, world! sample already available. i.e. cargo run
	// 5. Copy this source file over the top of src/main.rs
	// i.e. cp path_to_wherever_this_source_is/try_1_utf8_char.rs src/main.rs
	// 6. A Non-essential nicety is to edit the Cargo.toml file to set the version
	// number, so the Cargo.toml becomes; -
	// [package]
	// name = "try_1_utf8_char"
	// version = "0.1.2"
	// edition = "2021"
	//
	// [dependencies]
	// 7. Build this try_1_utf8_char utility. i.e. cargo build
	// 8. Try out UTF-8 mode by running the code to display a Crab Emoji; -
	// e.g. cargo run -- 0xf0 0x9f 0xa6 0x80
	// On a mac the output for the above command is; -
	// UTF-32 Code Point: U+0001F980 (equivalent decimal is: 129408)
	// UTF-8 Hex Byte string: [F0, 9F, A6, 80] (equivalent decimal string is: [240, 159, 166, 128])
	// Unicode Character: 🦀
	//
	// N.B. This utility probably will not appear to work quite so
	// well on linux (Ubuntu 24.4 on WSL) or MS Windows (cmd or
	// powershell) where the emoji will just be shown as a little
	// box. However cut and paste the results into a Unicode aware
	// editor like Visual Studio Code and the crab emoji will show
	// up in the editor.
	//
	// 9. Try out the Unicode code point mode to display the Greek
	// letter Pi, with the following command; -
	// cargo run -- 0x3c0
	// The output is; -
	// UTF-32 Code Point: U+000003C0 (equivalent decimal is: 960)
	// UTF-8 Hex Byte string: [CF, 80] (equivalent decimal string is: [207, 128])
	// Unicode Character: π
	//

	use std::env;
	use std::str;

	/// Parses one command line argument into a `u32`.
	///
	/// An argument beginning with `0x` or `0X` has that prefix removed and the
	/// remainder is read as hexadecimal; any other argument is read as decimal.
	/// (This copy previously contained a markdown-escaped `\|\|` in place of
	/// the logical-or operator, which is not valid Rust.)
	///
	/// # Errors
	///
	/// Returns the `ParseIntError` produced by the underlying integer parser
	/// when the digits are missing, malformed, or too large for a `u32`.
	fn parse_argument(arg: &str) -> Result<u32, std::num::ParseIntError> {
	    // Try the lower-case prefix first, then the upper-case one.
	    let hex_digits = arg
	        .strip_prefix("0x")
	        .or_else(|| arg.strip_prefix("0X"));

	    match hex_digits {
	        Some(digits) => u32::from_str_radix(digits, 16),
	        None => arg.parse::<u32>(),
	    }
	}

	/// Renders the characters encoded by a UTF-8 byte slice as code points.
	///
	/// Each decoded character becomes `U+` followed by its scalar value as
	/// eight upper-case hexadecimal digits; the entries are separated by a
	/// single space. An empty slice yields an empty string.
	/// (This copy previously contained a markdown-escaped `\|c\|` closure
	/// header, which is not valid Rust.)
	///
	/// # Panics
	///
	/// Panics with the message "Invalid UTF-8" when `input` is not valid UTF-8.
	fn u8_array_to_utf32_hex(input: &[u8]) -> String {
	    let utf8_string = String::from_utf8(input.to_vec()).expect("Invalid UTF-8");

	    utf8_string
	        .chars()
	        .map(|c| format!("U+{:08X}", c as u32))
	        .collect::<Vec<String>>()
	        .join(" ")
	}

	/// Prints a report describing the character(s) encoded by `bytes`.
	///
	/// For valid UTF-8 the report has three lines: the code point(s) in
	/// `U+XXXXXXXX` form with the decimal equivalent, the bytes in hex and
	/// decimal, and the decoded text itself. For a single character the output
	/// is identical to what this function has always printed.
	///
	/// The bytes are validated before anything else is computed. Previously the
	/// code point helper panicked on invalid UTF-8 before the error branch could
	/// run, and empty or multi-character input panicked while slicing/parsing
	/// the formatted code point string. Those cases are now reported instead:
	/// * invalid UTF-8: the byte line is printed, followed by the decode error;
	/// * no bytes: a short message is written to stderr;
	/// * several characters: every code point and its decimal value is listed.
	fn display_bytes(bytes: Vec<u8>) {
	    let utf8_str = match str::from_utf8(&bytes) {
	        Ok(text) => text,
	        Err(e) => {
	            // There is no code point to show, but the raw bytes are still useful.
	            print!("UTF-8 Hex Byte string: {:02X?}", &bytes);
	            println!(" (equivalent decimal string is: {:?})", &bytes);
	            eprintln!("Code point {:?}", e);
	            println!("Couldn't convert code point {:?} to a character", bytes);
	            return;
	        }
	    };

	    if utf8_str.is_empty() {
	        eprintln!("No UTF-8 bytes were supplied, so there is no character to display");
	        return;
	    }

	    // Cannot panic here: the bytes were validated above.
	    let utf32_hex_string = u8_array_to_utf32_hex(&bytes);
	    // Take the decimal values from the characters themselves rather than
	    // re-parsing the formatted string, which only worked for one character.
	    let decimal_values = utf8_str
	        .chars()
	        .map(|c| (c as u32).to_string())
	        .collect::<Vec<String>>()
	        .join(" ");

	    print!("UTF-32 Code Point: {}", utf32_hex_string);
	    println!(" (equivalent decimal is: {})", decimal_values);
	    print!("UTF-8 Hex Byte string: {:02X?}", &bytes);
	    println!(" (equivalent decimal string is: {:?})", &bytes);
	    println!("Unicode Character: {}", utf8_str);
	}

	/// Entry point: interprets the command line and displays the character.
	///
	/// * One argument greater than 127 is treated as a Unicode code point and
	///   converted to its UTF-8 bytes before being displayed.
	/// * Otherwise every argument is treated as one UTF-8 byte (0 to 255).
	///
	/// Arguments may be decimal or `0x`/`0X`-prefixed hexadecimal. Bad input
	/// (no arguments, an unparsable number, a value that is not a Unicode scalar
	/// value, or a byte value above 255) is reported on stderr and the process
	/// exits with status 1, instead of panicking or silently truncating.
	/// (This copy previously contained a markdown-escaped `\|arg\|` closure
	/// header, which is not valid Rust.)
	fn main() {
	    let args: Vec<String> = env::args().skip(1).collect();

	    if args.is_empty() {
	        eprintln!("Usage: try_1_utf8_char <code point> | <UTF-8 byte> [<UTF-8 byte> ...]");
	        eprintln!("  e.g. try_1_utf8_char 0x3c0    or    try_1_utf8_char 0xf0 0x9f 0xa6 0x80");
	        std::process::exit(1);
	    }

	    if args.len() == 1 {
	        if let Ok(code_point) = parse_argument(&args[0]) {
	            if code_point > 127 {
	                match char::from_u32(code_point) {
	                    Some(ch) => {
	                        display_bytes(ch.to_string().into_bytes());
	                        return;
	                    }
	                    None => {
	                        // Surrogates (0xD800-0xDFFF) and values above 0x10FFFF
	                        // are not Unicode scalar values.
	                        eprintln!("{:?} is not a valid Unicode code point", args[0]);
	                        std::process::exit(1);
	                    }
	                }
	            }
	        }
	    }

	    let mut bytes: Vec<u8> = Vec::with_capacity(args.len());
	    for arg in &args {
	        match parse_argument(arg) {
	            // The guard guarantees the cast below cannot truncate.
	            Ok(value) if value <= u32::from(u8::MAX) => bytes.push(value as u8),
	            Ok(value) => {
	                eprintln!("Value {} (from {:?}) does not fit in one byte (0 to 255)", value, arg);
	                std::process::exit(1);
	            }
	            Err(e) => {
	                eprintln!("Invalid number {:?}: {}", arg, e);
	                std::process::exit(1);
	            }
	        }
	    }
	    display_bytes(bytes);
	}