yeiichi · June 20, 2025 23:18
diff --git a/number_chunks_extractor.py b/number_chunks_extractor.py
 #!/usr/bin/env python3
 import re
 import unicodedata
 from typing import List

 # Digit translation tables: the two strings below are position-aligned, so
 # the i-th Kanji character is rewritten as the i-th ASCII digit.
 KANJI_DIGITS = (
     '〇零'  # 0
     '一壱'  # 1
     '二弐'  # 2
     '三参'  # 3
     '四'  # 4
     '五伍'  # 5
     '六七八九'  # 6 to 9, one character each
 )
 ARABIC_DIGITS = (
     '00'
     '11'
     '22'
     '33'
     '4'
     '55'
     '6789'
 )
 KANJI_TO_ARABIC_MAPPING = str.maketrans(KANJI_DIGITS, ARABIC_DIGITS)
 # Normalization form handed to unicodedata.normalize by extract_number_chunks.
 UNICODE_NORMALIZATION_FORM = 'NFKC'


 def kanji_to_arabic(input_string: str) -> str:
    """
    Replace every Kanji digit in the input with its ASCII counterpart.

    The substitution is a per-character lookup in KANJI_TO_ARABIC_MAPPING;
    characters without an entry in that table pass through unchanged.
    Args:
        input_string (str): Text that may contain Kanji digits.
    Returns:
        str: A copy of the text with the mapped characters replaced.
    """
    converted = input_string.translate(KANJI_TO_ARABIC_MAPPING)
    return converted


 def extract_number_chunks(input_string: str) -> List[str]:
    """
    Collect each maximal run of digits found in the normalized input.

    The text is first passed through unicodedata.normalize using
    UNICODE_NORMALIZATION_FORM, then scanned left to right.
    Args:
        input_string (str): The text to scan.
    Returns:
        List[str]: The digit runs as strings, in order of appearance.
    """
    text = unicodedata.normalize(UNICODE_NORMALIZATION_FORM, input_string)
    # The pattern has no capture groups, so each match's full text is the chunk.
    return [match.group() for match in re.finditer(r'\d+', text)]


 def main(input_string: str) -> List[str]:
    """
    Extract digit runs from a string, treating Kanji digits as digits.

    The input is Unicode-normalized before the Kanji translation so that
    compatibility forms of the Kanji digits (e.g. the circled ideograph
    U+3280, which NFKC maps to U+4E00) reach the translation table in their
    plain form instead of slipping past it.
    Args:
        input_string (str): The text to scan.
    Returns:
        List[str]: The digit runs found, in order of appearance.
    """
    normalized = unicodedata.normalize(UNICODE_NORMALIZATION_FORM, input_string)
    return extract_number_chunks(kanji_to_arabic(normalized))


 if __name__ == '__main__':
    # Self-check: run the pipeline on a sample mixing ASCII digits, Kanji
    # digits and assorted separators.
    test_string = '001-1223鶴の壱五四八番_455/67::89'
    expected_result = ['001', '1223', '1548', '455', '67', '89']
    print("Test String:", test_string)
    print("Expected result:", expected_result)
    result = main(test_string)
    # Raise explicitly rather than via `assert`: assert statements are
    # removed under `python -O`, which would let a wrong result through and
    # still print the success message.
    if result != expected_result:
        raise AssertionError("Assertion failed!")
    print("Assertion passed!")  # Optional: Feedback that assertion was successful
	#!/usr/bin/env python3
	import re
	import unicodedata
	from typing import List

	# Digit translation tables: the two strings below are position-aligned, so
	# the i-th Kanji character is rewritten as the i-th ASCII digit.
	KANJI_DIGITS = (
	    '〇零'  # 0
	    '一壱'  # 1
	    '二弐'  # 2
	    '三参'  # 3
	    '四'  # 4
	    '五伍'  # 5
	    '六七八九'  # 6 to 9, one character each
	)
	ARABIC_DIGITS = (
	    '00'
	    '11'
	    '22'
	    '33'
	    '4'
	    '55'
	    '6789'
	)
	KANJI_TO_ARABIC_MAPPING = str.maketrans(KANJI_DIGITS, ARABIC_DIGITS)
	# Normalization form handed to unicodedata.normalize by extract_number_chunks.
	UNICODE_NORMALIZATION_FORM = 'NFKC'


	def kanji_to_arabic(input_string: str) -> str:
	    """
	    Replace every Kanji digit in the input with its ASCII counterpart.

	    The substitution is a per-character lookup in KANJI_TO_ARABIC_MAPPING;
	    characters without an entry in that table pass through unchanged.
	    Args:
	        input_string (str): Text that may contain Kanji digits.
	    Returns:
	        str: A copy of the text with the mapped characters replaced.
	    """
	    converted = input_string.translate(KANJI_TO_ARABIC_MAPPING)
	    return converted


	def extract_number_chunks(input_string: str) -> List[str]:
	    """
	    Collect each maximal run of digits found in the normalized input.

	    The text is first passed through unicodedata.normalize using
	    UNICODE_NORMALIZATION_FORM, then scanned left to right.
	    Args:
	        input_string (str): The text to scan.
	    Returns:
	        List[str]: The digit runs as strings, in order of appearance.
	    """
	    text = unicodedata.normalize(UNICODE_NORMALIZATION_FORM, input_string)
	    # The pattern has no capture groups, so each match's full text is the chunk.
	    return [match.group() for match in re.finditer(r'\d+', text)]


	def main(input_string: str) -> List[str]:
	    """
	    Extract digit runs from a string, treating Kanji digits as digits.

	    The input is Unicode-normalized before the Kanji translation so that
	    compatibility forms of the Kanji digits (e.g. the circled ideograph
	    U+3280, which NFKC maps to U+4E00) reach the translation table in their
	    plain form instead of slipping past it.
	    Args:
	        input_string (str): The text to scan.
	    Returns:
	        List[str]: The digit runs found, in order of appearance.
	    """
	    normalized = unicodedata.normalize(UNICODE_NORMALIZATION_FORM, input_string)
	    return extract_number_chunks(kanji_to_arabic(normalized))


	if __name__ == '__main__':
	    # Self-check: run the pipeline on a sample mixing ASCII digits, Kanji
	    # digits and assorted separators.
	    test_string = '001-1223鶴の壱五四八番_455/67::89'
	    expected_result = ['001', '1223', '1548', '455', '67', '89']
	    print("Test String:", test_string)
	    print("Expected result:", expected_result)
	    result = main(test_string)
	    # Raise explicitly rather than via `assert`: assert statements are
	    # removed under `python -O`, which would let a wrong result through and
	    # still print the success message.
	    if result != expected_result:
	        raise AssertionError("Assertion failed!")
	    print("Assertion passed!") # Optional: Feedback that assertion was successful