Skip to content

Instantly share code, notes, and snippets.

@sgronblo
Forked from Kimtaro/kanji_to_number.rb
Created June 19, 2012 06:55

Revisions

  1. sgronblo revised this gist Jun 19, 2012. 1 changed file with 29 additions and 65 deletions.
    94 changes: 29 additions & 65 deletions kanji_to_number.rb
    Original file line number Diff line number Diff line change
    @@ -1,73 +1,37 @@
    # Encoding: UTF-8

    module KanjiToNumber
    POS_FOR_DEN = {'十' => 1, '百' => 2, '千' => 3, '万' => 4}
    NUM_FOR_NUM = {'壱' => '1', '一' => '1', '1' => '1',
    '弐' => '2', '二' => '2', '2' => '2',
    '参' => '3', '三' => '3', '3' => '3',
    '四' => '4', '4' => '4',
    '五' => '5', '5' => '5',
    '六' => '6', '6' => '6',
    '七' => '7', '7' => '7',
    '八' => '8', '8' => '8',
    '九' => '9', '9' => '9',
    '零' => '0', '〇' => '0', '0' => '0'}

    def self.convert(kanji)
    number = ''
    letters = kanji.chars.to_a.reverse

    letters.each_with_index do |letter, i|
    if NUM_FOR_NUM.has_key?(letter)
    number = "#{NUM_FOR_NUM[letter]}#{number}"
    elsif POS_FOR_DEN.has_key?(letter)
    add_one = (i == letters.size-1 || (i < letters.size-1 && POS_FOR_DEN.has_key?(letters[i+1])))
    add_zero = (i == 0 || i == letters.size-1)
    position = POS_FOR_DEN[letter] - (add_one || add_zero ? 1 : 0) + (add_zero ? 1 : 0)

    while number.length < position
    number = "0#{number}"
    MULTIPLIERS = {'十' => 10, '百' => 100, '千' => 1000, '万' => 10000}
    NUM_FOR_NUM = {'壱' => 1, '一' => 1, '1' => 1,
    '弐' => 2, '二' => 2, '2' => 2,
    '参' => 3, '三' => 3, '3' => 3,
    '四' => 4, '4' => 4,
    '五' => 5, '5' => 5,
    '六' => 6, '6' => 6,
    '七' => 7, '7' => 7,
    '八' => 8, '8' => 8,
    '九' => 9, '9' => 9,
    '零' => 0, '〇' => 0, '0' => 0}

    def self.convert(kanji_number)
    full_number = 0
    current_number = 0

    kanji_number.each_char do |kanji|
    if NUM_FOR_NUM.has_key?(kanji)
    number = NUM_FOR_NUM[kanji]
    current_number = current_number * 10 + number
    elsif MULTIPLIERS.has_key?(kanji)
    if current_number == 0 # Implicit "one" before hundred or thousand for example
    full_number += MULTIPLIERS[kanji]
    else
    full_number += current_number * MULTIPLIERS[kanji]
    end

    number = "1#{number}" if add_one
    current_number = 0
    end
    end

    number.to_i
    full_number += current_number
    end

    # Another method, WIP
    DENS = %r{ [#{POS_FOR_DEN.keys.join}] }x
    NUMS = %r{ [#{NUM_FOR_NUM.keys.join}] }x
    MATCHER = %r{ (?<num> #{NUMS}+ )? (?<denom> #{DENS} ) }x
    def self.convert2(kanji)
    number = ''
    last_pos = 0

    puts MATCHER.inspect
    kanji.gsub!(MATCHER) do |md|
    puts "---"
    num = $1.nil? ? '一' : $1
    puts num

    denom = $2.nil? ? 0 : POS_FOR_DEN[$2]
    puts denom

    if number.length == 0
    number = '0' * (denom + num.length)
    end
    puts number

    conv_num = num.chars.to_a.collect { |n| NUM_FOR_NUM[n] }.join
    number[last_pos, (conv_num.length)] = conv_num
    last_pos = last_pos + conv_num.length

    ''
    end

    number.to_i
    end

    end

    require 'minitest/autorun'
    @@ -93,10 +57,10 @@ def test_convert

    assert_equal 321, KanjiToNumber.convert('三二一')
    assert_equal 4321, KanjiToNumber.convert('四三二一')

    assert_equal 10001, KanjiToNumber.convert('万一')
    assert_equal 10001, KanjiToNumber.convert('一万一')
    assert_equal 240001, KanjiToNumber.convert('24万一')
    assert_equal 11110, KanjiToNumber.convert('万千百十')
    end
    end
    end
  2. @Kimtaro Kimtaro created this gist Jun 15, 2012.
    102 changes: 102 additions & 0 deletions kanji_to_number.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,102 @@
    # Encoding: UTF-8

    # Converts Japanese numerals (kanji, formal kanji, ASCII digits, or a mix
    # of them) into Integers.
    module KanjiToNumber
      # Unit kanji mapped to their power of ten (十 = 10**1 ... 万 = 10**4).
      POS_FOR_DEN = { '十' => 1, '百' => 2, '千' => 3, '万' => 4 }.freeze

      # Digit characters mapped to the ASCII digit they stand for.
      NUM_FOR_NUM = { '壱' => '1', '一' => '1', '1' => '1',
                      '弐' => '2', '二' => '2', '2' => '2',
                      '参' => '3', '三' => '3', '3' => '3',
                      '四' => '4', '4' => '4',
                      '五' => '5', '5' => '5',
                      '六' => '6', '6' => '6',
                      '七' => '7', '7' => '7',
                      '八' => '8', '8' => '8',
                      '九' => '9', '9' => '9',
                      '零' => '0', '〇' => '0', '0' => '0' }.freeze

      # Power of ten from which a unit scales the whole group written before
      # it (二十万 = 20 * 10_000) instead of only the digits directly in front.
      GROUP_UNIT_POWER = 4

      # Convert a Japanese numeral to an Integer.
      #
      # Handles positional notation ('二〇一二'), unit notation ('二千十二'),
      # implicit ones ('千' => 1000, '万千百十' => 11110), ASCII digits mixed
      # with units ('24万一') and 十/百/千 groups in front of 万 ('二十万').
      # Characters found in neither lookup table are ignored.
      #
      # @param kanji [String] the numeral to convert
      # @return [Integer] the numeric value; 0 for an empty string
      def self.convert(kanji)
        total = 0     # value already closed off by a 万
        section = 0   # 十/百/千 groups collected since the last 万
        digits = nil  # pending run of plain digits; nil means "none written"

        kanji.each_char do |char|
          if NUM_FOR_NUM.key?(char)
            digits = (digits || 0) * 10 + NUM_FOR_NUM[char].to_i
          elsif POS_FOR_DEN.key?(char)
            power = POS_FOR_DEN[char]

            if power < GROUP_UNIT_POWER
              # A unit with no digits in front of it carries an implicit one.
              section += (digits || 1) * 10**power
            else
              # A bare 万 (nothing at all in front of it) means one 万.
              group = digits.nil? && section.zero? ? 1 : section + (digits || 0)
              total += group * 10**power
              section = 0
            end

            digits = nil
          end
        end

        total + section + (digits || 0)
      end

      # Another method, WIP
      DENS = %r{ [#{POS_FOR_DEN.keys.join}] }x
      NUMS = %r{ [#{NUM_FOR_NUM.keys.join}] }x
      MATCHER = %r{ (?<num> #{NUMS}+ )? (?<denom> #{DENS} ) }x

      # Experimental regexp-based converter. Still work in progress: prefer
      # {convert}.
      #
      # Scans +kanji+ for "optional digits + unit" groups and writes each
      # group's digits, left to right, into a zero-filled buffer whose width
      # is fixed by the first group. Because MATCHER requires a unit, digits
      # that are not followed by one (such as the trailing 二 of '二千十二')
      # are never picked up.
      #
      # The argument is only read; it is not modified.
      #
      # @param kanji [String] the numeral to convert
      # @return [Integer] the buffer parsed as an integer; 0 if nothing matched
      def self.convert2(kanji)
        number = ''
        last_pos = 0

        kanji.scan(MATCHER) do |num, denom|
          num ||= '一' # implicit one in front of a bare unit

          # The first group decides how wide the result buffer is.
          number = '0' * (POS_FOR_DEN[denom] + num.length) if number.empty?

          digits = num.each_char.map { |n| NUM_FOR_NUM[n] }.join
          number[last_pos, digits.length] = digits
          last_pos += digits.length
        end

        number.to_i
      end
    end

    require 'minitest/autorun'
    # Regression tests for KanjiToNumber.convert.
    #
    # The base class is picked at load time: current Minitest releases expose
    # Minitest::Test, while the releases this file was originally written
    # against only had MiniTest::Unit::TestCase.
    class KanjiToNumberTest < (defined?(Minitest::Test) ? Minitest::Test : MiniTest::Unit::TestCase)
      def test_convert
        # Unit notation and positional notation for the same value
        assert_equal 2012, KanjiToNumber.convert('二千十二')
        assert_equal 2012, KanjiToNumber.convert('二〇一二')

        # Explicit digit in front of every unit, with some units left out
        assert_equal 2222, KanjiToNumber.convert('二千二百二十二')
        assert_equal 2022, KanjiToNumber.convert('二千二十二')
        assert_equal 2002, KanjiToNumber.convert('二千二')
        assert_equal 2000, KanjiToNumber.convert('二千')
        assert_equal 1000, KanjiToNumber.convert('千')

        assert_equal 2220, KanjiToNumber.convert('二千二百二十')
        assert_equal 2200, KanjiToNumber.convert('二千二百')

        # Implicit one in front of the leading unit
        assert_equal 1222, KanjiToNumber.convert('千二百二十二')
        assert_equal 222, KanjiToNumber.convert('二百二十二')
        assert_equal 122, KanjiToNumber.convert('百二十二')

        # Purely positional digits
        assert_equal 321, KanjiToNumber.convert('三二一')
        assert_equal 4321, KanjiToNumber.convert('四三二一')

        # 万, with and without explicit digits in front of it
        assert_equal 10001, KanjiToNumber.convert('万一')
        assert_equal 10001, KanjiToNumber.convert('一万一')
        assert_equal 240001, KanjiToNumber.convert('24万一')
        assert_equal 11110, KanjiToNumber.convert('万千百十')
      end
    end