Skip to content

Instantly share code, notes, and snippets.

@CodeMonkeySteve
Created March 2, 2015 19:33

Revisions

  1. CodeMonkeySteve created this gist Mar 2, 2015.
    114 changes: 114 additions & 0 deletions xliff_trans
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,114 @@
    #!/usr/bin/env ruby

    require 'bing_translator'
    require 'active_support'
    require 'active_support/core_ext'

    # Re-opens BingTranslator to layer two things on top of its existing
    # single-string #translate: a YAML cache of finished translations kept in a
    # file next to this script, and a batch #translate that sends all uncached
    # strings to the TranslateArray endpoint in a single request.
    class BingTranslator
      # The cache file lives in the same directory as this script.
      @@cache_path = __dir__+'/.xliff_trans_cache'

      # Shape: { target_locale => { source_string => translated_string } }.
      # YAML.load_file yields a falsy value for an empty file, so fall back to a
      # fresh Hash instead of letting later `@@cache[...]` lookups blow up.
      @@cache = (File.exist?(@@cache_path) && YAML.load_file(@@cache_path)) || {}

      # Writes the in-memory cache to the cache file.
      # The block form guarantees the file is flushed and closed before returning.
      def self.save_cache
        File.open(@@cache_path, 'w') { |file| YAML.dump(@@cache, file) }
      end

      TRANSLATE_ARRAY_URI = URI.parse('http://api.microsofttranslator.com/V2/Http.svc/TranslateArray').freeze

      # Keep the pre-existing single-string implementation reachable under a new
      # name; the batch version below takes over #translate.
      alias_method :translate_one, :translate

      # Translates any number of strings, consulting and filling the cache.
      #
      # strings - source Strings to translate.
      # to:     - target locale; its string form is the cache key.
      # from:   - source locale, 'en' by default.
      #
      # Returns a Hash mapping every given string to its translation (nil when
      # none was obtained). Called with no strings it returns an empty Array.
      def translate(*strings, to:, from: 'en')
        return [] if strings.empty?

        cache = (@@cache[to.to_s] ||= {})
        pending = strings.reject { |str| cache.include?(str) }.uniq

        # Strings containing something tag-like are sent one at a time through
        # the single-string call, flagged as HTML, and removed from the batch.
        pending.reject! do |str|
          next false unless str =~ /<[\w-]+>/

          res = translate_one(str, from: from, to: to, contentType: 'text/html')
          cache[str] = res if res.present?
          true
        end

        if pending.present?
          params = { texts: pending, from: CGI.escape(from.to_s), to: CGI.escape(to.to_s) }
          doc = Nokogiri.XML(array_result(TRANSLATE_ARRAY_URI, params).body)
          translated = doc.xpath('xmlns:ArrayOfTranslateArrayResponse/xmlns:TranslateArrayResponse/xmlns:TranslatedText').map(&:text)
          # Results are paired with requests purely by position.
          pending.each_with_index do |src, idx|
            cache[src] = translated[idx] if translated[idx].present?
          end
        end

        strings.map { |str| [str, cache[str]] }.to_h
      end

      # POSTs a TranslateArrayRequest XML document to +uri+.
      #
      # uri     - URI of the endpoint.
      # params  - Hash with :texts (Array of Strings), :from and :to.
      # headers - accepted for signature compatibility; not used here.
      #
      # Returns the HTTP response when the status is 200.
      # Raises Exception (as that constant resolves inside this class —
      # NOTE(review): presumably the gem's own BingTranslator::Exception; confirm)
      # with the text content of the error body on any other status.
      def array_result(uri, params = {}, headers = {})
        # Presumably populates @access_token, which is read below — defined
        # outside this file.
        get_access_token
        http = Net::HTTP.new(uri.host, uri.port)

        if uri.scheme == "https"
          http.use_ssl = true
          http.verify_mode = OpenSSL::SSL::VERIFY_NONE if @skip_ssl_verify
        end

        builder = Nokogiri::XML::Builder.new do |xml|
          xml.TranslateArrayRequest do
            xml.AppId
            xml.From_ params[:from]
            xml.Options_ do
              # Element name must be spelled "ContentType" for the option to be
              # recognised; it was previously misspelled and thus never applied.
              xml.ContentType({xmlns: 'http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2'}, 'text/html')
            end
            xml.Texts do
              params[:texts].each do |text|
                xml.string({xmlns: 'http://schemas.microsoft.com/2003/10/Serialization/Arrays'}, text )
              end
            end
            xml.To_ params[:to]
          end
        end

        request = Net::HTTP::Post.new(uri.path)
        request.add_field 'Content-Type', 'application/xml'
        request.add_field 'Authorization',"Bearer #{@access_token['access_token']}"
        request.body = builder.to_xml(indent: 2)

        results = http.request(request)
        return results if results.code.to_i == 200

        html = Nokogiri::HTML(results.body)
        raise Exception, html.xpath("//text()").remove.map(&:to_s).join(' ')
      end
    end


    # Build the translator used for every input file.
    # The two arguments are presumably the API client id and client secret
    # (NOTE(review): confirm against BingTranslator#initialize).
    # They can be overridden with BING_CLIENT_ID / BING_CLIENT_SECRET; the
    # original literals remain as fallbacks so existing invocations keep working.
    # SECURITY: a secret committed to source should be considered leaked —
    # rotate it and supply the replacement through the environment.
    translator = BingTranslator.new(
      ENV.fetch('BING_CLIENT_ID', 'org-finagle-sandbox'),
      ENV.fetch('BING_CLIENT_SECRET', 'YdNt0qFIsWChIp5uDlcfLq4td8Xn9MCXvEcBGsXEe/I=')
    )

    # Derives the output path for a translated copy of +path+ by inserting
    # "-auto" before the first extension of the file name, e.g.
    # "foo.xliff" -> "foo-auto.xliff".
    #
    # Only the file-name component is rewritten, so dots in directory names
    # ("./foo.xliff", "my.dir/foo.xliff") neither defeat the rename nor end up
    # carrying the suffix. A name without an extension simply gets "-auto"
    # appended. The result therefore always differs from +path+, so the input
    # file is never overwritten.
    def auto_output_path(path)
      dir, base = File.split(path)
      # \1 = optional leading dot plus everything up to the first further dot.
      auto_base = base.sub(/\A(\.?[^.]*)(.*)\z/m, '\1-auto\2')
      # File.split reports "." for a bare file name; only re-attach a directory
      # the caller actually wrote.
      path.include?(File::SEPARATOR) ? File.join(dir, auto_base) : auto_base
    end

    # Translate each XLIFF file named on the command line and write the result
    # to a sibling "-auto" file.
    ARGV.each do |path|
      xliff = Nokogiri.XML(File.read(path))

      file = xliff.at_xpath('/xmlns:xliff/xmlns:file')
      raise "Missing file tag" unless file

      datatype = file['datatype']
      raise "Unsupported datatype #{datatype}" unless datatype == 'plaintext'

      src_locale, tgt_locale = file['source-language'], file['target-language']

      # Map every <target> node to the text of its unit's <source>.
      # Units without a <target> are skipped.
      # NOTE(review): the leading "//" searches the whole document, not just
      # +file+; with several <file> elements their units are all translated
      # using the first file's locales — confirm whether that is intended.
      trans = {}
      file.xpath('//xmlns:trans-unit').each do |unit|
        src = unit.at_xpath('xmlns:source')
        raise "Missing source translation" unless src.present?
        next unless tgt = unit.at_xpath('xmlns:target')
        trans[tgt] = src.text
      end

      res = translator.translate(*trans.values, to: tgt_locale, from: src_locale)
      trans.each do |tgt, src_text|
        translated = res[src_text]
        # No translation came back: keep the existing target text rather than
        # blanking it.
        next unless translated

        tgt.content = translated
      end

      File.write(auto_output_path(path), xliff.to_xml(indent: 2))
    end

    # Persist the translation cache to disk after all input files have been
    # processed. This line is not reached if processing a file raises, since
    # nothing above rescues.
    BingTranslator.save_cache