Created
March 2, 2015 19:33
Revisions
-
CodeMonkeySteve created this gist
Mar 2, 2015. There are no files selected for viewing
# Gist page banner, retained from the original text:
#   This file contains hidden or bidirectional Unicode text that may be interpreted or compiled
#   differently than what appears below. To review, open the file in an editor that reveals
#   hidden Unicode characters. Learn more about bidirectional Unicode characters
#   Original file line number / Diff line number / Diff line change: @@ -0,0 +1,114 @@
#!/usr/bin/env ruby
#
# Usage: pass one or more XLIFF file paths as arguments. For each file, the text of
# every <source> element is machine-translated into the sibling <target> element and
# the result is written next to the input with "-auto" inserted into the file name.
require 'yaml'
require 'bing_translator'
require 'active_support'
require 'active_support/core_ext'

# Reopens the BingTranslator class from the bing_translator gem to replace #translate
# with a batched, disk-cached variant built on the V2 TranslateArray HTTP endpoint.
class BingTranslator
  # Cache layout: { target locale (String) => { source string => translated string } }.
  @@cache = {}
  @@cache_path = __dir__+'/.xliff_trans_cache'
  if File.exist?(@@cache_path)
    loaded = YAML.load_file(@@cache_path)
    # An empty cache file parses to false; only adopt the file's content when it is a Hash,
    # otherwise keep the empty in-memory cache.
    @@cache = loaded if loaded.is_a?(Hash)
  end

  # Writes the in-memory translation cache to the cache file as YAML.
  # The block form of File.open guarantees the file is flushed and closed.
  def self.save_cache
    File.open(@@cache_path, 'w') { |io| YAML.dump(@@cache, io) }
  end

  TRANSLATE_ARRAY_URI = URI.parse('http://api.microsofttranslator.com/V2/Http.svc/TranslateArray').freeze

  # Keep the method that was named #translate before this reopening reachable as
  # #translate_one; the new #translate below uses it for single strings.
  alias_method :translate_one, :translate

  # Translates any number of strings, consulting and filling the shared cache.
  #
  # @param strings [Array<String>] source strings (duplicates allowed)
  # @param to [#to_s] target locale; also the top-level cache key
  # @param from [#to_s] source locale (default 'en')
  # @return [Hash{String => String, nil}] source string => translation; the value is nil
  #   when no translation was obtained. NOTE: returns an empty Array (not a Hash) when
  #   called with no strings; kept as-is for backward compatibility.
  def translate(*strings, to:, from: 'en')
    return [] if strings.empty?

    cache = (@@cache[to.to_s] ||= {})
    pending = strings.reject { |str| cache.include?(str) }.uniq

    # Strings that contain something tag-like (e.g. "<b>") are translated one at a time
    # through translate_one with an HTML content type, and are always removed from the
    # batch regardless of whether that call produced a result.
    pending.reject! do |str|
      next false unless str.match(/<[\w-]+>/)

      res = translate_one(str, from: from, to: to, contentType: 'text/html')
      cache[str] = res if res.present?
      true
    end

    if pending.present?
      params = {
        texts: pending,
        from: CGI.escape(from.to_s),
        to: CGI.escape(to.to_s)
      }
      doc = Nokogiri.XML(array_result(TRANSLATE_ARRAY_URI, params).body)
      translated = doc.xpath(
        'xmlns:ArrayOfTranslateArrayResponse/xmlns:TranslateArrayResponse/xmlns:TranslatedText'
      ).map(&:text)
      # Results are matched to the requested strings by position.
      pending.each_with_index do |src, idx|
        cache[src] = translated[idx] if translated[idx].present?
      end
    end

    strings.map { |str| [str, cache[str]] }.to_h
  end

  # POSTs a TranslateArrayRequest XML document to +uri+ and returns the HTTP response.
  #
  # @param uri [URI] endpoint to post to
  # @param params [Hash] expects :from, :to and :texts (an enumerable of strings)
  # @param headers [Hash] accepted for signature compatibility; not used by this method
  # @return [Net::HTTPResponse] the response, when its status code is 200
  # @raise [Exception] with the text content of the response body for any other status
  #   (the constant is resolved inside BingTranslator, as in the original code)
  def array_result(uri, params = {}, headers = {})
    get_access_token
    http = Net::HTTP.new(uri.host, uri.port)
    if uri.scheme == "https"
      http.use_ssl = true
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE if @skip_ssl_verify
    end

    # Trailing underscores (From_, Options_, To_) are Nokogiri::XML::Builder's way of
    # emitting a tag whose name would otherwise be taken for a Ruby method call.
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.TranslateArrayRequest do
        xml.AppId
        xml.From_ params[:from]
        xml.Options_ do
          # Fixed: the element was misspelled "CotentType", so the text/html option
          # was never sent under its intended name.
          xml.ContentType({xmlns: 'http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2'}, 'text/html')
        end
        xml.Texts do
          params[:texts].each do |text|
            xml.string({xmlns: 'http://schemas.microsoft.com/2003/10/Serialization/Arrays'}, text)
          end
        end
        xml.To_ params[:to]
      end
    end

    request = Net::HTTP::Post.new(uri.path)
    request.add_field 'Content-Type', 'application/xml'
    request.add_field 'Authorization', "Bearer #{@access_token['access_token']}"
    request.body = builder.to_xml(indent: 2)

    results = http.request(request)
    return results if results.code.to_i == 200

    html = Nokogiri::HTML(results.body)
    raise Exception, html.xpath("//text()").remove.map(&:to_s).join(' ')
  end
end

# Builds the output path for +path+ by inserting "-auto" before the first dot of the
# file NAME (directories are left untouched), e.g. "a/foo.en.xliff" -> "a/foo-auto.en.xliff".
# Names without an extension, and dotfiles, get "-auto" appended, so the result never
# equals the input path and the input file is never overwritten.
def auto_output_path(path)
  base = File.basename(path)
  stem, dot, rest = base.partition('.')
  renamed = stem.empty? ? "#{base}-auto" : "#{stem}-auto#{dot}#{rest}"
  path[0, path.length - base.length] + renamed
end

# SECURITY NOTE(review): these credentials are committed in source. They can now be
# overridden through the BING_CLIENT_ID / BING_CLIENT_SECRET environment variables; the
# literals remain only as backward-compatible fallbacks and should be rotated/removed.
translator = BingTranslator.new(
  ENV.fetch('BING_CLIENT_ID', 'org-finagle-sandbox'),
  ENV.fetch('BING_CLIENT_SECRET', 'YdNt0qFIsWChIp5uDlcfLq4td8Xn9MCXvEcBGsXEe/I=')
)

begin
  ARGV.each do |path|
    xliff = Nokogiri.XML(File.read(path))
    file = xliff.at_xpath('/xmlns:xliff/xmlns:file')
    raise "Missing file tag" unless file

    datatype = file['datatype']
    raise "Unsupported datatype #{datatype}" unless datatype == 'plaintext'

    src_locale, tgt_locale = file['source-language'], file['target-language']

    # Maps each <target> node to the text of its unit's <source>; units that have no
    # <target> element are skipped.
    trans = {}
    file.xpath('//xmlns:trans-unit').each do |unit|
      src = unit.at_xpath('xmlns:source')
      raise "Missing source translation" unless src.present?

      tgt = unit.at_xpath('xmlns:target')
      next unless tgt

      trans[tgt] = src.text
    end

    res = translator.translate(*trans.values, to: tgt_locale, from: src_locale)
    trans.each do |tgt, src_text|
      tgt.content = res[src_text]
    end

    File.open(auto_output_path(path), 'w') { |f| f.write(xliff.to_xml(indent: 2)) }
  end
ensure
  # Persist whatever was translated even when a later file raises, so those
  # translations do not have to be fetched again.
  BingTranslator.save_cache
end