Created
April 1, 2026 00:09
-
-
Save Wowfunhappy/c2c882e4344fccf3641279eb9c38930c to your computer and use it in GitHub Desktop.
tmlanguage JSON Converter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| # encoding: utf-8 | |
| # | |
| # JSON tmLanguage Converter | |
| # Compatible with Ruby 1.8+ | |
| # | |
| # Usage: ruby json_converter.rb <file.tmLanguage.json> [output.tmLanguage] | |
| # ruby json_converter.rb <directory> | |
| # | |
| # Converts VS Code-style .tmLanguage.json files to TextMate .tmLanguage | |
| # (plist XML) format. | |
| # | |
| # When given a directory, converts all .tmLanguage.json files found in it. | |
| require 'fileutils' | |
| require 'digest/md5' | |
| # ============================================================================ | |
| # UUID Generator (deterministic from seed) | |
| # ============================================================================ | |
# Builds a deterministic, UUID-shaped identifier from +seed+.
#
# The MD5 hex digest of the seed is upper-cased and cut into the usual
# 8-4-4-4-12 groups, so the same seed always yields the same string.
#
# seed - String to derive the identifier from.
#
# Returns a 36-character String such as
# "90015098-3CD2-4FB0-D696-3F7D28E17F72".
def generate_uuid(seed)
  digest = Digest::MD5.hexdigest(seed).upcase
  offset = 0
  groups = [8, 4, 4, 4, 12].map do |width|
    piece = digest[offset, width]
    offset += width
    piece
  end
  groups.join('-')
end
| # ============================================================================ | |
| # Minimal JSON Parser (Ruby 1.8 compatible) | |
| # ============================================================================ | |
| # Ruby 1.8 does not include a JSON library by default. This parser handles | |
| # the subset of JSON produced by VS Code grammar files. | |
# Small, dependency-free JSON reader.
#
# Besides standard JSON it tolerates what commonly shows up in ".json"
# grammar files: // line comments, /* block */ comments and trailing commas
# in objects and arrays.
#
# Values map to Ruby as: object -> Hash (String keys), array -> Array,
# string -> String, number -> Integer or Float, true/false/null ->
# true/false/nil.
module SimpleJSON
  # Raised whenever the input cannot be parsed.
  class ParseError < StandardError; end

  # Parses +str+ as a single JSON value.
  #
  # str - String holding the JSON text.
  #
  # Returns the decoded Ruby value.
  # Raises ParseError on malformed input or when anything other than
  # whitespace/comments follows the value.
  def self.parse(str)
    parser = Parser.new(str)
    value = parser.parse_value
    parser.skip_whitespace
    unless parser.eof?
      raise ParseError, "Unexpected content after JSON value at position #{parser.pos}"
    end
    value
  end

  # Recursive-descent parser working on a string plus a cursor (+pos+).
  class Parser
    # Current cursor position inside the input string.
    attr_reader :pos

    # U+FFFD, substituted for surrogate code points that do not form a pair.
    REPLACEMENT_CODEPOINT = 0xFFFD

    # Exactly four hexadecimal digits, as required after "\u".
    HEX4 = /\A[0-9a-fA-F]{4}\z/

    def initialize(str)
      @str = str
      @pos = 0
      @len = str.length
    end

    # True once the cursor has moved past the last character.
    def eof?
      @pos >= @len
    end

    # Parses the value starting at the cursor (after skipping whitespace and
    # comments), dispatching on its first character.
    #
    # Raises ParseError at end of input or on an unexpected character.
    def parse_value
      skip_whitespace
      raise ParseError, "Unexpected end of input" if eof?
      c = @str[@pos, 1]
      case c
      when '"' then parse_string
      when '{' then parse_object
      when '[' then parse_array
      when 't' then parse_literal('true', true)
      when 'f' then parse_literal('false', false)
      when 'n' then parse_literal('null', nil)
      when '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
        parse_number
      else
        raise ParseError, "Unexpected character '#{c}' at position #{@pos}"
      end
    end

    # Advances the cursor over spaces, tabs, newlines, carriage returns and
    # over // and /* */ comments (not standard JSON but common in .json
    # files). A lone '/' that does not start a comment stops the scan.
    def skip_whitespace
      while @pos < @len
        c = @str[@pos, 1]
        if c == ' ' || c == "\t" || c == "\n" || c == "\r"
          @pos += 1
        elsif c == '/' && @pos + 1 < @len
          c2 = @str[@pos + 1, 1]
          if c2 == '/'
            # Line comment: runs up to (not including) the next newline.
            @pos += 2
            while @pos < @len && @str[@pos, 1] != "\n"
              @pos += 1
            end
          elsif c2 == '*'
            # Block comment: runs through the closing "*/".
            @pos += 2
            while @pos + 1 < @len
              if @str[@pos, 1] == '*' && @str[@pos + 1, 1] == '/'
                @pos += 2
                break
              end
              @pos += 1
            end
          else
            break
          end
        else
          break
        end
      end
    end

    # Parses a double-quoted string starting at the cursor and returns its
    # decoded contents. Unknown escapes such as "\q" yield the escaped
    # character itself.
    #
    # Raises ParseError if the opening quote is missing, an escape is cut
    # off or malformed, or the closing quote is never found.
    def parse_string
      raise ParseError, "Expected '\"' at position #{@pos}" unless @str[@pos, 1] == '"'
      @pos += 1
      result = ''.dup
      while @pos < @len
        c = @str[@pos, 1]
        if c == '\\'
          @pos += 1
          raise ParseError, "Unexpected end of string escape" if eof?
          esc = @str[@pos, 1]
          case esc
          when '"', '\\', '/' then result << esc
          when 'b' then result << "\b"
          when 'f' then result << "\f"
          when 'n' then result << "\n"
          when 'r' then result << "\r"
          when 't' then result << "\t"
          when 'u' then result << [parse_unicode_escape].pack('U')
          else result << esc
          end
          # Step over the last character consumed by the escape.
          @pos += 1
        elsif c == '"'
          @pos += 1
          return result
        else
          result << c
          @pos += 1
        end
      end
      raise ParseError, "Unterminated string"
    end

    # Parses an object starting at the cursor into a Hash. A trailing comma
    # before '}' is accepted; a repeated key keeps the last value.
    #
    # Key order follows the Hash implementation of the running Ruby
    # (insertion-ordered from Ruby 1.9 on; unordered on 1.8).
    #
    # Raises ParseError on structural errors.
    def parse_object
      raise ParseError, "Expected '{' at position #{@pos}" unless @str[@pos, 1] == '{'
      @pos += 1
      result = {}
      skip_whitespace
      if @str[@pos, 1] == '}'
        @pos += 1
        return result
      end
      loop do
        skip_whitespace
        # Reached either after the last member or after a trailing comma.
        break if @str[@pos, 1] == '}'
        key = parse_string
        skip_whitespace
        raise ParseError, "Expected ':' at position #{@pos}" unless @str[@pos, 1] == ':'
        @pos += 1
        result[key] = parse_value
        skip_whitespace
        if @str[@pos, 1] == ','
          @pos += 1
        elsif @str[@pos, 1] != '}'
          raise ParseError, "Expected ',' or '}' at position #{@pos}, got '#{@str[@pos, 1]}'"
        end
      end
      raise ParseError, "Expected '}' at position #{@pos}" unless @str[@pos, 1] == '}'
      @pos += 1
      result
    end

    # Parses an array starting at the cursor. A trailing comma before ']'
    # is accepted.
    #
    # Raises ParseError on structural errors.
    def parse_array
      raise ParseError, "Expected '[' at position #{@pos}" unless @str[@pos, 1] == '['
      @pos += 1
      result = []
      skip_whitespace
      if @str[@pos, 1] == ']'
        @pos += 1
        return result
      end
      loop do
        skip_whitespace
        # Reached either after the last element or after a trailing comma.
        break if @str[@pos, 1] == ']'
        result << parse_value
        skip_whitespace
        if @str[@pos, 1] == ','
          @pos += 1
        elsif @str[@pos, 1] != ']'
          raise ParseError, "Expected ',' or ']' at position #{@pos}, got '#{@str[@pos, 1]}'"
        end
      end
      raise ParseError, "Expected ']' at position #{@pos}" unless @str[@pos, 1] == ']'
      @pos += 1
      result
    end

    # Parses a number starting at the cursor: optional '-', digits, optional
    # fraction, optional exponent.
    #
    # Returns a Float when the text has a fraction or exponent, otherwise an
    # Integer.
    # Raises ParseError when the consumed text contains no digit at all
    # (for example a bare "-"), instead of silently producing 0.
    def parse_number
      start = @pos
      @pos += 1 if @str[@pos, 1] == '-'
      skip_digits
      if @pos < @len && @str[@pos, 1] == '.'
        @pos += 1
        skip_digits
      end
      if @pos < @len && (@str[@pos, 1] == 'e' || @str[@pos, 1] == 'E')
        @pos += 1
        @pos += 1 if @pos < @len && (@str[@pos, 1] == '+' || @str[@pos, 1] == '-')
        skip_digits
      end
      num_str = @str[start, @pos - start]
      raise ParseError, "Invalid number at position #{start}" unless num_str =~ /[0-9]/
      if num_str.include?('.') || num_str.include?('e') || num_str.include?('E')
        num_str.to_f
      else
        num_str.to_i
      end
    end

    # Consumes the keyword +expected+ at the cursor and returns +value+.
    #
    # Raises ParseError if the input does not contain +expected+ there.
    def parse_literal(expected, value)
      unless @str[@pos, expected.length] == expected
        raise ParseError, "Expected '#{expected}' at position #{@pos}"
      end
      @pos += expected.length
      value
    end

    private

    # Advances the cursor over a run of ASCII digits.
    def skip_digits
      while @pos < @len && @str[@pos, 1] >= '0' && @str[@pos, 1] <= '9'
        @pos += 1
      end
    end

    # Reads four hex digits starting at +index+.
    #
    # Returns the Integer value, or nil when fewer than four characters
    # remain or any of them is not a hex digit.
    def hex4_at(index)
      hex = @str[index, 4]
      return nil unless hex && hex =~ HEX4
      hex.to_i(16)
    end

    # Decodes the "\uXXXX" escape whose 'u' is at the cursor.
    #
    # A high surrogate immediately followed by a "\u" low surrogate is
    # combined into one code point. A surrogate without its partner cannot
    # be encoded as UTF-8, so it becomes U+FFFD; a following "\u" escape that
    # is not a low surrogate is left for the next loop iteration to decode.
    #
    # Leaves the cursor on the last character consumed and returns the code
    # point as an Integer.
    # Raises ParseError when the four hex digits are missing or invalid.
    def parse_unicode_escape
      codepoint = hex4_at(@pos + 1)
      raise ParseError, "Invalid unicode escape at position #{@pos}" if codepoint.nil?
      @pos += 4
      if codepoint >= 0xD800 && codepoint <= 0xDBFF
        low = @str[@pos + 1, 2] == '\\u' ? hex4_at(@pos + 3) : nil
        if low && low >= 0xDC00 && low <= 0xDFFF
          @pos += 6 # also consume the second \uXXXX
          return 0x10000 + ((codepoint - 0xD800) << 10) + (low - 0xDC00)
        end
        return REPLACEMENT_CODEPOINT
      end
      return REPLACEMENT_CODEPOINT if codepoint >= 0xDC00 && codepoint <= 0xDFFF
      codepoint
    end
  end
end
| # ============================================================================ | |
| # Plist XML Writer | |
| # ============================================================================ | |
# Serializes plain Ruby values (Hash, Array, String, Integer, Float,
# true/false) as an Apple XML property list. Nesting is indented with tabs.
module Plist
  # Characters that must not appear literally in XML character data, mapped
  # to their predefined entity references.
  XML_ESCAPES = {
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;'
  }.freeze

  # Renders +value+ as a complete plist document (XML declaration, DOCTYPE
  # and <plist> wrapper included).
  #
  # Returns the document as a String.
  def self.generate(value)
    [
      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
      "<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">\n",
      "<plist version=\"1.0\">\n",
      write_value(value, 0),
      "</plist>\n"
    ].join
  end

  # Renders one value at nesting level +depth+.
  #
  # nil produces an empty string (nothing is emitted); any value of an
  # unlisted class is written as a <string> holding its to_s.
  def self.write_value(value, depth)
    indent = "\t" * depth
    case value
    when Hash
      write_dict(value, depth)
    when Array
      write_array(value, depth)
    when String
      "#{indent}<string>#{escape_xml(value)}</string>\n"
    when Integer
      "#{indent}<integer>#{value}</integer>\n"
    when Float
      "#{indent}<real>#{value}</real>\n"
    when TrueClass
      "#{indent}<true/>\n"
    when FalseClass
      "#{indent}<false/>\n"
    when NilClass
      ""
    else
      "#{indent}<string>#{escape_xml(value.to_s)}</string>\n"
    end
  end

  # Renders +hash+ as a <dict>. Entries whose value is nil are skipped
  # entirely so that no <key> is left without a value element.
  def self.write_dict(hash, depth)
    indent = "\t" * depth
    out = "#{indent}<dict>\n"
    hash.each do |key, val|
      next if val.nil?
      out << "#{indent}\t<key>#{escape_xml(key.to_s)}</key>\n"
      out << write_value(val, depth + 1)
    end
    out << "#{indent}</dict>\n"
    out
  end

  # Renders +array+ as an <array>; nil items contribute nothing.
  def self.write_array(array, depth)
    indent = "\t" * depth
    out = "#{indent}<array>\n"
    array.each do |item|
      out << write_value(item, depth + 1)
    end
    out << "#{indent}</array>\n"
    out
  end

  # Escapes &, < and > so +str+ can be embedded as XML character data.
  # A single pass is used, so the '&' of an inserted entity is never
  # escaped a second time.
  #
  # Returns a new String; +str+ is not modified.
  def self.escape_xml(str)
    str.gsub(/[&<>]/) { |ch| XML_ESCAPES[ch] }
  end
end
| # ============================================================================ | |
| # JSON to tmLanguage Converter | |
| # ============================================================================ | |
# Converts one VS Code-style .tmLanguage.json grammar into a TextMate
# .tmLanguage (XML plist) file.
class JsonTmLanguageConverter
  # Keys to strip from the output (VS Code metadata, not part of tmLanguage)
  STRIP_KEYS = ['information_for_contributors', 'version', '$schema'].freeze

  # Map scopeName to fileTypes for VS Code grammars that lack fileTypes.
  # VS Code stores file associations in package.json, not in the grammar,
  # so we need to provide them when converting to TextMate format.
  SCOPE_FILE_TYPES = {
    'source.js' => %w[js jsx mjs cjs],
    'source.js.jsx' => %w[jsx],
    'source.ts' => %w[ts],
    'source.tsx' => %w[tsx],
    'source.css' => %w[css css.erb],
    'source.json' => %w[json],
    'source.lua' => %w[lua],
    'source.php' => %w[php php3 php4 php5 php7 php8 phps phtml],
    'source.ruby' => %w[rb rbx rjs rabl gemspec podspec irbrc],
    'source.python' => %w[py pyw],
    'source.go' => %w[go],
    'source.rust' => %w[rs],
    'source.swift' => %w[swift],
    'source.java' => %w[java],
    'source.kotlin' => %w[kt kts],
    'source.scala' => %w[scala sbt],
    'source.c' => %w[c h],
    'source.cpp' => %w[cpp cc cxx c++ hpp hh hxx h++],
    'source.objc' => %w[m],
    'source.objcpp' => %w[mm],
    'source.cs' => %w[cs],
    'source.shell' => %w[sh bash],
    'source.yaml' => %w[yaml yml],
    'source.toml' => %w[toml],
    'source.perl' => %w[pl pm pod],
    'source.r' => %w[r R],
    'source.elixir' => %w[ex exs],
    'source.erlang' => %w[erl hrl],
    'source.haskell' => %w[hs],
    'source.dart' => %w[dart],
    'source.zig' => %w[zig],
    'source.scss' => %w[scss],
    'source.sass' => %w[sass],
    'source.less' => %w[less],
    'source.coffee' => %w[coffee],
    'source.groovy' => %w[groovy gvy],
    'text.html.basic' => %w[html htm shtml xhtml],
    'text.xml' => %w[xml xsd xsl xslt]
  }.merge(
    'text.html.markdown' => %w[md markdown mdown]
  ).freeze

  # Reads the JSON grammar at +input_path+, adapts it for TextMate and
  # writes the plist XML to +output_path+.
  #
  # Steps: drop the STRIP_KEYS metadata, infer fileTypes from scopeName when
  # missing, rewrite begin/while rules as begin/end, add a deterministic
  # uuid when absent, then serialize. Progress notes and warnings go to
  # $stderr.
  #
  # Returns the size of the written document in bytes (falls back to the
  # string length on Rubies without String#bytesize).
  # Raises RuntimeError when the top-level JSON value is not an object;
  # parse and file-system errors from the helpers propagate unchanged.
  def convert_file(input_path, output_path)
    raw = File.read(input_path)
    # JSON text is UTF-8. Tag it as such so a non-UTF-8 locale (which would
    # otherwise label the bytes with its own encoding) does not break later
    # string operations. Ruby 1.8 strings have no encoding; skip there.
    raw.force_encoding('UTF-8') if raw.respond_to?(:force_encoding)
    data = SimpleJSON.parse(raw)
    unless data.is_a?(Hash)
      raise "Expected top-level JSON object in #{input_path}"
    end

    # Remove VS Code-specific metadata keys
    STRIP_KEYS.each { |k| data.delete(k) }

    # VS Code grammars use scopeName but omit fileTypes (file associations
    # live in package.json instead). TextMate needs fileTypes to auto-detect.
    if !data['fileTypes'] && data['scopeName']
      scope = data['scopeName']
      if SCOPE_FILE_TYPES.key?(scope)
        data['fileTypes'] = SCOPE_FILE_TYPES[scope]
        $stderr.puts " + fileTypes inferred from scopeName '#{scope}'"
      else
        $stderr.puts " WARNING: No fileTypes and unknown scopeName '#{scope}' — file type detection will not work in TextMate"
      end
    end

    # VS Code supports 'while' patterns (begin/while) that TextMate does not.
    # Convert them to begin/end by inverting the while condition.
    count = convert_while_patterns(data)
    if count > 0
      $stderr.puts " + converted #{count} while pattern(s) to begin/end"
    end

    # Generate a deterministic UUID if one isn't already present
    unless data['uuid']
      seed = data['scopeName'] || data['name'] || input_path
      data['uuid'] = generate_uuid("json-tmlanguage:#{seed}")
    end

    plist_xml = Plist.generate(data)
    File.open(output_path, 'w') { |f| f.write(plist_xml) }
    plist_xml.respond_to?(:bytesize) ? plist_xml.bytesize : plist_xml.length
  end

  private

  # Recursively finds rules with a 'while' pattern (VS Code feature) and
  # rewrites them in place to use 'end' (TextMate compatible).
  #
  # VS Code while patterns typically look like:
  #   begin: <start pattern>
  #   while: ^(?!<terminator>)   -- "keep going while NOT at terminator"
  #
  # The TextMate equivalent is:
  #   begin: <start pattern>
  #   end:   <terminator>        -- "stop when you reach terminator"
  #
  # A hash counts as a rule only when its 'while' value is a String. This
  # keeps hashes that merely use "while" as a key name (for instance a
  # repository entry called "while", whose value is a Hash) from being
  # mistaken for a rule and losing that entry.
  #
  # node - Hash, Array or scalar from the parsed grammar.
  #
  # Returns the number of rules that were changed.
  def convert_while_patterns(node)
    count = 0
    case node
    when Hash
      if node['while'].is_a?(String)
        if node.key?('begin') && !node.key?('end')
          node['end'] = while_to_end(node.delete('while'))
          # Rename whileCaptures -> endCaptures
          if node.key?('whileCaptures')
            node['endCaptures'] = node.delete('whileCaptures')
          end
        else
          # Has both 'while' and 'end' (or no 'begin') — just remove 'while'
          node.delete('while')
          node.delete('whileCaptures')
        end
        count += 1
      end
      node.each_value { |v| count += convert_while_patterns(v) }
    when Array
      node.each { |v| count += convert_while_patterns(v) }
    end
    count
  end

  # Converts a while regex to an end regex.
  #
  # Most while patterns are of the form ^(?!...) meaning "continue while
  # the line does NOT match ...". When the whole pattern is exactly one such
  # negative lookahead (optionally preceded by ^ and followed by $), the
  # lookahead's contents become the end pattern.
  #
  # The contents are only used when their parentheses are balanced. That
  # rules out inputs like "^(?!a)(?!b)", where the text between the first
  # "(?!" and the last ")" is "a)(?!b" — not a valid regex on its own.
  # Everything else takes the fallback below.
  #
  # while_pat - String regex from the grammar.
  #
  # Returns the String to use as the rule's 'end' pattern.
  def while_to_end(while_pat)
    inner = while_pat =~ /\A\^?\(\?!(.*)\)\$?\z/m ? $1 : nil
    if inner && balanced_parens?(inner)
      inner
    else
      # Fallback: negate with a lookahead (less precise but functional)
      "(?!#{while_pat})\\z"
    end
  end

  # Tells whether every ')' in +pattern+ closes a '(' opened earlier in
  # +pattern+ and no '(' is left open. Backslash-escaped characters and
  # anything inside a [...] character class are ignored.
  def balanced_parens?(pattern)
    depth = 0
    in_class = false
    # Tokenize into "\x" escape pairs and single characters.
    pattern.scan(/\\.|./m).each do |tok|
      if in_class
        in_class = false if tok == ']'
      elsif tok == '['
        in_class = true
      elsif tok == '('
        depth += 1
      elsif tok == ')'
        depth -= 1
        return false if depth < 0
      end
    end
    depth == 0
  end
end
| # ============================================================================ | |
| # Main | |
| # ============================================================================ | |
# Command-line entry point; runs only when this file is executed directly.
#
# Exit status: 0 on success (and for an explicit --help / -h), 1 for a usage
# error, a missing input, or when any conversion failed.
if __FILE__ == $0
  help_requested = ARGV.include?('--help') || ARGV.include?('-h')
  if ARGV.empty? || help_requested
    $stderr.puts "Usage: ruby json_converter.rb <file.tmLanguage.json> [output.tmLanguage]"
    $stderr.puts " ruby json_converter.rb <directory>"
    $stderr.puts ""
    $stderr.puts "Converts VS Code .tmLanguage.json files to TextMate .tmLanguage (plist XML)."
    $stderr.puts ""
    $stderr.puts "When given a directory, converts all .tmLanguage.json files found in it."
    # Asking for help is not an error; calling with no arguments is.
    exit(help_requested ? 0 : 1)
  end

  converter = JsonTmLanguageConverter.new

  if File.directory?(ARGV[0])
    # Directory mode: convert every grammar found below the directory,
    # writing each result next to its source file.
    dir = ARGV[0]
    # Sorted so the processing order does not depend on the file system.
    files = Dir.glob(File.join(dir, '**', '*.tmLanguage.json')).sort
    if files.empty?
      $stderr.puts "No .tmLanguage.json files found in #{dir}"
      exit 1
    end
    $stderr.puts "Converting #{files.length} file(s) in #{dir}...\n\n"
    failures = 0
    files.each do |input_path|
      output_path = input_path.sub(/\.tmLanguage\.json$/, '.tmLanguage')
      $stderr.puts " #{File.basename(input_path)}"
      begin
        bytes = converter.convert_file(input_path, output_path)
        $stderr.puts " -> #{File.basename(output_path)} (#{bytes} bytes)"
      rescue => e
        # Keep going with the remaining files, but remember the failure so
        # the exit status can report it.
        failures += 1
        $stderr.puts " ERROR: #{e.message}"
      end
    end
    $stderr.puts "\nDone."
    if failures > 0
      $stderr.puts "#{failures} file(s) failed."
      exit 1
    end
  else
    # Single-file mode.
    input_path = ARGV[0]
    unless File.exist?(input_path)
      $stderr.puts "ERROR: File not found: #{input_path}"
      exit 1
    end
    if ARGV[1]
      output_path = ARGV[1]
    else
      output_path = input_path.sub(/\.tmLanguage\.json$/, '.tmLanguage')
      # Input not named *.tmLanguage.json: derive a distinct output name so
      # the input file is never overwritten.
      if output_path == input_path
        output_path = input_path.sub(/\.json$/, '') + '.tmLanguage'
      end
    end
    $stderr.puts "Converting: #{input_path}"
    $stderr.puts "Output: #{output_path}\n\n"
    begin
      bytes = converter.convert_file(input_path, output_path)
      $stderr.puts "Done. (#{bytes} bytes)"
    rescue => e
      $stderr.puts "ERROR: #{e.message}"
      $stderr.puts e.backtrace.first(5).join("\n") if e.backtrace
      exit 1
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment