Skip to content

Instantly share code, notes, and snippets.

@Wowfunhappy
Created April 1, 2026 00:09
Show Gist options
  • Select an option

  • Save Wowfunhappy/c2c882e4344fccf3641279eb9c38930c to your computer and use it in GitHub Desktop.

Select an option

Save Wowfunhappy/c2c882e4344fccf3641279eb9c38930c to your computer and use it in GitHub Desktop.
tmlanguage JSON Converter
#!/usr/bin/env ruby
# encoding: utf-8
#
# JSON tmLanguage Converter
# Compatible with Ruby 1.8+
#
# Usage: ruby json_converter.rb <file.tmLanguage.json> [output.tmLanguage]
#        ruby json_converter.rb <directory>
#
# Converts VS Code-style .tmLanguage.json files to TextMate .tmLanguage
# (plist XML) format.
#
# When given a directory, converts all .tmLanguage.json files found in it.
require 'fileutils'
require 'digest/md5'
# ============================================================================
# UUID Generator (deterministic from seed)
# ============================================================================
# Builds a deterministic, UUID-shaped identifier from +seed+.
#
# The MD5 hex digest of the seed is upper-cased and split into the usual
# 8-4-4-4-12 groups, so the same seed always produces the same string.
def generate_uuid(seed)
  digest = Digest::MD5.hexdigest(seed)
  # 'aN' takes the next N characters verbatim; the five groups cover all 32.
  digest.upcase.unpack('a8a4a4a4a12').join('-')
end
# ============================================================================
# Minimal JSON Parser (Ruby 1.8 compatible)
# ============================================================================
# Ruby 1.8 does not include a JSON library by default. This parser handles
# the subset of JSON produced by VS Code grammar files.
# Small dependency-free JSON reader.
#
# Besides standard JSON it tolerates two extensions:
#   * // line comments and /* block */ comments wherever whitespace is allowed
#   * a trailing comma before the closing '}' or ']'
#
# Objects become Hash, arrays become Array, strings become String, numbers
# become Integer or Float, and true/false/null become true/false/nil.
module SimpleJSON
  # Raised for every kind of malformed input.
  class ParseError < StandardError; end

  # Parses +str+ as a single JSON value and returns the Ruby equivalent.
  # Raises ParseError if the value is malformed or is followed by anything
  # other than whitespace/comments.
  def self.parse(str)
    parser = Parser.new(str)
    value = parser.parse_value
    parser.skip_whitespace
    unless parser.eof?
      raise ParseError, "Unexpected content after JSON value at position #{parser.pos}"
    end
    value
  end

  # Recursive-descent parser over a string with a moving cursor (+pos+).
  # Characters are always read as @str[@pos, 1] so the result is a String on
  # every Ruby version (on 1.8, @str[@pos] would be an Integer).
  class Parser
    # Single-letter escapes that map to a control character. Every other
    # escaped character (including '"', '\\' and '/') stands for itself.
    SIMPLE_ESCAPES = {
      'b' => "\b",
      'f' => "\f",
      'n' => "\n",
      'r' => "\r",
      't' => "\t"
    }.freeze

    # Current cursor offset into the input.
    attr_reader :pos

    def initialize(str)
      @str = str
      @pos = 0
      @len = str.length
    end

    # True once the cursor has consumed the whole input.
    def eof?
      @pos >= @len
    end

    # Parses whichever JSON value starts at the cursor (after skipping
    # whitespace/comments) and returns it. Raises ParseError on end of input
    # or on a character that cannot start a value.
    def parse_value
      skip_whitespace
      raise ParseError, "Unexpected end of input" if eof?
      c = @str[@pos, 1]
      case c
      when '"'
        parse_string
      when '{'
        parse_object
      when '['
        parse_array
      when 't'
        parse_literal('true', true)
      when 'f'
        parse_literal('false', false)
      when 'n'
        parse_literal('null', nil)
      when '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
        parse_number
      else
        raise ParseError, "Unexpected character '#{c}' at position #{@pos}"
      end
    end

    # Advances the cursor past whitespace and // or /* */ comments.
    # Raises ParseError when a block comment is never closed; previously the
    # opener was skipped silently and the comment text was parsed as JSON.
    def skip_whitespace
      while @pos < @len
        c = @str[@pos, 1]
        if c == ' ' || c == "\t" || c == "\n" || c == "\r"
          @pos += 1
        elsif c == '/' && @pos + 1 < @len
          # Comments are not standard JSON but are common in .json files.
          c2 = @str[@pos + 1, 1]
          if c2 == '/'
            @pos += 2
            @pos += 1 while @pos < @len && @str[@pos, 1] != "\n"
          elsif c2 == '*'
            skip_block_comment
          else
            break
          end
        else
          break
        end
      end
    end

    # Parses a double-quoted string starting at the cursor and returns its
    # decoded contents. Unknown backslash escapes yield the escaped character
    # itself. Raises ParseError for a missing opening quote, an unterminated
    # string, or a malformed \u escape.
    def parse_string
      raise ParseError, "Expected '\"' at position #{@pos}" unless @str[@pos, 1] == '"'
      @pos += 1
      result = ''.dup
      while @pos < @len
        c = @str[@pos, 1]
        if c == '\\'
          @pos += 1
          raise ParseError, "Unexpected end of string escape" if eof?
          esc = @str[@pos, 1]
          if esc == 'u'
            result << parse_unicode_escape
          else
            result << (SIMPLE_ESCAPES[esc] || esc)
            @pos += 1
          end
        elsif c == '"'
          @pos += 1
          return result
        else
          result << c
          @pos += 1
        end
      end
      raise ParseError, "Unterminated string"
    end

    # Parses a {...} object into a Hash with String keys. A trailing comma is
    # accepted, and a repeated key keeps the last value. Key order in the
    # returned Hash is whatever Hash provides: insertion order on Ruby 1.9+,
    # unspecified on 1.8.
    def parse_object
      raise ParseError, "Expected '{' at position #{@pos}" unless @str[@pos, 1] == '{'
      @pos += 1
      result = {}
      skip_whitespace
      if @str[@pos, 1] == '}'
        @pos += 1
        return result
      end
      loop do
        skip_whitespace
        # Reached either after the last pair or after a trailing comma.
        break if @str[@pos, 1] == '}'
        key = parse_string
        skip_whitespace
        raise ParseError, "Expected ':' at position #{@pos}" unless @str[@pos, 1] == ':'
        @pos += 1
        result[key] = parse_value
        skip_whitespace
        if @str[@pos, 1] == ','
          @pos += 1
        elsif @str[@pos, 1] != '}'
          raise ParseError, "Expected ',' or '}' at position #{@pos}, got '#{@str[@pos, 1]}'"
        end
      end
      raise ParseError, "Expected '}' at position #{@pos}" unless @str[@pos, 1] == '}'
      @pos += 1
      result
    end

    # Parses a [...] array into an Array. A trailing comma is accepted.
    def parse_array
      raise ParseError, "Expected '[' at position #{@pos}" unless @str[@pos, 1] == '['
      @pos += 1
      result = []
      skip_whitespace
      if @str[@pos, 1] == ']'
        @pos += 1
        return result
      end
      loop do
        skip_whitespace
        # Reached either after the last element or after a trailing comma.
        break if @str[@pos, 1] == ']'
        result << parse_value
        skip_whitespace
        if @str[@pos, 1] == ','
          @pos += 1
        elsif @str[@pos, 1] != ']'
          raise ParseError, "Expected ',' or ']' at position #{@pos}, got '#{@str[@pos, 1]}'"
        end
      end
      raise ParseError, "Expected ']' at position #{@pos}" unless @str[@pos, 1] == ']'
      @pos += 1
      result
    end

    # Parses a number at the cursor. Returns a Float when the text has a
    # fraction or exponent, otherwise an Integer. Raises ParseError when the
    # integer part, the fraction, or the exponent has no digits (e.g. "-",
    # "1." or "1e"), which were previously coerced to 0 / 1.0 silently.
    def parse_number
      start = @pos
      @pos += 1 if @str[@pos, 1] == '-'
      raise ParseError, "Invalid number at position #{start}" if skip_digits == 0
      is_float = false
      if @str[@pos, 1] == '.'
        @pos += 1
        raise ParseError, "Invalid number at position #{start}" if skip_digits == 0
        is_float = true
      end
      marker = @str[@pos, 1]
      if marker == 'e' || marker == 'E'
        @pos += 1
        sign = @str[@pos, 1]
        @pos += 1 if sign == '+' || sign == '-'
        raise ParseError, "Invalid number at position #{start}" if skip_digits == 0
        is_float = true
      end
      text = @str[start, @pos - start]
      is_float ? text.to_f : text.to_i
    end

    # Consumes the exact keyword +expected+ at the cursor and returns +value+.
    # Raises ParseError if the input does not contain that keyword there.
    def parse_literal(expected, value)
      if @str[@pos, expected.length] == expected
        @pos += expected.length
        value
      else
        raise ParseError, "Expected '#{expected}' at position #{@pos}"
      end
    end

    private

    # Cursor is on the '/' of "/*". Moves it just past the matching "*/".
    def skip_block_comment
      opened_at = @pos
      close = @str.index('*/', @pos + 2)
      if close.nil?
        raise ParseError, "Unterminated comment starting at position #{opened_at}"
      end
      @pos = close + 2
    end

    # Cursor is on the 'u' of a \uXXXX escape. Returns the decoded character
    # as a UTF-8 string and leaves the cursor after the escape. A high
    # surrogate immediately followed by a \u low surrogate is combined into
    # one code point; an unpaired surrogate is emitted as-is.
    def parse_unicode_escape
      codepoint = read_hex4(@pos + 1)
      @pos += 5 # the 'u' plus four hex digits
      if codepoint >= 0xD800 && codepoint <= 0xDBFF && @str[@pos, 2] == '\\u'
        low = read_hex4(@pos + 2)
        if low >= 0xDC00 && low <= 0xDFFF
          codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (low - 0xDC00)
          @pos += 6 # skip the second \uXXXX
        end
      end
      [codepoint].pack('U')
    end

    # Reads exactly four hex digits at +offset+ and returns their value.
    # String#to_i(16) alone would accept "12G4" as 0x12, so the digits are
    # validated first.
    def read_hex4(offset)
      hex = @str[offset, 4]
      unless hex && hex =~ /\A[0-9a-fA-F]{4}\z/
        raise ParseError, "Invalid unicode escape at position #{offset - 1}"
      end
      hex.to_i(16)
    end

    # Advances over a run of ASCII digits and returns how many were consumed.
    def skip_digits
      first = @pos
      @pos += 1 while @pos < @len && @str[@pos, 1] >= '0' && @str[@pos, 1] <= '9'
      @pos - first
    end
  end
end
# ============================================================================
# Plist XML Writer
# ============================================================================
# Serializes plain Ruby values (Hash, Array, String, Integer, Float,
# true/false) into Apple plist XML text. nil values are omitted.
module Plist
  # Fixed document prologue emitted before the root value.
  HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" \
           "<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">\n" \
           "<plist version=\"1.0\">\n"

  # Replacement pairs for XML text content; '&' must be handled first so the
  # entities produced for '<' and '>' are not escaped a second time.
  XML_ESCAPES = [['&', '&amp;'], ['<', '&lt;'], ['>', '&gt;']]

  class << self
    # Returns a complete plist document whose root element is +value+.
    def generate(value)
      [HEADER, write_value(value, 0), "</plist>\n"].join
    end

    # Returns the XML for one value, indented with +depth+ tabs. nil yields
    # an empty string; any type not listed is written as a <string> of its
    # to_s form.
    def write_value(value, depth)
      pad = "\t" * depth
      case value
      when Hash    then write_dict(value, depth)
      when Array   then write_array(value, depth)
      when String  then "#{pad}<string>#{escape_xml(value)}</string>\n"
      when Integer then "#{pad}<integer>#{value}</integer>\n"
      when Float   then "#{pad}<real>#{value}</real>\n"
      when true    then "#{pad}<true/>\n"
      when false   then "#{pad}<false/>\n"
      when nil     then ""
      else "#{pad}<string>#{escape_xml(value.to_s)}</string>\n"
      end
    end

    # Returns a <dict> element for +hash+. Entries whose value is nil are
    # dropped together with their key.
    def write_dict(hash, depth)
      pad = "\t" * depth
      pieces = []
      hash.each do |key, val|
        next if val.nil?
        pieces << "#{pad}\t<key>#{escape_xml(key.to_s)}</key>\n"
        pieces << write_value(val, depth + 1)
      end
      "#{pad}<dict>\n#{pieces.join}#{pad}</dict>\n"
    end

    # Returns an <array> element holding each item of +array+ in order.
    def write_array(array, depth)
      pad = "\t" * depth
      items = array.map { |item| write_value(item, depth + 1) }
      "#{pad}<array>\n#{items.join}#{pad}</array>\n"
    end

    # Returns a copy of +str+ with '&', '<' and '>' replaced by entities.
    def escape_xml(str)
      XML_ESCAPES.inject(str) { |text, (raw, entity)| text.gsub(raw, entity) }
    end
  end
end
# ============================================================================
# JSON to tmLanguage Converter
# ============================================================================
# Converts one VS Code-style .tmLanguage.json grammar into a TextMate
# .tmLanguage plist file.
class JsonTmLanguageConverter
  # Keys to strip from the output (VS Code metadata, not part of tmLanguage)
  STRIP_KEYS = ['information_for_contributors', 'version', '$schema'].freeze

  # Map scopeName to fileTypes for VS Code grammars that lack fileTypes.
  # VS Code stores file associations in package.json, not in the grammar,
  # so we need to provide them when converting to TextMate format.
  SCOPE_FILE_TYPES = {
    'source.js' => %w[js jsx mjs cjs],
    'source.js.jsx' => %w[jsx],
    'source.ts' => %w[ts],
    'source.tsx' => %w[tsx],
    'source.css' => %w[css css.erb],
    'source.json' => %w[json],
    'source.lua' => %w[lua],
    'source.php' => %w[php php3 php4 php5 php7 php8 phps phtml],
    'source.ruby' => %w[rb rbx rjs rabl gemspec podspec irbrc],
    'source.python' => %w[py pyw],
    'source.go' => %w[go],
    'source.rust' => %w[rs],
    'source.swift' => %w[swift],
    'source.java' => %w[java],
    'source.kotlin' => %w[kt kts],
    'source.scala' => %w[scala sbt],
    'source.c' => %w[c h],
    'source.cpp' => %w[cpp cc cxx c++ hpp hh hxx h++],
    'source.objc' => %w[m],
    'source.objcpp' => %w[mm],
    'source.cs' => %w[cs],
    'source.shell' => %w[sh bash],
    'source.yaml' => %w[yaml yml],
    'source.toml' => %w[toml],
    'source.perl' => %w[pl pm pod],
    'source.r' => %w[r R],
    'source.elixir' => %w[ex exs],
    'source.erlang' => %w[erl hrl],
    'source.haskell' => %w[hs],
    'source.dart' => %w[dart],
    'source.zig' => %w[zig],
    'source.scss' => %w[scss],
    'source.sass' => %w[sass],
    'source.less' => %w[less],
    'source.coffee' => %w[coffee],
    'source.groovy' => %w[groovy gvy],
    'text.html.basic' => %w[html htm shtml xhtml],
    'text.xml' => %w[xml xsd xsl xslt],
    'text.html.markdown' => %w[md markdown mdown],
  }.freeze

  # Reads the JSON grammar at +input_path+, adapts it for TextMate and writes
  # the plist XML to +output_path+. Progress notes go to $stderr.
  #
  # Returns the number of bytes written (the value IO#write reports), which
  # is what callers print as "bytes"; String#length would count characters
  # on Ruby 1.9+.
  #
  # Raises RuntimeError if the top-level JSON value is not an object, and
  # lets parser and file-system errors propagate.
  def convert_file(input_path, output_path)
    raw = File.read(input_path)
    data = SimpleJSON.parse(raw)
    unless data.is_a?(Hash)
      raise "Expected top-level JSON object in #{input_path}"
    end

    # Remove VS Code-specific metadata keys
    STRIP_KEYS.each { |k| data.delete(k) }

    ensure_file_types(data)

    # VS Code supports 'while' patterns (begin/while) that TextMate does not.
    # Convert them to begin/end by inverting the while condition.
    count = convert_while_patterns(data)
    if count > 0
      $stderr.puts " + converted #{count} while pattern(s) to begin/end"
    end

    # Generate a deterministic UUID if one isn't already present
    unless data['uuid']
      seed = data['scopeName'] || data['name'] || input_path
      data['uuid'] = generate_uuid("json-tmlanguage:#{seed}")
    end

    plist_xml = Plist.generate(data)
    File.open(output_path, 'w') { |f| f.write(plist_xml) }
  end

  private

  # VS Code grammars use scopeName but omit fileTypes (file associations
  # live in package.json instead). TextMate needs fileTypes to auto-detect,
  # so fill them in from SCOPE_FILE_TYPES when the grammar has none. An
  # empty list ("fileTypes": []) counts as "none"; it used to be treated as
  # present because an empty Array is truthy.
  def ensure_file_types(data)
    declared = data['fileTypes']
    has_types = declared && !(declared.respond_to?(:empty?) && declared.empty?)
    return if has_types

    scope = data['scopeName']
    return unless scope

    if SCOPE_FILE_TYPES.key?(scope)
      # Copy so the grammar never aliases the shared constant's array.
      data['fileTypes'] = SCOPE_FILE_TYPES[scope].dup
      $stderr.puts " + fileTypes inferred from scopeName '#{scope}'"
    else
      $stderr.puts " WARNING: No fileTypes and unknown scopeName '#{scope}' — file type detection will not work in TextMate"
    end
  end

  # Recursively find patterns with 'while' (VS Code feature) and convert
  # them to 'end' (TextMate compatible). Mutates +node+ in place and returns
  # how many rules were changed.
  #
  # VS Code while patterns typically look like:
  #   begin: <start pattern>
  #   while: ^(?!<terminator>) — "keep going while NOT at terminator"
  #
  # The TextMate equivalent is:
  #   begin: <start pattern>
  #   end: <terminator> — "stop when you reach terminator"
  #
  # Only a 'while' whose value is a String is treated as a pattern. Maps
  # with arbitrary keys (e.g. a repository containing a rule that is itself
  # named "while") also have a 'while' key, but its value is a Hash; those
  # used to be deleted outright.
  def convert_while_patterns(node)
    count = 0
    case node
    when Hash
      if node['while'].is_a?(String)
        if node.key?('begin') && !node.key?('end')
          node['end'] = while_to_end(node.delete('while'))
          # Rename whileCaptures -> endCaptures
          if node.key?('whileCaptures')
            node['endCaptures'] = node.delete('whileCaptures')
          end
        else
          # Has both 'while' and 'end' (or no 'begin') — just remove 'while'
          node.delete('while')
          node.delete('whileCaptures')
        end
        count += 1
      end
      node.each_value { |v| count += convert_while_patterns(v) }
    when Array
      node.each { |v| count += convert_while_patterns(v) }
    end
    count
  end

  # Convert a while regex to an end regex.
  #
  # Most while patterns are of the form ^(?!...) meaning "continue while
  # the line does NOT match ...". We strip the negative lookahead wrapper
  # to get the terminator pattern for end.
  #
  # The wrapper is only stripped when the captured text is balanced on its
  # own. For something like ^(?!a)(?!b) the greedy capture would be
  # "a)(?!b", which is not a valid regex, so such patterns take the
  # fallback instead.
  def while_to_end(while_pat)
    inner = nil
    # Match: ^(?!<inner>) or ^(?!<inner>)$ with possible anchors
    inner = $1 if while_pat =~ /\A\^?\(\?!(.*)\)\$?\z/m
    if inner && balanced_group_body?(inner)
      inner
    else
      # Fallback: negate with a lookahead (less precise but functional)
      "(?!#{while_pat})\\z"
    end
  end

  # True when every ')' in +body+ closes a '(' opened inside +body+ and
  # nothing is left open, i.e. +body+ could be the full contents of one
  # group. Backslash escapes and [...] classes are skipped; nested character
  # classes are not modelled. An unclosed class or a trailing backslash
  # makes this return false.
  def balanced_group_body?(body)
    depth = 0
    in_class = false
    i = 0
    while i < body.length
      ch = body[i, 1]
      if ch == '\\'
        # A trailing backslash would escape the wrapper's own ')'.
        return false if i + 1 >= body.length
        i += 2
        next
      end
      if in_class
        in_class = false if ch == ']'
      elsif ch == '['
        in_class = true
      elsif ch == '('
        depth += 1
      elsif ch == ')'
        depth -= 1
        return false if depth < 0
      end
      i += 1
    end
    depth == 0 && !in_class
  end
end
# ============================================================================
# Main
# ============================================================================
# Command-line entry point; runs only when this file is executed directly.
#
# With a directory argument, every *.tmLanguage.json file found beneath it is
# converted in place (same name minus ".json"). With a file argument, that
# file is converted to the optional second argument, or to a derived name.
# All messages go to $stderr. The exit status is 1 for usage errors, missing
# input, or any failed conversion.
if __FILE__ == $0
  if ARGV.length < 1 || ARGV.include?('--help') || ARGV.include?('-h')
    $stderr.puts "Usage: ruby json_converter.rb <file.tmLanguage.json> [output.tmLanguage]"
    $stderr.puts " ruby json_converter.rb <directory>"
    $stderr.puts ""
    $stderr.puts "Converts VS Code .tmLanguage.json files to TextMate .tmLanguage (plist XML)."
    $stderr.puts ""
    $stderr.puts "When given a directory, converts all .tmLanguage.json files found in it."
    exit 1
  end

  converter = JsonTmLanguageConverter.new

  if File.directory?(ARGV[0])
    dir = ARGV[0]
    # Sorted so the processing order does not depend on the file system.
    files = Dir.glob(File.join(dir, '**', '*.tmLanguage.json')).sort
    if files.empty?
      $stderr.puts "No .tmLanguage.json files found in #{dir}"
      exit 1
    end

    $stderr.puts "Converting #{files.length} file(s) in #{dir}...\n\n"
    failures = 0
    files.each do |input_path|
      output_path = input_path.sub(/\.tmLanguage\.json$/, '.tmLanguage')
      $stderr.puts " #{File.basename(input_path)}"
      begin
        bytes = converter.convert_file(input_path, output_path)
        $stderr.puts " -> #{File.basename(output_path)} (#{bytes} bytes)"
      rescue => e
        # Keep going so one bad grammar does not block the rest, but remember
        # the failure so the exit status reflects it.
        failures += 1
        $stderr.puts " ERROR: #{e.message}"
      end
    end
    $stderr.puts "\nDone."
    if failures > 0
      $stderr.puts "#{failures} file(s) failed to convert."
      exit 1
    end
  else
    input_path = ARGV[0]
    unless File.exist?(input_path)
      $stderr.puts "ERROR: File not found: #{input_path}"
      exit 1
    end

    if ARGV[1]
      output_path = ARGV[1]
    else
      output_path = input_path.sub(/\.tmLanguage\.json$/, '.tmLanguage')
      # Input did not end in .tmLanguage.json: derive a name that cannot
      # collide with the input file.
      if output_path == input_path
        output_path = input_path.sub(/\.json$/, '') + '.tmLanguage'
      end
    end

    $stderr.puts "Converting: #{input_path}"
    $stderr.puts "Output: #{output_path}\n\n"
    begin
      bytes = converter.convert_file(input_path, output_path)
      $stderr.puts "Done. (#{bytes} bytes)"
    rescue => e
      $stderr.puts "ERROR: #{e.message}"
      $stderr.puts e.backtrace.first(5).join("\n") if e.backtrace
      exit 1
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment