Created
May 17, 2016 21:19
-
-
Save rafbarr/d795033be8073fac6ee1e1ce325ba2ec to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
//usr/bin/env groovy -cp "$(dirname "$0")" "$0" "$@"; exit $?
// The two lines above are a bash/Groovy polyglot trampoline: bash executes the second line
// (re-launching this file with groovy), while Groovy treats it as a comment. "$@" is quoted so
// that arguments containing whitespace (e.g. directory names) are forwarded intact.

@Grab(group = 'org.apache.avro', module = 'avro', version = '1.8.0')
import org.apache.avro.SchemaValidatorBuilder

// Checks a directory of Avro schemas against a directory of reference schemas.
//
// Exit status: 0 when every checked schema is compatible with its reference counterpart,
// 1 on usage errors, schema loading errors, missing schemas or any compatibility failure.

def cli = new CliBuilder(stopAtNonOption: false)
// Both directories are mandatory; marking them as required lets CliBuilder report a clear
// usage error instead of failing later when building a File from a missing option.
cli._(
    longOpt: 'reference-schemas', args: 1, argName: 'dir', required: true,
    'directory with the reference schemas'
)
cli._(
    longOpt: 'schemas-to-check', args: 1, argName: 'dir', required: true,
    'directory with the schemas to check'
)
cli.m(
    longOpt: 'mode', args: 1, argName: 'mode', required: true,
    'compatibility mode: backwards, forwards or full'
)
cli.h(longOpt: 'help', 'show this message')

// Help is handled before parsing so that it works even when required options are absent.
if ('-h' in args || '--help' in args) {
    cli.usage()
    System.exit(0)
}

// parse() prints the error plus the usage text and yields a falsy value on invalid input.
def cliOptions = cli.parse(args)
if (!cliOptions) {
    System.exit(1)
}

if (!(cliOptions.m in ['backwards', 'forwards', 'full'])) {
    println "error: Unknown compatibility mode: ${cliOptions.m}"
    cli.usage()
    System.exit(1)
}

// Loads every schema file below the given directory and indexes the named schemas
// (see AvroHelpers.NAMED_SCHEMA_TYPES) by their full name.
def loadNamedSchemas = { String dir ->
    AvroHelpers
        .loadSchemasFromFiles(new File(dir))
        .findAll { it.value.type in AvroHelpers.NAMED_SCHEMA_TYPES }
        .collectEntries { file, schema -> [schema.fullName, schema] }
}

def referenceSchemas, schemasToCheck
try {
    referenceSchemas = loadNamedSchemas(cliOptions.'reference-schemas')
    schemasToCheck = loadNamedSchemas(cliOptions.'schemas-to-check')
} catch (e) {
    println e.message
    System.exit(1)
}

def shouldCheckForBackwardsCompat = cliOptions.m in ['backwards', 'full']
def shouldCheckForForwardsCompat = cliOptions.m in ['forwards', 'full']

def backwardsCompatChecker = new SchemaValidatorBuilder().canReadStrategy().validateLatest()
def forwardsCompatChecker = new SchemaValidatorBuilder().canBeReadStrategy().validateLatest()

// A reference schema without a counterpart is only treated as an error when backwards
// compatibility is requested.
def missingSchemas = referenceSchemas.keySet().minus(schemasToCheck.keySet())
if (missingSchemas && shouldCheckForBackwardsCompat) {
    println "can't find the following schemas: ${missingSchemas.collect { "'$it'" }.join(', ')}"
    System.exit(1)
}

// Runs one validator for one schema; prints the failure (and its reason) and returns true
// when the schema is not compatible with its reference version.
def failsCheck = { checker, String direction, String name, schema ->
    try {
        checker.validate(schema, [referenceSchemas[name]])
        return false
    } catch (e) {
        println "failed $direction compatibility check for '$name' ${schema.type.name}"
        println "  reason: ${e.message}"
        return true
    }
}

// findAll (rather than any) visits every schema, so all incompatibilities are reported in a
// single run instead of stopping at the first failure.
def incompatibleSchemas = schemasToCheck.findAll { name, schema ->
    def failed = false
    // schemas without a reference counterpart are new and have nothing to be compared against
    if (referenceSchemas.containsKey(name)) {
        if (shouldCheckForBackwardsCompat &&
                failsCheck(backwardsCompatChecker, 'backwards', name, schema)) {
            failed = true
        }
        if (shouldCheckForForwardsCompat &&
                failsCheck(forwardsCompatChecker, 'forwards', name, schema)) {
            failed = true
        }
    }
    failed
}

System.exit(incompatibleSchemas ? 1 : 0)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@Grab(group = 'org.apache.avro', module = 'avro', version = '1.8.0') | |
@Grab(group = 'org.yaml', module = 'snakeyaml', version = '1.17') | |
import groovy.io.FileType | |
import groovy.json.JsonOutput | |
import groovy.transform.stc.ClosureParams | |
import groovy.transform.stc.FirstParam | |
import org.apache.avro.Schema | |
import org.yaml.snakeyaml.Yaml | |
import org.yaml.snakeyaml.error.YAMLException | |
/**
 * Helpers for loading and parsing sets of Avro schemas that may reference each other by name.
 */
class AvroHelpers {

    /** The Avro schema types that are identified by a (full) name. */
    static final Set<Schema.Type> NAMED_SCHEMA_TYPES = [
        Schema.Type.FIXED,
        Schema.Type.ENUM,
        Schema.Type.RECORD
    ].toSet().asImmutable()

    /**
     * Parser error messages that denote a reference to a name that is not defined (yet). The
     * first form is the one the original implementation relied on; the second one is what the
     * Avro parser is expected to raise for names referenced from unions, arrays and maps.
     * NOTE(review): confirm the second wording against the Avro version pinned by @Grab.
     */
    private static final List UNDEFINED_NAME_PATTERNS = [
        ~/"(.+)" is not a defined name.*/,
        ~/Undefined name: "(.+)"/
    ]

    /**
     * Parses a set of raw schemas resolving any names.
     *
     * A raw schema is anything that can be transformed into a valid Avro schema string. For
     * instance, a {@link File} instance holding an Avro schema could be converted by simply
     * reading its content.
     *
     * The raw schemas are parsed in passes: a schema that references a name which is not known
     * yet is postponed to the next pass, when more named schemas will be available. Parsing
     * stops with an error as soon as a whole pass fails to parse any schema, which guarantees
     * termination.
     *
     * @param <T> the type of the raw schema
     * @param rawSchemas a set of raw schemas
     * @param schemaAdaptor a closure mapping a raw schema into a valid Avro schema string
     * @return a mapping from the raw schema to the parsed schema
     * @throws RuntimeException if a schema is invalid or if some referenced names can't be
     *         resolved by any of the given schemas
     */
    static <T> Map<T, Schema> parse(
        Set<T> rawSchemas,
        @ClosureParams(value=FirstParam.class) Closure<String> schemaAdaptor = Closure.IDENTITY
    ) {
        def pendingRawSchemas = rawSchemas.collect() // defensive copy
        def schemasByRawSchema = [:]
        def schemasByName = [:]

        while (!pendingRawSchemas.isEmpty()) {
            def undefinedNames = new LinkedHashSet()
            def postponedRawSchemas = []

            for (rawSchema in pendingRawSchemas) {
                // bootstrap the parser with previously parsed named schemas
                def schemaParser = new Schema.Parser().addTypes(schemasByName)
                try {
                    schemasByRawSchema[rawSchema] = schemaParser.parse(schemaAdaptor(rawSchema))
                    // only adopt the parser's types on success, so a failed parse can't leak
                    // partially defined names into later passes
                    schemasByName = schemaParser.types
                } catch (e) {
                    def undefinedName = findUndefinedName(e.message)
                    if (undefinedName == null) {
                        throw new RuntimeException(
                            "parsing error '${e.message}' for schema '$rawSchema'".toString(), e
                        )
                    }
                    undefinedNames << undefinedName
                    postponedRawSchemas << rawSchema
                }
            }

            // A pass that parsed nothing leaves the known names unchanged, hence the next pass
            // would fail in exactly the same way. Names are deliberately not compared against
            // schemasByName: the reported name may be namespace-relative while the known names
            // are fully qualified.
            if (postponedRawSchemas.size() == pendingRawSchemas.size()) {
                throw new RuntimeException(
                    "couldn't resolve names: ${undefinedNames.collect { "'$it'" }.join(',')}"
                        .toString()
                )
            }
            pendingRawSchemas = postponedRawSchemas
        }

        schemasByRawSchema
    }

    /**
     * Extracts the undefined name out of a parser error message.
     *
     * @param message the error message, possibly null
     * @return the undefined name, or null if the message doesn't report an undefined name
     */
    private static String findUndefinedName(String message) {
        if (message == null) {
            return null
        }
        for (pattern in UNDEFINED_NAME_PATTERNS) {
            def matcher = pattern.matcher(message)
            if (matcher.matches()) {
                return matcher.group(1)
            }
        }
        null
    }

    /**
     * Loads and parses all *.yaml and *.avsc Avro schema files from a directory, recursively.
     *
     * YAML files are converted into JSON before being handed to the Avro parser. All files are
     * read as UTF-8 regardless of the platform default charset.
     *
     * @param rootDir the directory to load the Avro schemas from
     * @return a mapping from the Avro schema file to the parsed schema
     * @throws IllegalArgumentException if rootDir is null or not a directory
     * @throws RuntimeException if any of the files can't be parsed
     */
    static Map<File, Schema> loadSchemasFromFiles(File rootDir) {
        if (rootDir == null || !rootDir.isDirectory()) {
            throw new IllegalArgumentException("'$rootDir' is not a valid directory")
        }

        def schemaFiles = new HashSet()
        rootDir.traverse(type: FileType.FILES, nameFilter: ~/.*\.(yaml|avsc)/) {
            schemaFiles << it
        }

        // NOTE(review): the default Yaml constructor honours type tags found in the document;
        // if schema directories may hold untrusted files, consider building the parser with
        // snakeyaml's SafeConstructor instead.
        def yamlParser = new Yaml()

        parse(schemaFiles) { File schemaFile ->
            def content = schemaFile.getText('UTF-8')
            if (!schemaFile.path.endsWith(".yaml")) {
                return content
            }
            try {
                JsonOutput.toJson(yamlParser.load(content))
            } catch (YAMLException e) {
                throw new RuntimeException(
                    "invalid YAML file '${schemaFile.path}': ${e.message}".toString(), e
                )
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment