Created
May 17, 2016 19:47
-
-
Save rafbarr/a2b7fd7d41b6f45305b7cabea165563c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
//usr/bin/env groovy -cp "$(dirname "$0")" "$0" "$@"; exit $?

// Command-line entry point: lints every Avro schema file found under the directory given as
// the first argument and exits with status 1 if anything is wrong.
//
// The first two lines are a bash/Groovy polyglot: bash executes the second line (which re-runs
// this file with groovy, putting the script's own directory on the classpath), while Groovy
// sees that line as a comment. "$@" is quoted so that arguments containing whitespace (e.g. a
// root directory with spaces in its path) reach the script as single arguments.

if (args.size() == 0) {
    println 'missing root dir'
    System.exit(1)
}

def linter = new AvroSchemaLinter()

try {
    // Map each schema file to its lint result and keep only the files that produced an error
    // (lint returns null for a clean schema, which findAll drops).
    def lintErrors = AvroHelpers
        .loadSchemasFromFiles(new File(args[0]))
        .collectEntries { k, v -> [k, linter.lint(v)] }
        .findAll { it.value }

    if (lintErrors) {
        println(
            lintErrors.collect { k, v ->
                "file '$k':\n${v.prettyPrint(4, 4)}"
            }.join('\n\n')
        )
        System.exit(1)
    } else {
        println "all looks good!"
    }
} catch (e) {
    // Some exceptions carry no message; fall back to their string form instead of printing
    // a bare "null".
    println(e.message ?: e.toString())
    System.exit(1)
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@Grab(group = 'org.apache.avro', module = 'avro', version = '1.8.0') | |
@Grab(group = 'org.yaml', module = 'snakeyaml', version = '1.17') | |
import groovy.io.FileType | |
import groovy.json.JsonOutput | |
import groovy.transform.stc.ClosureParams | |
import groovy.transform.stc.FirstParam | |
import org.apache.avro.Schema | |
import org.yaml.snakeyaml.Yaml | |
import org.yaml.snakeyaml.error.YAMLException | |
class AvroHelpers {

    /**
     * Parses a set of raw schemas resolving any names.
     *
     * A raw schema is anything that can be transformed into a valid Avro schema string. For
     * instance, a {@link File} instance holding an Avro schema could be converted by simply reading
     * its content.
     *
     * Schemas may reference named types defined by other schemas of the set. They are parsed in
     * repeated passes: a schema that fails with an "is not a defined name" error is retried in
     * the next pass, once more named types are known. Parsing stops with an error as soon as a
     * whole pass fails to parse any of the remaining schemas.
     *
     * @param <T> the type of the raw schema
     * @param rawSchemas a set of raw schemas
     * @param schemaAdaptor a closure mapping a raw schema into a valid Avro schema string
     * @return a mapping from the raw schema to the parsed schema
     * @throws RuntimeException if a schema fails to parse for any reason other than an undefined
     *         name, or if the remaining schemas reference names that can never be resolved
     */
    static <T> Map<T, Schema> parse(
        Set<T> rawSchemas,
        @ClosureParams(value=FirstParam.class) Closure<String> schemaAdaptor = Closure.IDENTITY
    ) {
        def remainingRawSchemas = rawSchemas.collect() // defensive copy
        def schemasByRawSchema = [:]
        def schemasByName = [:]

        while (!remainingRawSchemas.isEmpty()) {
            def undefinedNames = new HashSet()

            // removeAll drops every raw schema for which the closure returns true and reports
            // whether the collection changed, i.e. whether this pass parsed at least one schema.
            def madeProgress = remainingRawSchemas.removeAll { rawSchema ->
                // bootstrap the parser with previously parsed named schemas
                def schemaParser = new Schema.Parser().addTypes(schemasByName)

                try {
                    schemasByRawSchema[rawSchema] = schemaParser.parse(schemaAdaptor(rawSchema))
                    // only adopt the parser's types once the whole schema parsed successfully
                    schemasByName = schemaParser.types
                    return true
                } catch (e) {
                    def undefinedNameMatcher = e.message =~ ~/"(.+)" is not a defined name.*/

                    if (!undefinedNameMatcher.matches()) {
                        // keep the original exception as the cause so its stack trace survives
                        throw new RuntimeException(
                            "parsing error '${e.message}' for schema '$rawSchema'", e
                        )
                    }

                    undefinedNames << undefinedNameMatcher[0][1]
                    return false
                }
            }

            // A pass that parsed nothing cannot be improved by another pass: the set of known
            // types is unchanged, so every remaining schema would fail the same way. Checking
            // for progress (rather than comparing the reported names against the known full
            // names) also copes with references written without their namespace.
            if (!madeProgress) {
                throw new RuntimeException(
                    "couldn't resolve names: ${undefinedNames.collect { "'$it'" }.join(',')}"
                )
            }
        }

        schemasByRawSchema
    }

    /**
     * Loads and parses all *.yaml and *.avsc Avro schema files from a directory.
     *
     * YAML files are converted to JSON before being handed to the Avro parser; *.avsc files are
     * passed through as they are.
     *
     * @param rootDir the directory to load the Avro schemas from
     * @return a mapping from the Avro schema file to the parsed schema
     * @throws IllegalArgumentException if {@code rootDir} is not a directory
     * @throws RuntimeException if a YAML file is invalid or a schema cannot be parsed
     */
    static Map<File, Schema> loadSchemasFromFiles(File rootDir) {
        if (!rootDir.isDirectory()) {
            throw new IllegalArgumentException("'$rootDir' is not a valid directory")
        }

        def schemaFiles = new HashSet()
        rootDir.traverse(type: FileType.FILES, nameFilter: ~/.*\.(yaml|avsc)/) {
            schemaFiles << it
        }

        // NOTE(review): Yaml.load with the default constructor can instantiate arbitrary types
        // named by YAML tags; consider a SafeConstructor if schema files may be untrusted.
        def yamlParser = new Yaml()

        parse(schemaFiles) { File schemaFile ->
            if (!schemaFile.path.endsWith(".yaml")) {
                return schemaFile.text
            }

            try {
                JsonOutput.toJson(yamlParser.load(schemaFile.text))
            } catch (YAMLException e) {
                // keep the YAML exception as the cause so its stack trace survives
                throw new RuntimeException(
                    "invalid YAML file '${schemaFile.path}': ${e.message}", e
                )
            }
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@Grab(group = 'org.apache.avro', module = 'avro', version = '1.8.0') | |
import groovy.transform.Immutable | |
import org.apache.avro.Schema | |
import org.apache.avro.Schema.Field | |
import java.util.regex.Pattern | |
/**
 * Very simple linter for Avro schemas.
 *
 * Checks the names of named types (records, enums, fixed), field names and enum symbols against
 * configurable patterns, and optionally reports missing documentation and the use of aliases.
 * Setting a pattern to {@code null} disables the corresponding naming check.
 */
@Immutable(knownImmutableClasses = [Pattern])
final class AvroSchemaLinter {
    Pattern typeNamePattern = ~/([a-z][a-z0-9_]*\.)+[A-Z][a-zA-Z0-9]*/
    Pattern fieldNamePattern = ~/[a-z][a-z0-9_]*/
    Pattern enumSymbolPattern = ~/[A-Z][A-Z0-9_]*/
    boolean doNotAllowMissingDoc = true
    boolean doNotAllowAliases = true

    /**
     * Returns the documentation of a schema or field through the public Avro API.
     * {@link Field} exposes {@code doc()} rather than a getter, so plain property access on a
     * field would only work by reaching into its private state.
     */
    private static String docOf(element) {
        element instanceof Field ? element.doc() : element.doc
    }

    /** Returns the aliases of a named schema or field through the public Avro API. */
    private static Collection<String> aliasesOf(element) {
        element instanceof Field ? element.aliases() : element.aliases
    }

    /** Checks the full name and every alias of a named type against the type name pattern. */
    private List<AvroSchemaLintError> checkTypeNames(Schema schema) {
        assert schema.type in [Schema.Type.FIXED, Schema.Type.ENUM, Schema.Type.RECORD]

        if (!typeNamePattern) {
            return []
        }

        (schema.aliases + schema.fullName).findAll { !(it ==~ typeNamePattern) }.collect {
            new AvroSchemaLintError(
                message: "type name '$it' does not match '$typeNamePattern' pattern"
            )
        }
    }

    /** Checks the name and every alias of a field against the field name pattern. */
    private List<AvroSchemaLintError> checkFieldNames(Field field) {
        if (!fieldNamePattern) {
            return []
        }

        (field.aliases() + field.name()).findAll { !(it ==~ fieldNamePattern) }.collect {
            new AvroSchemaLintError(
                message: "field name '$it' does not match '$fieldNamePattern' pattern"
            )
        }
    }

    /** Reports a schema or field whose documentation is absent or blank, if enabled. */
    private List<AvroSchemaLintError> checkMissingDoc(element) {
        assert element instanceof Schema || element instanceof Field

        if (doNotAllowMissingDoc && !docOf(element)?.trim()) {
            return [new AvroSchemaLintError(message: 'missing documentation')]
        }

        return []
    }

    /** Reports a schema or field that declares any alias, if enabled. */
    private List<AvroSchemaLintError> checkAliases(element) {
        assert element instanceof Schema || element instanceof Field

        if (doNotAllowAliases && aliasesOf(element)) {
            return [new AvroSchemaLintError(message: 'aliases are not allowed')]
        }

        return []
    }

    /** Lints a fixed type: name, documentation and aliases. */
    private List<AvroSchemaLintError> visitFixed(Schema schema) {
        assert schema.type == Schema.Type.FIXED

        checkTypeNames(schema) + checkMissingDoc(schema) + checkAliases(schema)
    }

    /** Lints an enum type: name, documentation, aliases and every symbol. */
    private List<AvroSchemaLintError> visitEnum(Schema schema) {
        assert schema.type == Schema.Type.ENUM

        def errors = checkTypeNames(schema) + checkMissingDoc(schema) + checkAliases(schema)

        if (enumSymbolPattern) {
            schema.enumSymbols.findAll { !(it ==~ enumSymbolPattern) }.each {
                errors << new AvroSchemaLintError(
                    message: "enum symbol '$it' does not match '$enumSymbolPattern' pattern"
                )
            }
        }

        errors
    }

    /**
     * Lints a record type and, recursively, each of its fields. Errors of a field are grouped
     * under a single "field '<name>'" node.
     */
    private List<AvroSchemaLintError> visitRecord(Schema schema, Set<Schema> alreadyVisited) {
        assert schema.type == Schema.Type.RECORD

        def errors = checkTypeNames(schema) + checkMissingDoc(schema) + checkAliases(schema)

        // the record itself is marked as visited while descending into its fields so that a
        // recursive reference back to it is not followed again
        def visitedWithRecord = alreadyVisited + schema

        schema.fields.each {
            def fieldErrors = visitField(it, visitedWithRecord)
            if (fieldErrors) {
                errors << new AvroSchemaLintError(
                    message: "field '${it.name()}'", errors: fieldErrors
                )
            }
        }

        errors
    }

    /** Lints a field (name, documentation, aliases) and then the schema of its value. */
    private List<AvroSchemaLintError> visitField(Field field, Set<Schema> alreadyVisited) {
        checkFieldNames(field) +
            checkMissingDoc(field) +
            checkAliases(field) +
            visitSchema(field.schema(), alreadyVisited)
    }

    /**
     * Lints a schema of any type, descending into arrays, maps, unions and records.
     *
     * @param schema the schema to lint
     * @param alreadyVisited the schemas on the current path, which are not visited again
     * @return an empty list if no problem was found, otherwise a single node labelled with the
     *         schema and holding the problems found below it
     */
    private List<AvroSchemaLintError> visitSchema(Schema schema, Set<Schema> alreadyVisited) {
        if (schema in alreadyVisited) {
            return []
        }

        // stays null for types that have nothing to lint (the branches below cover all others)
        def errors

        switch (schema.type) {
            case Schema.Type.ARRAY:
                errors = visitSchema(schema.elementType, alreadyVisited + schema)
                break
            case Schema.Type.MAP:
                errors = visitSchema(schema.valueType, alreadyVisited + schema)
                break
            case Schema.Type.UNION:
                errors = schema.types.collect { visitSchema(it, alreadyVisited + schema) }.flatten()
                break
            case Schema.Type.FIXED:
                errors = visitFixed(schema)
                break
            case Schema.Type.ENUM:
                errors = visitEnum(schema)
                break
            case Schema.Type.RECORD:
                errors = visitRecord(schema, alreadyVisited)
                break
        }

        if (!errors) {
            return []
        }

        // when the full name is just the type name (e.g. 'array'), use it alone as the label;
        // otherwise prefix the full name with the kind of type
        def typeName = schema.type.name.toLowerCase()
        def message = schema.fullName
        if (schema.fullName != typeName) {
            message = "${typeName} '${schema.fullName}'"
        }

        [new AvroSchemaLintError(message: message, errors: errors)]
    }

    /**
     * Lints a schema.
     *
     * @param schema the schema to lint
     * @return the root of the lint error tree, or {@code null} if the schema is clean
     */
    AvroSchemaLintError lint(Schema schema) {
        def errors = visitSchema(schema, new HashSet())

        if (errors) {
            errors[0]
        } else {
            null
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import groovy.transform.Immutable | |
/**
 * Node of a tree of lint errors: a message plus the errors nested under it.
 */
@Immutable
class AvroSchemaLintError {
    String message
    List<AvroSchemaLintError> errors = []

    /**
     * Renders this error and its nested errors as indented text, one error per line.
     *
     * @param indentLevel number of spaces put in front of this error's message
     * @param indentLevelIncrement number of extra spaces added for each nesting level
     * @return the rendered text, without a trailing newline
     */
    String prettyPrint(int indentLevel, int indentLevelIncrement) {
        def header = ' ' * indentLevel + message

        // a leaf is just its own indented message
        if (!errors) {
            return header
        }

        def childIndent = indentLevel + indentLevelIncrement
        def renderedChildren = errors*.prettyPrint(childIndent, indentLevelIncrement)

        header + ':\n' + renderedChildren.join("\n")
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment