RomainTT · December 31, 2021 11:21 · RomainTT · Dec 31, 2021
diff --git a/csv-schema.lark b/csv-schema.lark
 // RULES
 // -----

 // Top-level rule: not referenced by any other rule in this grammar.
 // NOTE(review): Lark's default start symbol is "start"; presumably callers pass start="schema" - confirm.
 schema : prolog body
 prolog : versiondecl globaldirectives
 versiondecl : ("version 1.0" | "version 1.1" | "version 1.2")
 // Global directives are all optional but must appear in this fixed order;
 // "noHeader" and "ignoreColumnNameCase" are mutually exclusive.
 globaldirectives : separatordirective? quoteddirective? totalcolumnsdirective? permitemptydirective? (noheaderdirective | ignorecolumnnamecasedirective)?
 // Every directive keyword is introduced by "@".
 directiveprefix : "@"
 // Global directives: the "@" prefix followed by a fixed keyword, with an
 // argument for "separator" and "totalColumns".
 separatordirective            : directiveprefix "separator" (separatortabexpr | separatorchar)
 separatortabexpr              : "tab"
                               | "\t"
 separatorchar                 : CHARACTERLITERAL
 quoteddirective               : directiveprefix "quoted"
 totalcolumnsdirective         : directiveprefix "totalColumns" POSITIVENONZEROINTEGERLITERAL
 permitemptydirective          : directiveprefix "permitEmpty"
 noheaderdirective             : directiveprefix "noHeader"
 ignorecolumnnamecasedirective : directiveprefix "ignoreColumnNameCase"
 // The body is one or more column definitions, each optionally surrounded by comments.
 body : bodypart+
 bodypart : comment* columndefinition comment*
 comment : singlelinecomment | multilinecomment
 // "//" followed by any run of non-whitespace, tab or space characters (so it stops at the line end).
 singlelinecomment : /\/\/[\S\t ]*/
 // "/*" ... "*/". The body may span several lines but may not contain "*".
 // The previous pattern used backslash-star delimiters, so it never matched a "/* */" comment.
 multilinecomment : /\/\*(?:[^*\r\n]+|(?:\r?\n))*\*\//
 // A column definition: identifier (bare or quoted), ":", then its rule.
 columndefinition : (columnidentifier | quotedcolumnidentifier) ":" columnrule
 columnidentifier : POSITIVENONZEROINTEGERLITERAL | IDENT
 quotedcolumnidentifier : STRINGLITERAL
 // Zero or more validation expressions followed by the column directives.
 columnrule : columnvalidationexpr* columndirectives
 // Column directives are all optional but must appear in this fixed order.
 columndirectives : optionaldirective? matchisfalsedirective? ignorecasedirective? warningdirective?
 optionaldirective : directiveprefix "optional"
 matchisfalsedirective : directiveprefix "matchIsFalse"
 ignorecasedirective : directiveprefix "ignoreCase"
 // The keyword is "warning" (written "@warning"); the previous literal
 // "warningDirective" repeated the rule name instead of the keyword.
 warningdirective : directiveprefix "warning"
 // Validation expressions. "or"/"and" chain to the right: the left operand is
 // always a non-combinatorial expression, the right operand may itself combine.
 columnvalidationexpr : combinatorialexpr
                      | noncombinatorialexpr
 combinatorialexpr    : orexpr
                      | andexpr
 orexpr               : noncombinatorialexpr "or" columnvalidationexpr
 andexpr              : noncombinatorialexpr "and" columnvalidationexpr
 noncombinatorialexpr : nonconditionalexpr
                      | conditionalexpr
 nonconditionalexpr   : singleexpr
                      | externalsingleexpr
                      | parenthesizedexpr
 // One check, optionally prefixed with a column reference and "/".
 singleexpr : explicitcontextexpr? (isexpr
            | anyexpr
            | notexpr
            | inexpr
            | startswithexpr
            | endswithexpr
            | regexpexpr
            | rangeexpr
            | lengthexpr
            | emptyexpr
            | notemptyexpr
            | uniqueexpr
            | uriexpr
            | xsddatetimeexpr
            | xsddatetimewithtimezoneexpr
            | xsddateexpr
            | xsdtimeexpr
            | ukdateexpr
            | dateexpr
            | partialukdateexpr
            | partialdateexpr
            | uuid4expr
            | positiveintegerexpr
            | uppercaseexpr
            | lowercaseexpr
            | identicalexpr)
 explicitcontextexpr  : columnref "/"
 // "$" followed by a bare or quoted column identifier.
 columnref            : "$" (columnidentifier | quotedcolumnidentifier)
 // String-comparison checks: keyword, "(", one string provider, ")".
 isexpr         : "is(" stringprovider ")"
 anyexpr        : "any(" stringprovider ")"
 notexpr        : "not(" stringprovider ")"
 inexpr         : "in(" stringprovider ")"
 startswithexpr : "starts(" stringprovider ")"
 endswithexpr   : "ends(" stringprovider ")"
 regexpexpr     : "regex(" STRINGLITERAL ")"
 // Two bounds; at least one must be a numeric literal, the other may be "*".
 rangeexpr      : "range(" (numericorany "," NUMERICLITERAL | NUMERICLITERAL "," numericorany) ")"
 numericorany   : NUMERICLITERAL
                | WILDCARDLITERAL
 // One or two bounds, each an integer literal or "*".
 lengthexpr     : "length(" (positiveintegerorany ",")? positiveintegerorany ")"
 positiveintegerorany : POSITIVEINTEGERLITERAL
                      | WILDCARDLITERAL
 emptyexpr      : "empty"
 notemptyexpr   : "notEmpty"
 // Bare keyword, or keyword with a parenthesised, comma-separated list of column references.
 uniqueexpr     : "unique" ("(" columnref ("," columnref)* ")")?
 uriexpr        : "uri"
 // Date/time checks: a bare keyword, or the keyword with a parenthesised pair of literals.
 // NOTE(review): keywords in this block are all lower case ("xdatetime", "ukdate", "positiveinteger", ...)
 // while others in this grammar are camelCase ("notEmpty", "totalColumns") - confirm the intended spelling.
 xsddatetimeexpr : "xdatetime" ("(" XSDDATETIMELITERAL "," XSDDATETIMELITERAL ")")?
 xsddatetimewithtimezoneexpr : "xdatetimetz" ("(" XSDDATETIMEWITHTIMEZONELITERAL "," XSDDATETIMEWITHTIMEZONELITERAL ")")?
 xsddateexpr : "xdate" ("(" XSDDATELITERAL "," XSDDATELITERAL ")")?
 xsdtimeexpr : "xtime" ("(" XSDTIMELITERAL "," XSDTIMELITERAL ")")?
 ukdateexpr : "ukdate" ("(" UKDATELITERAL "," UKDATELITERAL ")")?
 // Three string providers, optionally followed by a pair of XSD date literals.
 dateexpr : "date(" stringprovider "," stringprovider "," stringprovider ("," XSDDATELITERAL "," XSDDATELITERAL)? ")"

 partialukdateexpr : "partukdate"
 partialdateexpr : "partdate(" stringprovider "," stringprovider "," stringprovider ")"
 uuid4expr : "uuid4"
 positiveintegerexpr : "positiveinteger"
 uppercaseexpr : "uppercase"
 lowercaseexpr : "lowercase"
 // Previously "positiveinteger" (a copy of positiveintegerexpr), which made the
 // "identical" keyword impossible to parse.
 identicalexpr : "identical"
 // File-related checks, optionally prefixed with a column reference and "/".
 externalsingleexpr : explicitcontextexpr? (fileexistsexpr
                    | integritycheckexpr
                    | checksumexpr
                    | filecountexpr)
 fileexistsexpr     : "fileexists" ("(" stringprovider ")")?
 // Up to two leading string providers, then a mandatory quoted folder keyword.
 integritycheckexpr : "integritycheck" "(" (stringprovider ",")? (stringprovider ",")? ("\"includefolder\"" | "\"excludefolder\"") ")"
 checksumexpr       : "checksum(" fileexpr "," STRINGLITERAL ")"
 fileexpr           : "file(" (stringprovider ",")? stringprovider ")"
 filecountexpr      : "filecount(" fileexpr ")"

 // Anything that yields a string: a column reference, a literal, or a nested function.
 stringprovider     : columnref
                    | STRINGLITERAL
                    | concatexpr
                    | noextexpr
                    | uridecodeexpr
 concatexpr         : "concat(" stringprovider ("," stringprovider)+ ")"
 noextexpr          : "noext(" stringprovider ")"
 uridecodeexpr      : "uridecode(" stringprovider ("," stringprovider)? ")"

 // Grouping and conditionals.
 parenthesizedexpr  : "(" columnvalidationexpr+ ")"
 conditionalexpr    : ifexpr
                    | switchexpr
 // Condition, then-branch, and an optional else-branch.
 ifexpr             : "if(" (combinatorialexpr | nonconditionalexpr) "," columnvalidationexpr+ ("," columnvalidationexpr+)? ")"
 // One or more cases followed by an optional default branch.
 switchexpr         : "switch(" switchcaseexpr+ ("," columnvalidationexpr+)? ")"
 switchcaseexpr     : "if(" (combinatorialexpr | nonconditionalexpr) "," columnvalidationexpr+ ")"


 // TERMINALS
 // ---------

 // XSD date/time literals. The date/time separator "T" and the UTC marker "Z" are
 // upper case in the XSD lexical form; the lower-case spellings this grammar used
 // before are still accepted so existing inputs keep parsing.
 XSDDATETIMELITERAL : XSDDATEWITHOUTTIMEZONECOMPONENT /[Tt]/ XSDTIMELITERAL
 XSDDATETIMEWITHTIMEZONELITERAL : XSDDATEWITHOUTTIMEZONECOMPONENT /[Tt]/ XSDTIMEWITHOUTTIMEZONECOMPONENT XSDTIMEZONECOMPONENT
 // The timezone suffix on a date is optional. The "?" is applied here because
 // XSDOPTIONALTIMEZONECOMPONENT itself always matches a non-empty timezone.
 XSDDATELITERAL : XSDDATEWITHOUTTIMEZONECOMPONENT XSDOPTIONALTIMEZONECOMPONENT?
 // NOTE(review): the timezone is mandatory here, which makes XSDDATETIMELITERAL require one as well,
 // just like XSDDATETIMEWITHTIMEZONELITERAL - confirm whether it should be optional.
 XSDTIMELITERAL : XSDTIMEWITHOUTTIMEZONECOMPONENT XSDTIMEZONECOMPONENT
 // yyyy-mm-dd with the day range limited per month (February capped at 29).
 XSDDATEWITHOUTTIMEZONECOMPONENT : /[0-9]{4}-(((0(1|3|5|7|8)|1(0|2))-(0[1-9]|(1|2)[0-9]|3[0-1]))|((0(4|6|9)|11)-(0[1-9]|(1|2)[0-9]|30))|(02-(0[1-9]|(1|2)[0-9])))/
 // hh:mm:ss with optional three-digit fraction.
 XSDTIMEWITHOUTTIMEZONECOMPONENT : /([0-1][0-9]|2[0-4]):(0[0-9]|[1-5][0-9]):(0[0-9]|[1-5][0-9])(\.[0-9]{3})?/
 // Signed hh:mm offset, or the UTC marker.
 XSDOPTIONALTIMEZONECOMPONENT : /((\+|-)(0[1-9]|1[0-9]|2[0-4]):(0[0-9]|[1-5][0-9])|[Zz])/
 XSDTIMEZONECOMPONENT : /((\+|-)(0[1-9]|1[0-9]|2[0-4]):(0[0-9]|[1-5][0-9])|[Zz])/
 // dd/mm/yyyy with the day range limited per month (February capped at 29).
 UKDATELITERAL : /(((0[1-9]|(1|2)[0-9]|3[0-1])\/(0(1|3|5|7|8)|1(0|2)))|((0[1-9]|(1|2)[0-9]|30)\/(0(4|6|9)|11))|((0[1-9]|(1|2)[0-9])\/02))\/[0-9]{4}/
 POSITIVENONZEROINTEGERLITERAL : /[1-9][0-9]*/
 POSITIVEINTEGERLITERAL : /[0-9]+/
 NUMERICLITERAL : /[0-9]+(\.[0-9]+)?/
 // Double-quoted, non-empty, no embedded double quote.
 STRINGLITERAL : "\"" /[^"]+/ "\""
 // A single character between single quotes (no CR, LF, form feed or single quote).
 CHARACTERLITERAL : "'" /[^\r\n\f']/ "'"
 WILDCARDLITERAL : "*"
 // The class previously read [a-za-z...], which repeated a-z and rejected upper-case letters.
 IDENT : /[A-Za-z0-9\-_\.]+/

 // LARK SPECIFIC
 // -------------

 // Pull the NEWLINE and WS terminals from Lark's bundled "common" grammar
 // and tell the lexer to discard them, so they never reach the rules above.
 %import common.NEWLINE
 %import common.WS
 %ignore NEWLINE
 %ignore WS
	// RULES
	// -----

	// Top-level rule: not referenced by any other rule in this grammar.
	// NOTE(review): Lark's default start symbol is "start"; presumably callers pass start="schema" - confirm.
	schema : prolog body
	prolog : versiondecl globaldirectives
	versiondecl : ("version 1.0" | "version 1.1" | "version 1.2")
	// Global directives are all optional but must appear in this fixed order;
	// "noHeader" and "ignoreColumnNameCase" are mutually exclusive.
	globaldirectives : separatordirective? quoteddirective? totalcolumnsdirective? permitemptydirective? (noheaderdirective | ignorecolumnnamecasedirective)?
	// Every directive keyword is introduced by "@".
	directiveprefix : "@"
	// Global directives: the "@" prefix followed by a fixed keyword, with an
	// argument for "separator" and "totalColumns".
	// Alternation is written with a plain "|"; a backslash-escaped pipe is not valid Lark.
	separatordirective : directiveprefix "separator" (separatortabexpr | separatorchar)
	separatortabexpr : "tab" | "\t"
	separatorchar : CHARACTERLITERAL
	quoteddirective : directiveprefix "quoted"
	totalcolumnsdirective : directiveprefix "totalColumns" POSITIVENONZEROINTEGERLITERAL
	permitemptydirective : directiveprefix "permitEmpty"
	noheaderdirective : directiveprefix "noHeader"
	ignorecolumnnamecasedirective : directiveprefix "ignoreColumnNameCase"
	// The body is one or more column definitions, each optionally surrounded by comments.
	body : bodypart+
	bodypart : comment* columndefinition comment*
	comment : singlelinecomment | multilinecomment
	// "//" followed by any run of non-whitespace, tab or space characters (so it stops at the line end).
	singlelinecomment : /\/\/[\S\t ]*/
	// "/*" ... "*/". The body may span several lines but may not contain "*".
	// The previous pattern had lost its "*" characters and its alternation pipe, leaving an invalid regexp.
	multilinecomment : /\/\*(?:[^*\r\n]+|(?:\r?\n))*\*\//
	// A column definition: identifier (bare or quoted), ":", then its rule.
	// Alternation is written with a plain "|"; a backslash-escaped pipe is not valid Lark.
	columndefinition : (columnidentifier | quotedcolumnidentifier) ":" columnrule
	columnidentifier : POSITIVENONZEROINTEGERLITERAL | IDENT
	quotedcolumnidentifier : STRINGLITERAL
	// Zero or more validation expressions followed by the column directives.
	columnrule : columnvalidationexpr* columndirectives
	// Column directives are all optional but must appear in this fixed order.
	columndirectives : optionaldirective? matchisfalsedirective? ignorecasedirective? warningdirective?
	optionaldirective : directiveprefix "optional"
	matchisfalsedirective : directiveprefix "matchIsFalse"
	ignorecasedirective : directiveprefix "ignoreCase"
	// The keyword is "warning" (written "@warning"); the previous literal
	// "warningDirective" repeated the rule name instead of the keyword.
	warningdirective : directiveprefix "warning"
	// Validation expressions. "or"/"and" chain to the right: the left operand is
	// always a non-combinatorial expression, the right operand may itself combine.
	// Alternation is written with a plain "|"; a backslash-escaped pipe is not valid Lark.
	columnvalidationexpr : combinatorialexpr | noncombinatorialexpr
	combinatorialexpr : orexpr | andexpr
	orexpr : noncombinatorialexpr "or" columnvalidationexpr
	andexpr : noncombinatorialexpr "and" columnvalidationexpr
	noncombinatorialexpr : nonconditionalexpr | conditionalexpr
	nonconditionalexpr : singleexpr | externalsingleexpr | parenthesizedexpr
	// One check, optionally prefixed with a column reference and "/".
	singleexpr : explicitcontextexpr? (isexpr | anyexpr | notexpr | inexpr | startswithexpr | endswithexpr | regexpexpr | rangeexpr | lengthexpr | emptyexpr | notemptyexpr | uniqueexpr | uriexpr | xsddatetimeexpr | xsddatetimewithtimezoneexpr | xsddateexpr | xsdtimeexpr | ukdateexpr | dateexpr | partialukdateexpr | partialdateexpr | uuid4expr | positiveintegerexpr | uppercaseexpr | lowercaseexpr | identicalexpr)
	explicitcontextexpr : columnref "/"
	// "$" followed by a bare or quoted column identifier.
	columnref : "$" (columnidentifier | quotedcolumnidentifier)
	// String-comparison checks: keyword, "(", one string provider, ")".
	isexpr : "is(" stringprovider ")"
	anyexpr : "any(" stringprovider ")"
	notexpr : "not(" stringprovider ")"
	inexpr : "in(" stringprovider ")"
	startswithexpr : "starts(" stringprovider ")"
	endswithexpr : "ends(" stringprovider ")"
	regexpexpr : "regex(" STRINGLITERAL ")"
	// Two bounds; at least one must be a numeric literal, the other may be "*".
	// Alternation is written with a plain "|"; a backslash-escaped pipe is not valid Lark.
	rangeexpr : "range(" (numericorany "," NUMERICLITERAL | NUMERICLITERAL "," numericorany) ")"
	numericorany : NUMERICLITERAL | WILDCARDLITERAL
	// One or two bounds, each an integer literal or "*".
	lengthexpr : "length(" (positiveintegerorany ",")? positiveintegerorany ")"
	positiveintegerorany : POSITIVEINTEGERLITERAL | WILDCARDLITERAL
	emptyexpr : "empty"
	notemptyexpr : "notEmpty"
	// Bare keyword, or keyword with a parenthesised, comma-separated list of column references.
	uniqueexpr : "unique" ("(" columnref ("," columnref)* ")")?
	uriexpr : "uri"
	// Date/time checks: a bare keyword, or the keyword with a parenthesised pair of literals.
	// NOTE(review): keywords in this block are all lower case ("xdatetime", "ukdate", "positiveinteger", ...)
	// while others in this grammar are camelCase ("notEmpty", "totalColumns") - confirm the intended spelling.
	xsddatetimeexpr : "xdatetime" ("(" XSDDATETIMELITERAL "," XSDDATETIMELITERAL ")")?
	xsddatetimewithtimezoneexpr : "xdatetimetz" ("(" XSDDATETIMEWITHTIMEZONELITERAL "," XSDDATETIMEWITHTIMEZONELITERAL ")")?
	xsddateexpr : "xdate" ("(" XSDDATELITERAL "," XSDDATELITERAL ")")?
	xsdtimeexpr : "xtime" ("(" XSDTIMELITERAL "," XSDTIMELITERAL ")")?
	ukdateexpr : "ukdate" ("(" UKDATELITERAL "," UKDATELITERAL ")")?
	// Three string providers, optionally followed by a pair of XSD date literals.
	dateexpr : "date(" stringprovider "," stringprovider "," stringprovider ("," XSDDATELITERAL "," XSDDATELITERAL)? ")"

	partialukdateexpr : "partukdate"
	partialdateexpr : "partdate(" stringprovider "," stringprovider "," stringprovider ")"
	uuid4expr : "uuid4"
	positiveintegerexpr : "positiveinteger"
	uppercaseexpr : "uppercase"
	lowercaseexpr : "lowercase"
	// Previously "positiveinteger" (a copy of positiveintegerexpr), which made the
	// "identical" keyword impossible to parse.
	identicalexpr : "identical"
	// File-related checks, optionally prefixed with a column reference and "/".
	// Alternation is written with a plain "|"; a backslash-escaped pipe is not valid Lark.
	externalsingleexpr : explicitcontextexpr? (fileexistsexpr | integritycheckexpr | checksumexpr | filecountexpr)
	fileexistsexpr : "fileexists" ("(" stringprovider ")")?
	// Up to two leading string providers, then a mandatory quoted folder keyword.
	integritycheckexpr : "integritycheck" "(" (stringprovider ",")? (stringprovider ",")? ("\"includefolder\"" | "\"excludefolder\"") ")"
	checksumexpr : "checksum(" fileexpr "," STRINGLITERAL ")"
	fileexpr : "file(" (stringprovider ",")? stringprovider ")"
	filecountexpr : "filecount(" fileexpr ")"
	// Anything that yields a string: a column reference, a literal, or a nested function.
	stringprovider : columnref | STRINGLITERAL | concatexpr | noextexpr | uridecodeexpr
	concatexpr : "concat(" stringprovider ("," stringprovider)+ ")"
	noextexpr : "noext(" stringprovider ")"
	uridecodeexpr : "uridecode(" stringprovider ("," stringprovider)? ")"
	// Grouping and conditionals.
	parenthesizedexpr : "(" columnvalidationexpr+ ")"
	conditionalexpr : ifexpr | switchexpr
	// Condition, then-branch, and an optional else-branch.
	ifexpr : "if(" (combinatorialexpr | nonconditionalexpr) "," columnvalidationexpr+ ("," columnvalidationexpr+)? ")"
	// One or more cases followed by an optional default branch.
	switchexpr : "switch(" switchcaseexpr+ ("," columnvalidationexpr+)? ")"
	switchcaseexpr : "if(" (combinatorialexpr | nonconditionalexpr) "," columnvalidationexpr+ ")"


	// TERMINALS
	// ---------

	// Inside these regexps alternation is a plain "|"; a backslash-escaped pipe
	// would match a literal pipe character and break every pattern below.

	// XSD date/time literals. The date/time separator "T" and the UTC marker "Z" are
	// upper case in the XSD lexical form; the lower-case spellings this grammar used
	// before are still accepted so existing inputs keep parsing.
	XSDDATETIMELITERAL : XSDDATEWITHOUTTIMEZONECOMPONENT /[Tt]/ XSDTIMELITERAL
	XSDDATETIMEWITHTIMEZONELITERAL : XSDDATEWITHOUTTIMEZONECOMPONENT /[Tt]/ XSDTIMEWITHOUTTIMEZONECOMPONENT XSDTIMEZONECOMPONENT
	// The timezone suffix on a date is optional. The "?" is applied here because
	// XSDOPTIONALTIMEZONECOMPONENT itself always matches a non-empty timezone.
	XSDDATELITERAL : XSDDATEWITHOUTTIMEZONECOMPONENT XSDOPTIONALTIMEZONECOMPONENT?
	// NOTE(review): the timezone is mandatory here, which makes XSDDATETIMELITERAL require one as well,
	// just like XSDDATETIMEWITHTIMEZONELITERAL - confirm whether it should be optional.
	XSDTIMELITERAL : XSDTIMEWITHOUTTIMEZONECOMPONENT XSDTIMEZONECOMPONENT
	// yyyy-mm-dd with the day range limited per month (February capped at 29).
	XSDDATEWITHOUTTIMEZONECOMPONENT : /[0-9]{4}-(((0(1|3|5|7|8)|1(0|2))-(0[1-9]|(1|2)[0-9]|3[0-1]))|((0(4|6|9)|11)-(0[1-9]|(1|2)[0-9]|30))|(02-(0[1-9]|(1|2)[0-9])))/
	// hh:mm:ss with optional three-digit fraction.
	XSDTIMEWITHOUTTIMEZONECOMPONENT : /([0-1][0-9]|2[0-4]):(0[0-9]|[1-5][0-9]):(0[0-9]|[1-5][0-9])(\.[0-9]{3})?/
	// Signed hh:mm offset, or the UTC marker.
	XSDOPTIONALTIMEZONECOMPONENT : /((\+|-)(0[1-9]|1[0-9]|2[0-4]):(0[0-9]|[1-5][0-9])|[Zz])/
	XSDTIMEZONECOMPONENT : /((\+|-)(0[1-9]|1[0-9]|2[0-4]):(0[0-9]|[1-5][0-9])|[Zz])/
	// dd/mm/yyyy with the day range limited per month (February capped at 29).
	UKDATELITERAL : /(((0[1-9]|(1|2)[0-9]|3[0-1])\/(0(1|3|5|7|8)|1(0|2)))|((0[1-9]|(1|2)[0-9]|30)\/(0(4|6|9)|11))|((0[1-9]|(1|2)[0-9])\/02))\/[0-9]{4}/
	POSITIVENONZEROINTEGERLITERAL : /[1-9][0-9]*/
	POSITIVEINTEGERLITERAL : /[0-9]+/
	NUMERICLITERAL : /[0-9]+(\.[0-9]+)?/
	// Double-quoted, non-empty, no embedded double quote.
	STRINGLITERAL : "\"" /[^"]+/ "\""
	// A single character between single quotes (no CR, LF, form feed or single quote).
	CHARACTERLITERAL : "'" /[^\r\n\f']/ "'"
	WILDCARDLITERAL : "*"
	// The class previously read [a-za-z...], which repeated a-z and rejected upper-case letters.
	IDENT : /[A-Za-z0-9\-_\.]+/

	// LARK SPECIFIC
	// -------------

	// Pull the NEWLINE and WS terminals from Lark's bundled "common" grammar
	// and tell the lexer to discard them, so they never reach the rules above.
	%import common.NEWLINE
	%import common.WS
	%ignore NEWLINE
	%ignore WS