Last active
March 17, 2025 22:19
-
-
Save FabienDehopre/5245476 to your computer and use it in GitHub Desktop.
Validate C# identifier name
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
/// <summary>
/// Extension methods for checking whether a string is usable as a C# identifier.
/// </summary>
public static class IdentifierExtensions
{
    // Definition of a valid C# identifier: http://msdn.microsoft.com/en-us/library/aa664670(v=vs.71).aspx
    //
    // The grammar is expressed with character classes and a single quantifier.
    // Nesting quantifiers (e.g. "((part)+)*") makes a backtracking regex engine
    // take exponential time on inputs that almost match.

    // letter-character: Lu, Ll, Lt, Lm, Lo or Nl.
    private const string LetterCategories = @"\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}";

    // identifier-start-character: a letter-character or an underscore.
    private const string IdentifierStartCharacter = "[" + LetterCategories + "_]";

    // identifier-part-character: letter, decimal digit (Nd), connecting (Pc),
    // combining (Mn, Mc) or formatting (Cf) character.
    private const string IdentifierPartCharacter =
        "[" + LetterCategories + @"\p{Nd}\p{Pc}\p{Mn}\p{Mc}\p{Cf}]";

    // \A and \z anchor to the absolute start and end of the input; unlike "$",
    // "\z" does not match before a trailing newline.
    private const string IdentifierOrKeyword =
        @"\A" + IdentifierStartCharacter + IdentifierPartCharacter + @"*\z";

    // C# keywords: http://msdn.microsoft.com/en-us/library/x53a06bb(v=vs.71).aspx
    // The four "__" entries are undocumented keywords that are also not valid identifiers.
    private static readonly HashSet<string> _keywords = new HashSet<string>
    {
        "__arglist", "__makeref", "__reftype", "__refvalue",
        "abstract", "as", "base", "bool",
        "break", "byte", "case", "catch",
        "char", "checked", "class", "const",
        "continue", "decimal", "default", "delegate",
        "do", "double", "else", "enum",
        "event", "explicit", "extern", "false",
        "finally", "fixed", "float", "for",
        "foreach", "goto", "if", "implicit",
        "in", "int", "interface", "internal",
        "is", "lock", "long", "namespace",
        "new", "null", "object", "operator",
        "out", "override", "params", "private",
        "protected", "public", "readonly", "ref",
        "return", "sbyte", "sealed", "short",
        "sizeof", "stackalloc", "static", "string",
        "struct", "switch", "this", "throw",
        "true", "try", "typeof", "uint",
        "ulong", "unchecked", "unsafe", "ushort",
        "using", "virtual", "volatile", "void",
        "while"
    };

    private static readonly Regex _validIdentifierRegex =
        new Regex(IdentifierOrKeyword, RegexOptions.Compiled);

    /// <summary>
    /// Determines whether <paramref name="identifier"/> is a valid C# identifier.
    /// </summary>
    /// <param name="identifier">The candidate identifier; may be <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when the normalized text matches the identifier grammar and is not a
    /// keyword, or when it is <c>@</c> followed by text matching the identifier grammar
    /// (keywords allowed); otherwise <c>false</c>. Null, empty, whitespace-only and
    /// non-normalizable input yield <c>false</c>.
    /// </returns>
    public static bool IsValidIdentifier(this string identifier)
    {
        if (String.IsNullOrWhiteSpace(identifier))
        {
            return false;
        }

        string normalizedIdentifier;
        try
        {
            // The grammar is checked against the normalized text (Normalize() defaults to Form C).
            normalizedIdentifier = identifier.Normalize();
        }
        catch (ArgumentException)
        {
            // Normalize() rejects invalid Unicode (e.g. a lone surrogate); report it as
            // "not an identifier" rather than letting a validity check throw.
            return false;
        }

        if (normalizedIdentifier.Length == 0)
        {
            return false;
        }

        // 1. Verbatim form: "@" followed by an identifier or keyword.
        //    Compared as a char so the check is ordinal, not culture-sensitive.
        if (normalizedIdentifier[0] == '@')
        {
            return _validIdentifierRegex.IsMatch(normalizedIdentifier.Substring(1));
        }

        // 2. Plain form: must match the identifier grammar and must not be a C# keyword.
        return _validIdentifierRegex.IsMatch(normalizedIdentifier)
            && !_keywords.Contains(normalizedIdentifier);
    }
}
Thanks @bpierson. I've updated the gist.
Of course, the list of keywords should also be updated, but I currently don't have the free time to do it.
Your hashset is missing four keywords: __arglist, __makeref, __reftype and __refvalue. Though they are not mentioned in the documentation, they exist and are not valid identifiers.
Thanks @piotrstenke. I've updated the list of identifiers.
I also took the opportunity to rearrange the list of identifiers and to format the code using the dotnet-format tool.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here is a test that highlights the difference. It can be quite dramatic with larger inputs:
This gives the following results on my machine:
Array: 500000 iterations, 2000000 matches, 1528 ms
HashSet: 500000 iterations, 2000000 matches, 104 ms
Press any key to continue . . .