Created
October 18, 2022 07:47
-
-
Save uhfath/4189ac4090a182c06cbc4a097ee12c16 to your computer and use it in GitHub Desktop.
RegEx Highlighter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Wraps occurrences of search words inside a text with caller-supplied start/end tokens
/// (for example <c>&lt;b&gt;</c> and <c>&lt;/b&gt;</c>). Matching is case-insensitive.
/// </summary>
/// <remarks>
/// Matches are first marked with two control characters (U+0002 / U+0003) and only swapped for the
/// real tokens once every word has been processed. Each generated pattern tolerates those markers
/// between the characters of a word, so a later word can still match text that an earlier word
/// has already marked. Any U+0002 / U+0003 characters already present in the input are replaced
/// by the real tokens as well.
/// </remarks>
public class Highlighter
{
    private const string TempStartToken = "\x02";
    private const string TempEndToken = "\x03";

    // Zero or more temporary markers left behind by previously processed words.
    private const string TempTokenRun = $"[{TempStartToken}{TempEndToken}]*";

    // Applied to every single character of a word: optional markers, then the (escaped) character.
    private const string SearchPatternTemplate = $"{TempTokenRun}{{0}}";

    // "$&" is the regex substitution for the whole match, so the match is wrapped in the markers.
    private const string ReplacePattern = $"{TempStartToken}$&{TempEndToken}";

    // Compiled regexes are expensive to build; keep one per (word, highlight type) pair.
    private readonly ConcurrentDictionary<HighlightKey, Regex> _regexPatternsCache = new();

    /// <summary>
    /// Returns the <see cref="string.Format(string, object)"/> template that anchors a word pattern
    /// according to <paramref name="highlightType"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The highlight type is not one of the supported values.</exception>
    private static string GetHighlightTypeTemplate(HighlightType highlightType) =>
        highlightType switch
        {
            HighlightType.Starts => "^{0}",
            HighlightType.Contains => "{0}",
            // The end anchor must skip markers appended by earlier words; otherwise only the first
            // word that matches at the end of the text would ever be highlighted.
            HighlightType.Ends => "{0}" + TempTokenRun + "$",
            HighlightType.Equals => "^{0}" + TempTokenRun + "$",
            _ => throw new ArgumentException($"Unsupported {nameof(HighlightType)}: '{highlightType}'", nameof(highlightType)),
        };

    /// <summary>Builds the compiled, case-insensitive regex for one cache key.</summary>
    private static Regex BuildRegex(HighlightKey key)
    {
        // Escape each character on its own so regex metacharacters in the word are matched literally.
        var parts = key.Word.Select(c => string.Format(SearchPatternTemplate, Regex.Escape(c.ToString())));
        var pattern = string.Concat(parts);
        var highlightPattern = string.Format(GetHighlightTypeTemplate(key.HighlightType), pattern);
        return new Regex(highlightPattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);
    }

    /// <summary>
    /// Surrounds every match of every word in <paramref name="words"/> with
    /// <paramref name="startToken"/> and <paramref name="endToken"/>.
    /// </summary>
    /// <param name="text">The text to search.</param>
    /// <param name="words">The words to highlight; null or empty entries are ignored.</param>
    /// <param name="startToken">Text inserted before each match.</param>
    /// <param name="endToken">Text inserted after each match.</param>
    /// <param name="highlightType">Where in <paramref name="text"/> a word has to occur to count as a match.</param>
    /// <returns>The text with all matches wrapped in the given tokens.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="words"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="highlightType"/> is not a supported value.</exception>
    public string Highlight(string text, IReadOnlySet<string> words, string startToken, string endToken, HighlightType highlightType)
    {
        if (text is null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        if (words is null)
        {
            throw new ArgumentNullException(nameof(words));
        }

        foreach (var word in words)
        {
            // An empty word yields an empty pattern, which would "match" at every position.
            if (string.IsNullOrEmpty(word))
            {
                continue;
            }

            var key = new HighlightKey(word, highlightType);
            var regex = _regexPatternsCache.GetOrAdd(key, static k => BuildRegex(k));
            text = regex.Replace(text, ReplacePattern);
        }

        return text
            .Replace(TempStartToken, startToken)
            .Replace(TempEndToken, endToken);
    }

    /// <summary>Cache key: the searched word together with the way it is anchored.</summary>
    private sealed record HighlightKey(string Word, HighlightType HighlightType);
}
/// <summary>
/// Selects where in the text a word has to occur for <c>Highlighter</c> to mark it.
/// </summary>
public enum HighlightType
{
    /// <summary>The match is anchored to the start of the text.</summary>
    Starts = 0,

    /// <summary>The match may occur anywhere in the text.</summary>
    Contains = 1,

    /// <summary>The match is anchored to the end of the text.</summary>
    Ends = 2,

    /// <summary>The match is anchored to both the start and the end of the text.</summary>
    Equals = 3,
}
// LINQPad entry point: highlights each whitespace-separated token of a sample string
// using the "starts with" mode, then dumps the result both as plain text and as rendered HTML.
void Main()
{
    var queries = new[] { "abc" }.ToHashSet();
    var search = "a ab abc abcd abcde";
    var highlighter = new Highlighter();

    // A null separator array makes Split break on whitespace.
    var tokens = search.Split((string[])null, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

    var outputs =
        from token in tokens
        select highlighter.Highlight(token, queries, "<b>", "</b>", HighlightType.Starts);

    var result = string.Join(" ", outputs).Dump();
    Util.RawHtml(result).Dump();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment