using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace S16.Text
{
    /// <summary>
    /// Converts text to and from ASCII-friendly escape notations:
    /// <c>\uXXXX</c> escapes (<see cref="Encode"/> / <see cref="Decode"/>) and
    /// hexadecimal XML character references (<see cref="EncodeXml"/>).
    /// </summary>
    public class UniEncodeDecode
    {
        /// <summary>
        /// Matches a literal backslash, the letter 'u' and exactly four hex digits.
        /// </summary>
        private static readonly Regex _unicodeEscape =
            new Regex(@"\\u([0-9A-Fa-f]{4})", RegexOptions.CultureInvariant);

        public UniEncodeDecode()
        {
        }

        /// <summary>
        /// Replaces every character whose code is 0xFF or above with a <c>\uXXXX</c> escape.
        /// </summary>
        /// <remarks>
        /// Characters below 0xFF (including a literal backslash) are copied unchanged, so input
        /// that already contains text shaped like <c>\u0041</c> will be unescaped by <see cref="Decode"/>.
        /// </remarks>
        /// <param name="text">the text to encode</param>
        /// <returns>the encoded text; an empty string when <paramref name="text"/> is null or empty</returns>
        public string Encode(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            StringBuilder output = new StringBuilder(text.Length);
            foreach (char ch in text)
            {
                if (ch < (char)0xFF)
                {
                    output.Append(ch);
                }
                else
                {
                    // Always four digits: an unpadded "\u100" would be ambiguous when followed
                    // by another hex digit and could not be decoded reliably.
                    output.Append("\\u").Append(((int)ch).ToString("X4"));
                }
            }

            return output.ToString();
        }

        /// <summary>
        /// Replaces every character whose code is 0xFF or above with a hexadecimal XML character
        /// reference such as <c>&amp;#x20AC;</c>.
        /// </summary>
        /// <remarks>
        /// Only the character range is handled; markup characters such as '&lt;' and '&amp;'
        /// are copied unchanged.
        /// </remarks>
        /// <param name="text">the text to encode</param>
        /// <returns>the encoded text; an empty string when <paramref name="text"/> is null or empty</returns>
        public string EncodeXml(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            StringBuilder output = new StringBuilder(text.Length);
            foreach (char ch in text)
            {
                if (ch < (char)0xFF)
                {
                    output.Append(ch);
                }
                else
                {
                    output.Append("&#x").Append(((int)ch).ToString("X")).Append(';');
                }
            }

            return output.ToString();
        }

        /// <summary>
        /// Replaces every <c>\uXXXX</c> escape (exactly four hex digits, either case) with the
        /// character it denotes. All other text is returned unchanged.
        /// </summary>
        /// <param name="text">the text to decode</param>
        /// <returns>the decoded text; an empty string when <paramref name="text"/> is null or empty</returns>
        public string Decode(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            return _unicodeEscape.Replace(text, DecodeEscape);
        }

        /// <summary>
        /// Converts one <c>\uXXXX</c> match into the single character it encodes.
        /// </summary>
        private static string DecodeEscape(Match match)
        {
            return ((char)Convert.ToInt32(match.Groups[1].Value, 16)).ToString();
        }

        /// <summary>
        /// Helpers for encoding and decoding HTML character entities.
        /// </summary>
        internal static class HtmlUtils
        {
            #region Fields and Consts

            /// <summary>
            /// Highest valid Unicode code point.
            /// </summary>
            private const int MaxCodePoint = 0x10FFFF;

            /// <summary>
            /// Substituted for numeric references that lie beyond <see cref="MaxCodePoint"/>.
            /// </summary>
            private const char ReplacementChar = '\uFFFD';

            /// <summary>
            /// List of html tags that don't have content
            /// </summary>
            private static readonly List<string> _list = new List<string>(
                new[]
                {
                    "area", "base", "basefont", "br", "col",
                    "frame", "hr", "img", "input", "isindex",
                    "link", "meta", "param"
                }
            );

            /// <summary>
            /// The html encode\decode pairs (entity, plain text). The ampersand pair is last so
            /// that decoding (forward order) handles it last and encoding (reverse order) handles it first.
            /// </summary>
            private static readonly KeyValuePair<string, string>[] _encodeDecode = new[]
            {
                new KeyValuePair<string, string>("&lt;", "<"),
                new KeyValuePair<string, string>("&gt;", ">"),
                new KeyValuePair<string, string>("&quot;", "\""),
                new KeyValuePair<string, string>("&amp;", "&"),
            };

            /// <summary>
            /// The html decode only pairs (entity name without '&amp;' and ';', character).
            /// Keys are case-sensitive because names such as "Agrave" and "agrave" denote different characters.
            /// </summary>
            private static readonly Dictionary<string, char> _decodeOnly = new Dictionary<string, char>(StringComparer.Ordinal);

            /// <summary>
            /// Length of the longest key in <see cref="_decodeOnly"/>; bounds the search for the closing ';'.
            /// </summary>
            private static readonly int _maxEntityNameLength;

            #endregion

            /// <summary>
            /// Init.
            /// </summary>
            static HtmlUtils()
            {
                // "nbsp" is mapped to a plain space rather than U+00A0.
                _decodeOnly["nbsp"] = ' ';
                _decodeOnly["apos"] = '\'';

                // ISO 8859-1 Symbols
                _decodeOnly["iexcl"] = Convert.ToChar(161);
                _decodeOnly["cent"] = Convert.ToChar(162);
                _decodeOnly["pound"] = Convert.ToChar(163);
                _decodeOnly["curren"] = Convert.ToChar(164);
                _decodeOnly["yen"] = Convert.ToChar(165);
                _decodeOnly["brvbar"] = Convert.ToChar(166);
                _decodeOnly["sect"] = Convert.ToChar(167);
                _decodeOnly["uml"] = Convert.ToChar(168);
                _decodeOnly["copy"] = Convert.ToChar(169);
                _decodeOnly["ordf"] = Convert.ToChar(170);
                _decodeOnly["laquo"] = Convert.ToChar(171);
                _decodeOnly["not"] = Convert.ToChar(172);
                _decodeOnly["shy"] = Convert.ToChar(173);
                _decodeOnly["reg"] = Convert.ToChar(174);
                _decodeOnly["macr"] = Convert.ToChar(175);
                _decodeOnly["deg"] = Convert.ToChar(176);
                _decodeOnly["plusmn"] = Convert.ToChar(177);
                _decodeOnly["sup2"] = Convert.ToChar(178);
                _decodeOnly["sup3"] = Convert.ToChar(179);
                _decodeOnly["acute"] = Convert.ToChar(180);
                _decodeOnly["micro"] = Convert.ToChar(181);
                _decodeOnly["para"] = Convert.ToChar(182);
                _decodeOnly["middot"] = Convert.ToChar(183);
                _decodeOnly["cedil"] = Convert.ToChar(184);
                _decodeOnly["sup1"] = Convert.ToChar(185);
                _decodeOnly["ordm"] = Convert.ToChar(186);
                _decodeOnly["raquo"] = Convert.ToChar(187);
                _decodeOnly["frac14"] = Convert.ToChar(188);
                _decodeOnly["frac12"] = Convert.ToChar(189);
                _decodeOnly["frac34"] = Convert.ToChar(190);
                _decodeOnly["iquest"] = Convert.ToChar(191);
                _decodeOnly["times"] = Convert.ToChar(215);
                _decodeOnly["divide"] = Convert.ToChar(247);

                // ISO 8859-1 Characters
                _decodeOnly["Agrave"] = Convert.ToChar(192);
                _decodeOnly["Aacute"] = Convert.ToChar(193);
                _decodeOnly["Acirc"] = Convert.ToChar(194);
                _decodeOnly["Atilde"] = Convert.ToChar(195);
                _decodeOnly["Auml"] = Convert.ToChar(196);
                _decodeOnly["Aring"] = Convert.ToChar(197);
                _decodeOnly["AElig"] = Convert.ToChar(198);
                _decodeOnly["Ccedil"] = Convert.ToChar(199);
                _decodeOnly["Egrave"] = Convert.ToChar(200);
                _decodeOnly["Eacute"] = Convert.ToChar(201);
                _decodeOnly["Ecirc"] = Convert.ToChar(202);
                _decodeOnly["Euml"] = Convert.ToChar(203);
                _decodeOnly["Igrave"] = Convert.ToChar(204);
                _decodeOnly["Iacute"] = Convert.ToChar(205);
                _decodeOnly["Icirc"] = Convert.ToChar(206);
                _decodeOnly["Iuml"] = Convert.ToChar(207);
                _decodeOnly["ETH"] = Convert.ToChar(208);
                _decodeOnly["Ntilde"] = Convert.ToChar(209);
                _decodeOnly["Ograve"] = Convert.ToChar(210);
                _decodeOnly["Oacute"] = Convert.ToChar(211);
                _decodeOnly["Ocirc"] = Convert.ToChar(212);
                _decodeOnly["Otilde"] = Convert.ToChar(213);
                _decodeOnly["Ouml"] = Convert.ToChar(214);
                _decodeOnly["Oslash"] = Convert.ToChar(216);
                _decodeOnly["Ugrave"] = Convert.ToChar(217);
                _decodeOnly["Uacute"] = Convert.ToChar(218);
                _decodeOnly["Ucirc"] = Convert.ToChar(219);
                _decodeOnly["Uuml"] = Convert.ToChar(220);
                _decodeOnly["Yacute"] = Convert.ToChar(221);
                _decodeOnly["THORN"] = Convert.ToChar(222);
                _decodeOnly["szlig"] = Convert.ToChar(223);
                _decodeOnly["agrave"] = Convert.ToChar(224);
                _decodeOnly["aacute"] = Convert.ToChar(225);
                _decodeOnly["acirc"] = Convert.ToChar(226);
                _decodeOnly["atilde"] = Convert.ToChar(227);
                _decodeOnly["auml"] = Convert.ToChar(228);
                _decodeOnly["aring"] = Convert.ToChar(229);
                _decodeOnly["aelig"] = Convert.ToChar(230);
                _decodeOnly["ccedil"] = Convert.ToChar(231);
                _decodeOnly["egrave"] = Convert.ToChar(232);
                _decodeOnly["eacute"] = Convert.ToChar(233);
                _decodeOnly["ecirc"] = Convert.ToChar(234);
                _decodeOnly["euml"] = Convert.ToChar(235);
                _decodeOnly["igrave"] = Convert.ToChar(236);
                _decodeOnly["iacute"] = Convert.ToChar(237);
                _decodeOnly["icirc"] = Convert.ToChar(238);
                _decodeOnly["iuml"] = Convert.ToChar(239);
                _decodeOnly["eth"] = Convert.ToChar(240);
                _decodeOnly["ntilde"] = Convert.ToChar(241);
                _decodeOnly["ograve"] = Convert.ToChar(242);
                _decodeOnly["oacute"] = Convert.ToChar(243);
                _decodeOnly["ocirc"] = Convert.ToChar(244);
                _decodeOnly["otilde"] = Convert.ToChar(245);
                _decodeOnly["ouml"] = Convert.ToChar(246);
                _decodeOnly["oslash"] = Convert.ToChar(248);
                _decodeOnly["ugrave"] = Convert.ToChar(249);
                _decodeOnly["uacute"] = Convert.ToChar(250);
                _decodeOnly["ucirc"] = Convert.ToChar(251);
                _decodeOnly["uuml"] = Convert.ToChar(252);
                _decodeOnly["yacute"] = Convert.ToChar(253);
                _decodeOnly["thorn"] = Convert.ToChar(254);
                _decodeOnly["yuml"] = Convert.ToChar(255);

                // Math Symbols Supported by HTML
                _decodeOnly["forall"] = Convert.ToChar(8704);
                _decodeOnly["part"] = Convert.ToChar(8706);
                _decodeOnly["exist"] = Convert.ToChar(8707);
                _decodeOnly["empty"] = Convert.ToChar(8709);
                _decodeOnly["nabla"] = Convert.ToChar(8711);
                _decodeOnly["isin"] = Convert.ToChar(8712);
                _decodeOnly["notin"] = Convert.ToChar(8713);
                _decodeOnly["ni"] = Convert.ToChar(8715);
                _decodeOnly["prod"] = Convert.ToChar(8719);
                _decodeOnly["sum"] = Convert.ToChar(8721);
                _decodeOnly["minus"] = Convert.ToChar(8722);
                _decodeOnly["lowast"] = Convert.ToChar(8727);
                _decodeOnly["radic"] = Convert.ToChar(8730);
                _decodeOnly["prop"] = Convert.ToChar(8733);
                _decodeOnly["infin"] = Convert.ToChar(8734);
                _decodeOnly["ang"] = Convert.ToChar(8736);
                _decodeOnly["and"] = Convert.ToChar(8743);
                _decodeOnly["or"] = Convert.ToChar(8744);
                _decodeOnly["cap"] = Convert.ToChar(8745);
                _decodeOnly["cup"] = Convert.ToChar(8746);
                _decodeOnly["int"] = Convert.ToChar(8747);
                _decodeOnly["there4"] = Convert.ToChar(8756);
                _decodeOnly["sim"] = Convert.ToChar(8764);
                _decodeOnly["cong"] = Convert.ToChar(8773);
                _decodeOnly["asymp"] = Convert.ToChar(8776);
                _decodeOnly["ne"] = Convert.ToChar(8800);
                _decodeOnly["equiv"] = Convert.ToChar(8801);
                _decodeOnly["le"] = Convert.ToChar(8804);
                _decodeOnly["ge"] = Convert.ToChar(8805);
                _decodeOnly["sub"] = Convert.ToChar(8834);
                _decodeOnly["sup"] = Convert.ToChar(8835);
                _decodeOnly["nsub"] = Convert.ToChar(8836);
                _decodeOnly["sube"] = Convert.ToChar(8838);
                _decodeOnly["supe"] = Convert.ToChar(8839);
                _decodeOnly["oplus"] = Convert.ToChar(8853);
                _decodeOnly["otimes"] = Convert.ToChar(8855);
                _decodeOnly["perp"] = Convert.ToChar(8869);
                _decodeOnly["sdot"] = Convert.ToChar(8901);

                // Greek Letters Supported by HTML
                _decodeOnly["Alpha"] = Convert.ToChar(913);
                _decodeOnly["Beta"] = Convert.ToChar(914);
                _decodeOnly["Gamma"] = Convert.ToChar(915);
                _decodeOnly["Delta"] = Convert.ToChar(916);
                _decodeOnly["Epsilon"] = Convert.ToChar(917);
                _decodeOnly["Zeta"] = Convert.ToChar(918);
                _decodeOnly["Eta"] = Convert.ToChar(919);
                _decodeOnly["Theta"] = Convert.ToChar(920);
                _decodeOnly["Iota"] = Convert.ToChar(921);
                _decodeOnly["Kappa"] = Convert.ToChar(922);
                _decodeOnly["Lambda"] = Convert.ToChar(923);
                _decodeOnly["Mu"] = Convert.ToChar(924);
                _decodeOnly["Nu"] = Convert.ToChar(925);
                _decodeOnly["Xi"] = Convert.ToChar(926);
                _decodeOnly["Omicron"] = Convert.ToChar(927);
                _decodeOnly["Pi"] = Convert.ToChar(928);
                _decodeOnly["Rho"] = Convert.ToChar(929);
                _decodeOnly["Sigma"] = Convert.ToChar(931);
                _decodeOnly["Tau"] = Convert.ToChar(932);
                _decodeOnly["Upsilon"] = Convert.ToChar(933);
                _decodeOnly["Phi"] = Convert.ToChar(934);
                _decodeOnly["Chi"] = Convert.ToChar(935);
                _decodeOnly["Psi"] = Convert.ToChar(936);
                _decodeOnly["Omega"] = Convert.ToChar(937);
                _decodeOnly["alpha"] = Convert.ToChar(945);
                _decodeOnly["beta"] = Convert.ToChar(946);
                _decodeOnly["gamma"] = Convert.ToChar(947);
                _decodeOnly["delta"] = Convert.ToChar(948);
                _decodeOnly["epsilon"] = Convert.ToChar(949);
                _decodeOnly["zeta"] = Convert.ToChar(950);
                _decodeOnly["eta"] = Convert.ToChar(951);
                _decodeOnly["theta"] = Convert.ToChar(952);
                _decodeOnly["iota"] = Convert.ToChar(953);
                _decodeOnly["kappa"] = Convert.ToChar(954);
                _decodeOnly["lambda"] = Convert.ToChar(955);
                _decodeOnly["mu"] = Convert.ToChar(956);
                _decodeOnly["nu"] = Convert.ToChar(957);
                _decodeOnly["xi"] = Convert.ToChar(958);
                _decodeOnly["omicron"] = Convert.ToChar(959);
                _decodeOnly["pi"] = Convert.ToChar(960);
                _decodeOnly["rho"] = Convert.ToChar(961);
                _decodeOnly["sigmaf"] = Convert.ToChar(962);
                _decodeOnly["sigma"] = Convert.ToChar(963);
                _decodeOnly["tau"] = Convert.ToChar(964);
                _decodeOnly["upsilon"] = Convert.ToChar(965);
                _decodeOnly["phi"] = Convert.ToChar(966);
                _decodeOnly["chi"] = Convert.ToChar(967);
                _decodeOnly["psi"] = Convert.ToChar(968);
                _decodeOnly["omega"] = Convert.ToChar(969);
                _decodeOnly["thetasym"] = Convert.ToChar(977);
                _decodeOnly["upsih"] = Convert.ToChar(978);
                _decodeOnly["piv"] = Convert.ToChar(982);

                // Other Entities Supported by HTML
                _decodeOnly["OElig"] = Convert.ToChar(338);
                _decodeOnly["oelig"] = Convert.ToChar(339);
                _decodeOnly["Scaron"] = Convert.ToChar(352);
                _decodeOnly["scaron"] = Convert.ToChar(353);
                _decodeOnly["Yuml"] = Convert.ToChar(376);
                _decodeOnly["fnof"] = Convert.ToChar(402);
                _decodeOnly["circ"] = Convert.ToChar(710);
                _decodeOnly["tilde"] = Convert.ToChar(732);
                _decodeOnly["ndash"] = Convert.ToChar(8211);
                _decodeOnly["mdash"] = Convert.ToChar(8212);
                _decodeOnly["lsquo"] = Convert.ToChar(8216);
                _decodeOnly["rsquo"] = Convert.ToChar(8217);
                _decodeOnly["sbquo"] = Convert.ToChar(8218);
                _decodeOnly["ldquo"] = Convert.ToChar(8220);
                _decodeOnly["rdquo"] = Convert.ToChar(8221);
                _decodeOnly["bdquo"] = Convert.ToChar(8222);
                _decodeOnly["dagger"] = Convert.ToChar(8224);
                _decodeOnly["Dagger"] = Convert.ToChar(8225);
                _decodeOnly["bull"] = Convert.ToChar(8226);
                _decodeOnly["hellip"] = Convert.ToChar(8230);
                _decodeOnly["permil"] = Convert.ToChar(8240);
                _decodeOnly["prime"] = Convert.ToChar(8242);
                _decodeOnly["Prime"] = Convert.ToChar(8243);
                _decodeOnly["lsaquo"] = Convert.ToChar(8249);
                _decodeOnly["rsaquo"] = Convert.ToChar(8250);
                _decodeOnly["oline"] = Convert.ToChar(8254);
                _decodeOnly["euro"] = Convert.ToChar(8364);
                // U+2122 TRADE MARK SIGN (code 153 is a C1 control character in Unicode).
                _decodeOnly["trade"] = Convert.ToChar(8482);
                _decodeOnly["larr"] = Convert.ToChar(8592);
                _decodeOnly["uarr"] = Convert.ToChar(8593);
                _decodeOnly["rarr"] = Convert.ToChar(8594);
                _decodeOnly["darr"] = Convert.ToChar(8595);
                _decodeOnly["harr"] = Convert.ToChar(8596);
                _decodeOnly["crarr"] = Convert.ToChar(8629);
                _decodeOnly["lceil"] = Convert.ToChar(8968);
                _decodeOnly["rceil"] = Convert.ToChar(8969);
                _decodeOnly["lfloor"] = Convert.ToChar(8970);
                _decodeOnly["rfloor"] = Convert.ToChar(8971);
                _decodeOnly["loz"] = Convert.ToChar(9674);
                _decodeOnly["spades"] = Convert.ToChar(9824);
                _decodeOnly["clubs"] = Convert.ToChar(9827);
                _decodeOnly["hearts"] = Convert.ToChar(9829);
                _decodeOnly["diams"] = Convert.ToChar(9830);

                // Derive the search bound from the table so every registered name can be matched.
                int longest = 0;
                foreach (string name in _decodeOnly.Keys)
                {
                    if (name.Length > longest)
                    {
                        longest = name.Length;
                    }
                }
                _maxEntityNameLength = longest;
            }

            /// <summary>
            /// Is the given html tag is single tag or can have content.
            /// </summary>
            /// <param name="tagName">the tag to check (must be lower case)</param>
            /// <returns>true - is single tag, false - otherwise</returns>
            public static bool IsSingleTag(string tagName)
            {
                return _list.Contains(tagName);
            }

            /// <summary>
            /// Decode html encoded string to regular string.<br/>
            /// Numeric references are decoded first, then named entities, then the
            /// <c>lt</c>, <c>gt</c>, <c>quot</c> and <c>amp</c> entities.
            /// </summary>
            /// <remarks>
            /// The stages run one after another over the whole string, so the output of an earlier
            /// stage can be decoded again by a later one (for example a numeric ampersand
            /// followed by <c>lt;</c>).
            /// </remarks>
            /// <param name="str">the string to decode</param>
            /// <returns>decoded string; null or empty input is returned as is</returns>
            public static string DecodeHtml(string str)
            {
                if (!string.IsNullOrEmpty(str))
                {
                    str = DecodeHtmlCharByCode(str);

                    str = DecodeHtmlCharByName(str);

                    foreach (KeyValuePair<string, string> encPair in _encodeDecode)
                    {
                        str = str.Replace(encPair.Key, encPair.Value);
                    }
                }
                return str;
            }

            /// <summary>
            /// Encode regular string into html encoded string.<br/>
            /// Handles the characters '&lt;', '&gt;', '"' and '&amp;'.
            /// </summary>
            /// <param name="str">the string to encode</param>
            /// <returns>encoded string; null or empty input is returned as is</returns>
            public static string EncodeHtml(string str)
            {
                if (!string.IsNullOrEmpty(str))
                {
                    // Reverse order: '&' must be escaped first, otherwise the ampersands
                    // introduced by the other replacements would be escaped a second time.
                    for (int i = _encodeDecode.Length - 1; i >= 0; i--)
                    {
                        str = str.Replace(_encodeDecode[i].Value, _encodeDecode[i].Key);
                    }
                }
                return str;
            }

            #region Private methods

            /// <summary>
            /// Check if the given char is a digit character (0-9) and (0-9, a-f for HEX)
            /// </summary>
            /// <param name="ch">the character to check</param>
            /// <param name="hex">true to also accept a-f / A-F</param>
            /// <returns>true - is digit, false - not a digit</returns>
            private static bool IsDigit(char ch, bool hex)
            {
                return (ch >= '0' && ch <= '9')
                    || (hex && ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')));
            }

            /// <summary>
            /// Convert the given char to digit.
            /// </summary>
            /// <param name="ch">the character to convert</param>
            /// <param name="hex">true to also convert a-f / A-F</param>
            /// <returns>the numeric value of the digit, or 0 when the character is not a digit</returns>
            private static int ToDigit(char ch, bool hex)
            {
                if (ch >= '0' && ch <= '9')
                {
                    return ch - '0';
                }

                if (hex)
                {
                    if (ch >= 'a' && ch <= 'f')
                    {
                        return ch - 'a' + 10;
                    }

                    if (ch >= 'A' && ch <= 'F')
                    {
                        return ch - 'A' + 10;
                    }
                }

                return 0;
            }

            /// <summary>
            /// Decode html special characters encoded using a numeric character reference,
            /// decimal (e.g. <c>&amp;#8364;</c>) or hexadecimal (e.g. <c>&amp;#x20AC;</c>).
            /// The terminating ';' is optional.
            /// </summary>
            /// <remarks>
            /// A "&amp;#" that is not followed by any digit is left untouched. Values above U+FFFF
            /// become a surrogate pair; values above U+10FFFF become U+FFFD.
            /// </remarks>
            /// <param name="str">the string to decode (must not be null)</param>
            /// <returns>decoded string</returns>
            private static string DecodeHtmlCharByCode(string str)
            {
                int idx = str.IndexOf("&#", StringComparison.Ordinal);
                if (idx < 0)
                {
                    return str;
                }

                StringBuilder result = new StringBuilder(str.Length);
                int copyFrom = 0;

                while (idx > -1)
                {
                    bool hex = str.Length > idx + 3 && (str[idx + 2] == 'x' || str[idx + 2] == 'X');
                    int radix = hex ? 16 : 10;
                    int digitsStart = idx + 2 + (hex ? 1 : 0);
                    int endIdx = digitsStart;
                    long num = 0;

                    while (endIdx < str.Length && IsDigit(str[endIdx], hex))
                    {
                        // Stop accumulating once the value is out of range so that an
                        // arbitrarily long digit run cannot overflow the accumulator.
                        if (num <= MaxCodePoint)
                        {
                            num = num * radix + ToDigit(str[endIdx], hex);
                        }
                        endIdx++;
                    }

                    if (endIdx == digitsStart)
                    {
                        // No digits at all: not a character reference, keep the text as is.
                        idx = str.IndexOf("&#", idx + 2, StringComparison.Ordinal);
                        continue;
                    }

                    if (endIdx < str.Length && str[endIdx] == ';')
                    {
                        endIdx++;
                    }

                    result.Append(str, copyFrom, idx - copyFrom);
                    if (num <= char.MaxValue)
                    {
                        result.Append((char)num);
                    }
                    else if (num <= MaxCodePoint)
                    {
                        result.Append(char.ConvertFromUtf32((int)num));
                    }
                    else
                    {
                        result.Append(ReplacementChar);
                    }

                    copyFrom = endIdx;
                    idx = str.IndexOf("&#", endIdx, StringComparison.Ordinal);
                }

                result.Append(str, copyFrom, str.Length - copyFrom);
                return result.ToString();
            }

            /// <summary>
            /// Decode html special characters encoded using a character entity name
            /// (e.g. <c>&amp;euro;</c>) found in the decode-only table.
            /// </summary>
            /// <param name="str">the string to decode (must not be null)</param>
            /// <returns>decoded string</returns>
            private static string DecodeHtmlCharByName(string str)
            {
                int idx = str.IndexOf('&');
                if (idx < 0)
                {
                    return str;
                }

                StringBuilder result = new StringBuilder(str.Length);
                int copyFrom = 0;

                while (idx > -1)
                {
                    int next = idx + 1;

                    // Only look for ';' within reach of the longest known name.
                    int window = Math.Min(_maxEntityNameLength + 1, str.Length - idx - 1);
                    int endIdx = window > 0 ? str.IndexOf(';', idx + 1, window) : -1;
                    if (endIdx > -1)
                    {
                        string key = str.Substring(idx + 1, endIdx - idx - 1);
                        char c;
                        if (TryGetEntity(key, out c))
                        {
                            result.Append(str, copyFrom, idx - copyFrom);
                            result.Append(c);
                            copyFrom = endIdx + 1;
                            next = endIdx + 1;
                        }
                    }

                    idx = str.IndexOf('&', next);
                }

                result.Append(str, copyFrom, str.Length - copyFrom);
                return result.ToString();
            }

            /// <summary>
            /// Look up an entity name: exact (case-sensitive) match first, then the lower-cased
            /// name so that input such as "NBSP" or "Copy" still resolves.
            /// </summary>
            /// <param name="name">the entity name without '&amp;' and ';'</param>
            /// <param name="value">the decoded character when found</param>
            /// <returns>true - the name is known, false - otherwise</returns>
            private static bool TryGetEntity(string name, out char value)
            {
                if (_decodeOnly.TryGetValue(name, out value))
                {
                    return true;
                }

                return _decodeOnly.TryGetValue(name.ToLowerInvariant(), out value);
            }

            #endregion
        }
    }
}