using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace S16.Text
{
    /// <summary>
    /// Converts text to and from ASCII-friendly escape notations:
    /// <c>\uXXXX</c> escapes (<see cref="Encode"/> / <see cref="Decode"/>) and
    /// XML hexadecimal character references (<see cref="EncodeXml"/>).
    /// </summary>
    public class UniEncodeDecode
    {
        /// <summary>
        /// Matches one <c>\uXXXX</c> escape: a literal backslash, the letter <c>u</c>
        /// and exactly four hexadecimal digits (captured in group 1).
        /// </summary>
        private static readonly Regex _unicodeEscape = new Regex(@"\\u([0-9A-Fa-f]{4})", RegexOptions.CultureInvariant);

        public UniEncodeDecode()
        {
        }

        /// <summary>
        /// Replaces every character at or above U+00FF with a <c>\uXXXX</c> escape.
        /// Characters below U+00FF are copied unchanged.
        /// </summary>
        /// <remarks>
        /// Backslashes already present in <paramref name="text"/> are not escaped, so input that
        /// literally contains <c>\uXXXX</c> sequences is indistinguishable from encoded output.
        /// </remarks>
        /// <param name="text">the text to encode; may be null</param>
        /// <returns>the encoded text, or an empty string when <paramref name="text"/> is null or empty</returns>
        public string Encode(string text)
        {
            if (string.IsNullOrEmpty(text)) return string.Empty;

            StringBuilder output = new StringBuilder(text.Length);
            foreach (char ch in text)
            {
                if (ch < (char)0xFF)
                {
                    output.Append(ch);
                }
                else
                {
                    // "X4" always yields four digits, which keeps the escape self-delimiting:
                    // an unpadded "\u3B1" followed by the letter 'A' could not be told apart from "\u3B1A".
                    output.Append("\\u").Append(((int)ch).ToString("X4"));
                }
            }

            return output.ToString();
        }

        /// <summary>
        /// Replaces every character at or above U+00FF with an XML hexadecimal character reference.
        /// Characters below U+00FF are copied unchanged; XML markup characters are NOT escaped.
        /// </summary>
        /// <param name="text">the text to encode; may be null</param>
        /// <returns>the encoded text, or an empty string when <paramref name="text"/> is null or empty</returns>
        public string EncodeXml(string text)
        {
            if (string.IsNullOrEmpty(text)) return string.Empty;

            StringBuilder output = new StringBuilder(text.Length);
            for (int i = 0; i < text.Length; i++)
            {
                char ch = text[i];
                if (ch < (char)0xFF)
                {
                    output.Append(ch);
                    continue;
                }

                int codePoint = ch;
                if (char.IsHighSurrogate(ch) && i + 1 < text.Length && char.IsLowSurrogate(text[i + 1]))
                {
                    // A surrogate pair is ONE code point; XML forbids references to the
                    // individual surrogate halves, so emit a single reference for the pair.
                    codePoint = char.ConvertToUtf32(ch, text[i + 1]);
                    i++;
                }

                output.Append("&#x").Append(codePoint.ToString("X")).Append(';');
            }

            return output.ToString();
        }

        /// <summary>
        /// Reverses <see cref="Encode"/>: every <c>\uXXXX</c> escape (exactly four hex digits,
        /// either case) is replaced by the UTF-16 code unit it names. All other text is kept as is.
        /// </summary>
        /// <param name="text">the text to decode; may be null</param>
        /// <returns>the decoded text, or an empty string when <paramref name="text"/> is null or empty</returns>
        public string Decode(string text)
        {
            if (string.IsNullOrEmpty(text)) return string.Empty;

            return _unicodeEscape.Replace(
                text,
                match => ((char)Convert.ToInt32(match.Groups[1].Value, 16)).ToString());
        }

        /// <summary>
        /// Helpers for encoding and decoding HTML character entities.
        /// </summary>
        internal static class HtmlUtils
        {
            #region Fields and Consts

            /// <summary>
            /// Emitted for numeric references that are larger than the highest Unicode code point.
            /// </summary>
            private const char ReplacementChar = '\uFFFD';

            /// <summary>
            /// The highest valid Unicode code point.
            /// </summary>
            private const int MaxCodePoint = 0x10FFFF;

            /// <summary>
            /// List of html tags that don't have content
            /// </summary>
            private static readonly List<string> _list = new List<string>(
                new[]
                {
                    "area", "base", "basefont", "br", "col",
                    "frame", "hr", "img", "input", "isindex",
                    "link", "meta", "param"
                }
                );

            /// <summary>
            /// The html encode\decode pairs (full entity text, plain text).
            /// The ampersand pair must stay last: encoding walks this array backwards so that
            /// the ampersands introduced by the other entities are not escaped a second time.
            /// </summary>
            private static readonly KeyValuePair<string, string>[] _encodeDecode = new[]
                                                           {
                                                               new KeyValuePair<string, string>("&lt;", "<"),
                                                               new KeyValuePair<string, string>("&gt;", ">"),
                                                               new KeyValuePair<string, string>("&quot;", "\""),
                                                               new KeyValuePair<string, string>("&amp;", "&"),
                                                           };

            /// <summary>
            /// The html decode only pairs, keyed by the exact (case-sensitive) entity name.
            /// Entity names differ by case only in many places (Agrave/agrave, Alpha/alpha, Dagger/dagger, ...),
            /// so this lookup has to be ordinal.
            /// </summary>
            private static readonly Dictionary<string, char> _decodeOnly = new Dictionary<string, char>(StringComparer.Ordinal);

            /// <summary>
            /// Case-insensitive fallback used only when the exact-case lookup misses, so that
            /// leniently written names (e.g. NBSP) keep decoding. For names that differ only by
            /// case the entry registered last wins.
            /// </summary>
            private static readonly Dictionary<string, char> _decodeOnlyIgnoreCase = new Dictionary<string, char>(StringComparer.InvariantCultureIgnoreCase);

            /// <summary>
            /// Length of the longest name in <see cref="_decodeOnly"/>; bounds the search for the closing ';'.
            /// </summary>
            private static readonly int _maxNameLength;

            #endregion

            /// <summary>
            /// Init.
            /// </summary>
            static HtmlUtils()
            {
                AddEntity("nbsp", ' ');
                AddEntity("apos", '\'');

                // ISO 8859-1 Symbols
                AddEntity("iexcl", 161);
                AddEntity("cent", 162);
                AddEntity("pound", 163);
                AddEntity("curren", 164);
                AddEntity("yen", 165);
                AddEntity("brvbar", 166);
                AddEntity("sect", 167);
                AddEntity("uml", 168);
                AddEntity("copy", 169);
                AddEntity("ordf", 170);
                AddEntity("laquo", 171);
                AddEntity("not", 172);
                AddEntity("shy", 173);
                AddEntity("reg", 174);
                AddEntity("macr", 175);
                AddEntity("deg", 176);
                AddEntity("plusmn", 177);
                AddEntity("sup2", 178);
                AddEntity("sup3", 179);
                AddEntity("acute", 180);
                AddEntity("micro", 181);
                AddEntity("para", 182);
                AddEntity("middot", 183);
                AddEntity("cedil", 184);
                AddEntity("sup1", 185);
                AddEntity("ordm", 186);
                AddEntity("raquo", 187);
                AddEntity("frac14", 188);
                AddEntity("frac12", 189);
                AddEntity("frac34", 190);
                AddEntity("iquest", 191);
                AddEntity("times", 215);
                AddEntity("divide", 247);

                // ISO 8859-1 Characters
                AddEntity("Agrave", 192);
                AddEntity("Aacute", 193);
                AddEntity("Acirc", 194);
                AddEntity("Atilde", 195);
                AddEntity("Auml", 196);
                AddEntity("Aring", 197);
                AddEntity("AElig", 198);
                AddEntity("Ccedil", 199);
                AddEntity("Egrave", 200);
                AddEntity("Eacute", 201);
                AddEntity("Ecirc", 202);
                AddEntity("Euml", 203);
                AddEntity("Igrave", 204);
                AddEntity("Iacute", 205);
                AddEntity("Icirc", 206);
                AddEntity("Iuml", 207);
                AddEntity("ETH", 208);
                AddEntity("Ntilde", 209);
                AddEntity("Ograve", 210);
                AddEntity("Oacute", 211);
                AddEntity("Ocirc", 212);
                AddEntity("Otilde", 213);
                AddEntity("Ouml", 214);
                AddEntity("Oslash", 216);
                AddEntity("Ugrave", 217);
                AddEntity("Uacute", 218);
                AddEntity("Ucirc", 219);
                AddEntity("Uuml", 220);
                AddEntity("Yacute", 221);
                AddEntity("THORN", 222);
                AddEntity("szlig", 223);
                AddEntity("agrave", 224);
                AddEntity("aacute", 225);
                AddEntity("acirc", 226);
                AddEntity("atilde", 227);
                AddEntity("auml", 228);
                AddEntity("aring", 229);
                AddEntity("aelig", 230);
                AddEntity("ccedil", 231);
                AddEntity("egrave", 232);
                AddEntity("eacute", 233);
                AddEntity("ecirc", 234);
                AddEntity("euml", 235);
                AddEntity("igrave", 236);
                AddEntity("iacute", 237);
                AddEntity("icirc", 238);
                AddEntity("iuml", 239);
                AddEntity("eth", 240);
                AddEntity("ntilde", 241);
                AddEntity("ograve", 242);
                AddEntity("oacute", 243);
                AddEntity("ocirc", 244);
                AddEntity("otilde", 245);
                AddEntity("ouml", 246);
                AddEntity("oslash", 248);
                AddEntity("ugrave", 249);
                AddEntity("uacute", 250);
                AddEntity("ucirc", 251);
                AddEntity("uuml", 252);
                AddEntity("yacute", 253);
                AddEntity("thorn", 254);
                AddEntity("yuml", 255);

                // Math Symbols Supported by HTML
                AddEntity("forall", 8704);
                AddEntity("part", 8706);
                AddEntity("exist", 8707);
                AddEntity("empty", 8709);
                AddEntity("nabla", 8711);
                AddEntity("isin", 8712);
                AddEntity("notin", 8713);
                AddEntity("ni", 8715);
                AddEntity("prod", 8719);
                AddEntity("sum", 8721);
                AddEntity("minus", 8722);
                AddEntity("lowast", 8727);
                AddEntity("radic", 8730);
                AddEntity("prop", 8733);
                AddEntity("infin", 8734);
                AddEntity("ang", 8736);
                AddEntity("and", 8743);
                AddEntity("or", 8744);
                AddEntity("cap", 8745);
                AddEntity("cup", 8746);
                AddEntity("int", 8747);
                AddEntity("there4", 8756);
                AddEntity("sim", 8764);
                AddEntity("cong", 8773);
                AddEntity("asymp", 8776);
                AddEntity("ne", 8800);
                AddEntity("equiv", 8801);
                AddEntity("le", 8804);
                AddEntity("ge", 8805);
                AddEntity("sub", 8834);
                AddEntity("sup", 8835);
                AddEntity("nsub", 8836);
                AddEntity("sube", 8838);
                AddEntity("supe", 8839);
                AddEntity("oplus", 8853);
                AddEntity("otimes", 8855);
                AddEntity("perp", 8869);
                AddEntity("sdot", 8901);

                // Greek Letters Supported by HTML
                AddEntity("Alpha", 913);
                AddEntity("Beta", 914);
                AddEntity("Gamma", 915);
                AddEntity("Delta", 916);
                AddEntity("Epsilon", 917);
                AddEntity("Zeta", 918);
                AddEntity("Eta", 919);
                AddEntity("Theta", 920);
                AddEntity("Iota", 921);
                AddEntity("Kappa", 922);
                AddEntity("Lambda", 923);
                AddEntity("Mu", 924);
                AddEntity("Nu", 925);
                AddEntity("Xi", 926);
                AddEntity("Omicron", 927);
                AddEntity("Pi", 928);
                AddEntity("Rho", 929);
                AddEntity("Sigma", 931);
                AddEntity("Tau", 932);
                AddEntity("Upsilon", 933);
                AddEntity("Phi", 934);
                AddEntity("Chi", 935);
                AddEntity("Psi", 936);
                AddEntity("Omega", 937);
                AddEntity("alpha", 945);
                AddEntity("beta", 946);
                AddEntity("gamma", 947);
                AddEntity("delta", 948);
                AddEntity("epsilon", 949);
                AddEntity("zeta", 950);
                AddEntity("eta", 951);
                AddEntity("theta", 952);
                AddEntity("iota", 953);
                AddEntity("kappa", 954);
                AddEntity("lambda", 955);
                AddEntity("mu", 956);
                AddEntity("nu", 957);
                AddEntity("xi", 958);
                AddEntity("omicron", 959);
                AddEntity("pi", 960);
                AddEntity("rho", 961);
                AddEntity("sigmaf", 962);
                AddEntity("sigma", 963);
                AddEntity("tau", 964);
                AddEntity("upsilon", 965);
                AddEntity("phi", 966);
                AddEntity("chi", 967);
                AddEntity("psi", 968);
                AddEntity("omega", 969);
                AddEntity("thetasym", 977);
                AddEntity("upsih", 978);
                AddEntity("piv", 982);

                // Other Entities Supported by HTML
                AddEntity("OElig", 338);
                AddEntity("oelig", 339);
                AddEntity("Scaron", 352);
                AddEntity("scaron", 353);
                AddEntity("Yuml", 376);
                AddEntity("fnof", 402);
                AddEntity("circ", 710);
                AddEntity("tilde", 732);
                AddEntity("ndash", 8211);
                AddEntity("mdash", 8212);
                AddEntity("lsquo", 8216);
                AddEntity("rsquo", 8217);
                AddEntity("sbquo", 8218);
                AddEntity("ldquo", 8220);
                AddEntity("rdquo", 8221);
                AddEntity("bdquo", 8222);
                AddEntity("dagger", 8224);
                AddEntity("Dagger", 8225);
                AddEntity("bull", 8226);
                AddEntity("hellip", 8230);
                AddEntity("permil", 8240);
                AddEntity("prime", 8242);
                AddEntity("Prime", 8243);
                AddEntity("lsaquo", 8249);
                AddEntity("rsaquo", 8250);
                AddEntity("oline", 8254);
                AddEntity("euro", 8364);
                // U+2122 TRADE MARK SIGN; 153 is only its Windows-1252 byte value (a control character in Unicode).
                AddEntity("trade", 8482);
                AddEntity("larr", 8592);
                AddEntity("uarr", 8593);
                AddEntity("rarr", 8594);
                AddEntity("darr", 8595);
                AddEntity("harr", 8596);
                AddEntity("crarr", 8629);
                AddEntity("lceil", 8968);
                AddEntity("rceil", 8969);
                AddEntity("lfloor", 8970);
                AddEntity("rfloor", 8971);
                AddEntity("loz", 9674);
                AddEntity("spades", 9824);
                AddEntity("clubs", 9827);
                AddEntity("hearts", 9829);
                AddEntity("diams", 9830);

                int longest = 0;
                foreach (string name in _decodeOnly.Keys)
                {
                    if (name.Length > longest)
                    {
                        longest = name.Length;
                    }
                }
                _maxNameLength = longest;
            }

            /// <summary>
            /// Is the given html tag is single tag or can have content.
            /// </summary>
            /// <param name="tagName">the tag to check (must be lower case)</param>
            /// <returns>true - is single tag, false - otherwise</returns>
            public static bool IsSingleTag(string tagName)
            {
                return _list.Contains(tagName);
            }

            /// <summary>
            /// Decode html encoded string to regular string.<br/>
            /// Handles numeric references (<c>&amp;#8364;</c>, <c>&amp;#x20AC;</c>), the named entities in the
            /// decode table (<c>&amp;euro;</c>) and <c>&amp;lt;</c>, <c>&amp;gt;</c>, <c>&amp;quot;</c>, <c>&amp;amp;</c>.
            /// </summary>
            /// <remarks>
            /// The input is scanned once and decoded text is never scanned again, so
            /// <c>&amp;#38;lt;</c> yields the text <c>&amp;lt;</c> rather than a less-than sign.
            /// Anything that is not a recognised entity is copied through unchanged.
            /// </remarks>
            /// <param name="str">the string to decode</param>
            /// <returns>decoded string; null or empty input is returned as is</returns>
            public static string DecodeHtml(string str)
            {
                if (string.IsNullOrEmpty(str) || str.IndexOf('&') < 0)
                {
                    return str;
                }

                StringBuilder result = new StringBuilder(str.Length);
                int pos = 0;
                while (pos < str.Length)
                {
                    int amp = str.IndexOf('&', pos);
                    if (amp < 0)
                    {
                        result.Append(str, pos, str.Length - pos);
                        break;
                    }

                    // copy the plain text in front of the ampersand
                    result.Append(str, pos, amp - pos);

                    int consumed = amp + 1 < str.Length && str[amp + 1] == '#'
                        ? DecodeHtmlCharByCode(str, amp, result)
                        : DecodeHtmlCharByName(str, amp, result);
                    if (consumed == 0)
                    {
                        // not an entity: keep the ampersand as literal text
                        result.Append('&');
                        consumed = 1;
                    }

                    pos = amp + consumed;
                }

                return result.ToString();
            }

            /// <summary>
            /// Encode regular string into html encoded string.<br/>
            /// Handles the characters encoded as <c>&amp;lt;</c>, <c>&amp;gt;</c>, <c>&amp;quot;</c> and <c>&amp;amp;</c>.
            /// </summary>
            /// <param name="str">the string to encode</param>
            /// <returns>encoded string; null or empty input is returned as is</returns>
            public static string EncodeHtml(string str)
            {
                if (!string.IsNullOrEmpty(str))
                {
                    // backwards, so the ampersand is escaped before the other entities add new ampersands
                    for (int i = _encodeDecode.Length - 1; i >= 0; i--)
                    {
                        str = str.Replace(_encodeDecode[i].Value, _encodeDecode[i].Key);
                    }
                }
                return str;
            }

            #region Private methods

            /// <summary>
            /// Register a named entity in both the exact-case table and the case-insensitive fallback table.
            /// </summary>
            /// <param name="name">the entity name without the leading ampersand and the trailing semicolon</param>
            /// <param name="code">the UTF-16 code unit the entity stands for</param>
            private static void AddEntity(string name, int code)
            {
                char value = Convert.ToChar(code);
                _decodeOnly[name] = value;
                _decodeOnlyIgnoreCase[name] = value;
            }

            /// <summary>
            /// Check if the given char is a digit character (0-9) and (0-9, a-f for HEX)
            /// </summary>
            /// <param name="ch">the character to check</param>
            /// <param name="hex">true to also accept the hex digits a-f and A-F</param>
            /// <returns>true - is digit, false - not a digit</returns>
            private static bool IsDigit(char ch, bool hex)
            {
                return ( ch >= '0' && ch <= '9' ) || ( hex && ( ( ch >= 'a' && ch <= 'f' ) || ( ch >= 'A' && ch <= 'F' ) ) );
            }

            /// <summary>
            /// Convert the given char to digit.
            /// </summary>
            /// <param name="ch">the character to convert</param>
            /// <param name="hex">true to also convert the hex digits a-f and A-F</param>
            /// <returns>the numeric value of the digit, or 0 if the character is not a digit</returns>
            private static int ToDigit(char ch, bool hex)
            {
                if( ch >= '0' && ch <= '9' )
                    return ch - '0';
                else if( hex )
                {
                    if( ch >= 'a' && ch <= 'f' )
                        return ch - 'a' + 10;
                    else if(ch >= 'A' && ch <= 'F')
                        return ch - 'A' + 10;
                }

                return 0;
            }

            /// <summary>
            /// Decode one html special character encoded using char entity code
            /// (<c>&amp;#8364;</c> or <c>&amp;#x20AC;</c>); the closing semicolon is optional.
            /// </summary>
            /// <param name="str">the string being decoded</param>
            /// <param name="start">index of the ampersand that starts the reference</param>
            /// <param name="output">receives the decoded character(s)</param>
            /// <returns>number of input characters consumed, or 0 if no digits follow the prefix</returns>
            private static int DecodeHtmlCharByCode(string str, int start, StringBuilder output)
            {
                int pos = start + 2;
                bool hex = pos < str.Length && (str[pos] == 'x' || str[pos] == 'X');
                if (hex)
                {
                    pos++;
                }

                int firstDigit = pos;
                int code = 0;
                while (pos < str.Length && IsDigit(str[pos], hex))
                {
                    // Stop accumulating once the value is out of range (it stays out of range),
                    // but keep consuming digits; this also rules out integer overflow.
                    if (code <= MaxCodePoint)
                    {
                        code = code * (hex ? 16 : 10) + ToDigit(str[pos], hex);
                    }
                    pos++;
                }

                if (pos == firstDigit)
                {
                    return 0;
                }

                if (pos < str.Length && str[pos] == ';')
                {
                    pos++;
                }

                if (code > MaxCodePoint)
                {
                    output.Append(ReplacementChar);
                }
                else if (code > char.MaxValue)
                {
                    // supplementary-plane code point: needs a surrogate pair
                    output.Append(char.ConvertFromUtf32(code));
                }
                else
                {
                    output.Append((char)code);
                }

                return pos - start;
            }

            /// <summary>
            /// Decode one html special character encoded using char entity name (<c>&amp;euro;</c>).
            /// </summary>
            /// <param name="str">the string being decoded</param>
            /// <param name="start">index of the ampersand that starts the entity</param>
            /// <param name="output">receives the decoded text</param>
            /// <returns>number of input characters consumed, or 0 if the text is not a known entity</returns>
            private static int DecodeHtmlCharByName(string str, int start, StringBuilder output)
            {
                int nameStart = start + 1;

                // the ';' can be at most one position past the longest known name
                int window = Math.Min(_maxNameLength + 1, str.Length - nameStart);
                if (window <= 0)
                {
                    return 0;
                }

                int end = str.IndexOf(';', nameStart, window);
                if (end <= nameStart)
                {
                    // no terminator in range, or an empty name
                    return 0;
                }

                string entity = str.Substring(start, end - start + 1);
                foreach (KeyValuePair<string, string> pair in _encodeDecode)
                {
                    if (pair.Key == entity)
                    {
                        output.Append(pair.Value);
                        return entity.Length;
                    }
                }

                string name = entity.Substring(1, entity.Length - 2);
                char decoded;
                if (_decodeOnly.TryGetValue(name, out decoded) || _decodeOnlyIgnoreCase.TryGetValue(name, out decoded))
                {
                    output.Append(decoded);
                    return entity.Length;
                }

                return 0;
            }

            #endregion
        }
    }
}