123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463 |
- using System;
- using System.Text;
- #pragma warning disable
- namespace ExCSS.Model
- {
- internal static class HtmlEncoding
- {
/// <summary>
/// Scans <paramref name="content"/> for the first <c>charset</c> keyword that is
/// followed (after optional whitespace) by an equal sign, and returns the value
/// that comes after it.
/// </summary>
/// <param name="content">The text to scan. Matching is case-insensitive.</param>
/// <returns>
/// The charset value in lower case (the whole input is lower-cased before scanning).
/// A quoted value is returned without its quotes; an unquoted value ends at the
/// first space character or semicolon. Returns an empty string when the input is
/// null or empty, when no usable declaration is found, or when a quoted value is
/// never closed.
/// </returns>
internal static string Extract(string content)
{
    if (string.IsNullOrEmpty(content))
    {
        return string.Empty;
    }

    const string keyword = "charset";

    // Invariant casing keeps the result independent of the current thread culture
    // (a Turkish culture would otherwise turn 'I' into a dotless i).
    content = content.ToLowerInvariant();

    var searchFrom = 0;

    // Loop rather than recurse: every "charset" that is not followed by '=' moves
    // the search window forward, so arbitrarily long input cannot exhaust the stack.
    while (true)
    {
        var keywordIndex = content.IndexOf(keyword, searchFrom, StringComparison.Ordinal);

        if (keywordIndex < 0)
        {
            return string.Empty;
        }

        var position = keywordIndex + keyword.Length;

        if (position >= content.Length)
        {
            // The keyword is the very last thing in the text; there is no value.
            return string.Empty;
        }

        // Skip whitespace between the keyword and '=', but never step past the
        // last character so the index below stays valid.
        while (position < content.Length - 1 && content[position].IsSpaceCharacter())
        {
            position++;
        }

        if (content[position] != Specification.EqualSign)
        {
            // Not a declaration; look for the next keyword in the remaining text.
            searchFrom = position;
            continue;
        }

        // Step over '=' and any whitespace in front of the value.
        position++;

        while (position < content.Length && content[position].IsSpaceCharacter())
        {
            position++;
        }

        if (position >= content.Length)
        {
            return string.Empty;
        }

        var first = content[position];

        if (first == Specification.DoubleQuote || first == Specification.SingleQuote)
        {
            // Quoted value: everything up to the matching quote. An unterminated
            // quote yields no result.
            var closing = content.IndexOf(first, position + 1);

            return closing < 0
                ? string.Empty
                : content.Substring(position + 1, closing - position - 1);
        }

        // Unquoted value: runs until the first space character or ';'.
        var end = position;

        while (end < content.Length && !content[end].IsSpaceCharacter() && content[end] != ';')
        {
            end++;
        }

        return content.Substring(position, end - position);
    }
}
/// <summary>
/// Tells whether <paramref name="charset"/> can be resolved to an encoding.
/// </summary>
/// <param name="charset">The charset label to check; may be null.</param>
/// <returns>
/// True when <see cref="Resolve"/> yields an encoding for the label; false when the
/// label is null, unknown, or names an encoding the current runtime cannot provide.
/// </returns>
internal static bool IsSupported(string charset)
{
    if (charset == null)
    {
        return false;
    }

    try
    {
        return Resolve(charset) != null;
    }
    catch (ArgumentException)
    {
        // Encoding.GetEncoding throws when the label is known here but the code page
        // is not available on this platform; for a predicate that means "no".
        return false;
    }
    catch (NotSupportedException)
    {
        return false;
    }
}
/// <summary>
/// Maps a charset label to the corresponding <see cref="Encoding"/>.
/// </summary>
/// <param name="charset">The charset label; compared case-insensitively.</param>
/// <returns>
/// The encoding registered for the label, or null when the label is null or not
/// one of the labels listed below.
/// </returns>
/// <remarks>
/// Labels other than the UTF-8/UTF-16 ones are looked up through
/// <see cref="Encoding.GetEncoding(string)"/>, which throws when the runtime does
/// not provide the requested encoding.
/// </remarks>
internal static Encoding Resolve(string charset)
{
    if (charset == null)
    {
        return null;
    }

    // Invariant casing: with a culture-sensitive ToLower a Turkish thread culture
    // turns 'I' into a dotless i and labels such as "ISO-8859-1" stop matching.
    switch (charset.ToLowerInvariant())
    {
        case "unicode-1-1-utf-8":
        case "utf-8":
        case "utf8":
            return Encoding.UTF8;

        case "utf-16be":
            return Encoding.BigEndianUnicode;

        case "utf-16":
        case "utf-16le":
            return Encoding.Unicode;

        case "dos-874":
        case "iso-8859-11":
        case "iso8859-11":
        case "iso885911":
        case "tis-620":
        case "windows-874":
            return Encoding.GetEncoding("windows-874");

        case "cp1250":
        case "windows-1250":
        case "x-cp1250":
            return Encoding.GetEncoding("windows-1250");

        case "cp1251":
        case "windows-1251":
        case "x-cp1251":
            return Encoding.GetEncoding("windows-1251");

        // ASCII and Latin-1 labels are all served by windows-1252.
        case "ansi_x3.4-1968":
        case "ascii":
        case "cp1252":
        case "cp819":
        case "csisolatin1":
        case "ibm819":
        case "iso-8859-1":
        case "iso-ir-100":
        case "iso8859-1":
        case "iso88591":
        case "iso_8859-1":
        case "iso_8859-1:1987":
        case "l1":
        case "latin1":
        case "us-ascii":
        case "windows-1252":
        case "x-cp1252":
            return Encoding.GetEncoding("windows-1252");

        case "cp1253":
        case "windows-1253":
        case "x-cp1253":
            return Encoding.GetEncoding("windows-1253");

        // Latin-5 (ISO-8859-9) labels are served by windows-1254.
        case "cp1254":
        case "csisolatin5":
        case "iso-8859-9":
        case "iso-ir-148":
        case "iso8859-9":
        case "iso88599":
        case "iso_8859-9":
        case "iso_8859-9:1989":
        case "l5":
        case "latin5":
        case "windows-1254":
        case "x-cp1254":
            return Encoding.GetEncoding("windows-1254");

        case "cp1255":
        case "windows-1255":
        case "x-cp1255":
            return Encoding.GetEncoding("windows-1255");

        case "cp1256":
        case "windows-1256":
        case "x-cp1256":
            return Encoding.GetEncoding("windows-1256");

        case "cp1257":
        case "windows-1257":
        case "x-cp1257":
            return Encoding.GetEncoding("windows-1257");

        case "cp1258":
        case "windows-1258":
        case "x-cp1258":
            return Encoding.GetEncoding("windows-1258");

        case "csmacintosh":
        case "mac":
        case "macintosh":
        case "x-mac-roman":
            return Encoding.GetEncoding("macintosh");

        case "x-mac-cyrillic":
        case "x-mac-ukrainian":
            return Encoding.GetEncoding("x-mac-cyrillic");

        case "866":
        case "cp866":
        case "csibm866":
        case "ibm866":
            return Encoding.GetEncoding("cp866");

        case "csisolatin2":
        case "iso-8859-2":
        case "iso-ir-101":
        case "iso8859-2":
        case "iso88592":
        case "iso_8859-2":
        case "iso_8859-2:1987":
        case "l2":
        case "latin2":
            return Encoding.GetEncoding("iso-8859-2");

        case "csisolatin3":
        case "iso-8859-3":
        case "iso-ir-109":
        case "iso8859-3":
        case "iso88593":
        case "iso_8859-3":
        case "iso_8859-3:1988":
        case "l3":
        case "latin3":
            return Encoding.GetEncoding("iso-8859-3");

        case "csisolatin4":
        case "iso-8859-4":
        case "iso-ir-110":
        case "iso8859-4":
        case "iso88594":
        case "iso_8859-4":
        case "iso_8859-4:1988":
        case "l4":
        case "latin4":
            return Encoding.GetEncoding("iso-8859-4");

        case "csisolatincyrillic":
        case "cyrillic":
        case "iso-8859-5":
        case "iso-ir-144":
        case "iso8859-5":
        case "iso88595":
        case "iso_8859-5":
        case "iso_8859-5:1988":
            return Encoding.GetEncoding("iso-8859-5");

        case "arabic":
        case "asmo-708":
        case "csiso88596e":
        case "csiso88596i":
        case "csisolatinarabic":
        case "ecma-114":
        case "iso-8859-6":
        case "iso-8859-6-e":
        case "iso-8859-6-i":
        case "iso-ir-127":
        case "iso8859-6":
        case "iso88596":
        case "iso_8859-6":
        case "iso_8859-6:1987":
            return Encoding.GetEncoding("iso-8859-6");

        case "csisolatingreek":
        case "ecma-118":
        case "elot_928":
        case "greek":
        case "greek8":
        case "iso-8859-7":
        case "iso-ir-126":
        case "iso8859-7":
        case "iso88597":
        case "iso_8859-7":
        case "iso_8859-7:1987":
        case "sun_eu_greek":
            return Encoding.GetEncoding("iso-8859-7");

        case "csiso88598e":
        case "csisolatinhebrew":
        case "hebrew":
        case "iso-8859-8":
        case "iso-8859-8-e":
        case "iso-ir-138":
        case "iso8859-8":
        case "iso88598":
        case "iso_8859-8":
        case "iso_8859-8:1988":
        case "visual":
            return Encoding.GetEncoding("iso-8859-8");

        case "csiso88598i":
        case "iso-8859-8-i":
        case "logical":
            return Encoding.GetEncoding("iso-8859-8-i");

        case "iso-8859-13":
        case "iso8859-13":
        case "iso885913":
            return Encoding.GetEncoding("iso-8859-13");

        case "csisolatin9":
        case "iso-8859-15":
        case "iso8859-15":
        case "iso885915":
        case "iso_8859-15":
        case "l9":
            return Encoding.GetEncoding("iso-8859-15");

        case "cskoi8r":
        case "koi":
        case "koi8":
        case "koi8-r":
        case "koi8_r":
            return Encoding.GetEncoding("koi8-r");

        case "koi8-u":
            return Encoding.GetEncoding("koi8-u");

        case "chinese":
        case "csgb2312":
        case "csiso58gb231280":
        case "gb2312":
        case "gb_2312":
        case "gb_2312-80":
        case "gbk":
        case "iso-ir-58":
        case "x-gbk":
            return Encoding.GetEncoding("x-cp20936");

        case "hz-gb-2312":
            return Encoding.GetEncoding("hz-gb-2312");

        case "gb18030":
            return Encoding.GetEncoding("GB18030");

        case "big5":
        case "big5-hkscs":
        case "cn-big5":
        case "csbig5":
        case "x-x-big5":
            return Encoding.GetEncoding("big5");

        case "csiso2022jp":
        case "iso-2022-jp":
            return Encoding.GetEncoding("iso-2022-jp");

        case "csiso2022kr":
        case "iso-2022-kr":
            return Encoding.GetEncoding("iso-2022-kr");

        // NOTE(review): the Chinese ISO-2022 labels resolve to the Japanese
        // iso-2022-jp encoding. This looks like a copy-paste slip; confirm the
        // intended target before changing it.
        case "iso-2022-cn":
        case "iso-2022-cn-ext":
            return Encoding.GetEncoding("iso-2022-jp");

        default:
            return null;
    }
}
/// <summary>
/// Suggests a default encoding for a locale name.
/// </summary>
/// <param name="local">
/// The locale name, e.g. <c>ru</c> or <c>zh-CN</c>. Only the first two characters
/// are used, except for <c>zh-CN</c> and <c>zh-TW</c>, which are matched as a whole
/// (ignoring case).
/// </param>
/// <returns>
/// UTF-8 when <paramref name="local"/> is null or shorter than two characters,
/// the encoding listed for the language prefix otherwise, and windows-1252 when
/// nothing matches.
/// </returns>
internal static Encoding Suggest(string local)
{
    if (local == null || local.Length < 2)
    {
        return Encoding.UTF8;
    }

    // Invariant casing so that a prefix such as "VI" is still recognised when the
    // current thread culture is Turkish (where 'I' lower-cases to a dotless i).
    var language = local.Substring(0, 2).ToLowerInvariant();

    switch (language)
    {
        case "ar":
        case "cy":
        case "fa":
        case "hr":
        case "kk":
        case "mk":
        case "or":
        case "ro":
        case "sr":
        case "vi":
            return Encoding.UTF8;

        case "be":
            return Encoding.GetEncoding("iso-8859-5");

        case "bg":
        case "ru":
        case "uk":
            return Encoding.GetEncoding("windows-1251");

        case "cs":
        case "hu":
        case "pl":
        case "sl":
            return Encoding.GetEncoding("iso-8859-2");

        case "tr":
        case "ku":
            return Encoding.GetEncoding("windows-1254");

        case "he":
            return Encoding.GetEncoding("windows-1255");

        case "lv":
            return Encoding.GetEncoding("iso-8859-13");

        case "ja":
            // Windows-31J was considered here; UTF-8 is used instead.
            return Encoding.UTF8;

        case "ko":
            return Encoding.GetEncoding("ks_c_5601-1987");

        case "lt":
            return Encoding.GetEncoding("windows-1257");

        case "sk":
            return Encoding.GetEncoding("windows-1250");

        case "th":
            return Encoding.GetEncoding("windows-874");
    }

    // The two Chinese locales are told apart by their full name, not the prefix.
    if (local.Equals("zh-CN", StringComparison.OrdinalIgnoreCase))
    {
        return Encoding.GetEncoding("GB18030");
    }

    if (local.Equals("zh-TW", StringComparison.OrdinalIgnoreCase))
    {
        return Encoding.GetEncoding("big5");
    }

    return Encoding.GetEncoding("windows-1252");
}
- }
- }
- #pragma warning restore
|