UnicodeTrie.cs 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. // RichTextKit
  2. // Copyright © 2019-2020 Topten Software. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); you may
  5. // not use this product except in compliance with the License. You may obtain
  6. // a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. // License for the specific language governing permissions and limitations
  14. // under the License.
  15. using System;
  16. using System.Collections.Generic;
  17. using System.IO;
  18. using System.IO.Compression;
  19. using System.Text;
  20. // Ported from: https://github.com/foliojs/unicode-trie
  21. namespace Topten.RichTextKit
  22. {
  /// <summary>
  /// Read-only lookup table that maps a Unicode code point to a 32-bit value.
  /// The table can be loaded from a serialized stream or byte buffer, built
  /// directly from an in-memory data array, and written back out with
  /// <see cref="Save"/>.
  /// </summary>
  internal class UnicodeTrie
  {
      // Combined index and data table; index entries and values share this one array.
      private readonly int[] _data;

      // Code points at or above this value all resolve to one shared value (see Get).
      private readonly int _highStart;

      // Value returned by Get for code points outside 0..0x10FFFF.
      private readonly uint _errorValue;

      /// <summary>
      /// Loads a trie from a stream in the layout produced by <see cref="Save"/>:
      /// three header fields (high start, error value, payload size in bytes)
      /// followed by the table data wrapped in two layers of deflate compression.
      /// </summary>
      /// <param name="stream">Stream positioned at the start of the header. It is left open.</param>
      public UnicodeTrie(Stream stream)
      {
          int entryCount;

          // Header fields are read through the project's *BE reader helpers
          // (presumably big-endian -- they are defined outside this file).
          using (var headerReader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: true))
          {
              _highStart = headerReader.ReadInt32BE();
              _errorValue = headerReader.ReadUInt32BE();

              // The header stores the payload size in bytes; each entry is 4 bytes.
              entryCount = headerReader.ReadInt32BE() / 4;
          }

          // The payload was deflated twice, so it has to be inflated twice.
          using (var outerInflater = new DeflateStream(stream, CompressionMode.Decompress, leaveOpen: true))
          using (var innerInflater = new DeflateStream(outerInflater, CompressionMode.Decompress, leaveOpen: true))
          using (var payloadReader = new BinaryReader(innerInflater, Encoding.UTF8, leaveOpen: true))
          {
              _data = new int[entryCount];

              int slot = 0;
              while (slot < _data.Length)
              {
                  _data[slot] = payloadReader.ReadInt32();
                  slot++;
              }
          }
      }

      /// <summary>
      /// Loads a trie from a byte buffer holding the same layout accepted by
      /// the stream constructor.
      /// </summary>
      /// <param name="buf">Serialized trie bytes.</param>
      public UnicodeTrie(byte[] buf)
          : this(new MemoryStream(buf))
      {
      }

      /// <summary>
      /// Wraps an already-built table. The array is stored by reference, not copied.
      /// </summary>
      /// <param name="data">Combined index/data table.</param>
      /// <param name="highStart">First code point that resolves to the shared high value.</param>
      /// <param name="errorValue">Value returned for out-of-range code points.</param>
      internal UnicodeTrie(int[] data, int highStart, uint errorValue)
      {
          _data = data;
          _highStart = highStart;
          _errorValue = errorValue;
      }

      /// <summary>
      /// Writes the trie to <paramref name="stream"/>: the three header fields
      /// first, then every table entry passed through two nested deflate
      /// streams. The target stream is left open.
      /// </summary>
      /// <param name="stream">Destination stream.</param>
      internal void Save(Stream stream)
      {
          // Header: high start, error value, payload size in bytes.
          using (var headerWriter = new BinaryWriter(stream, Encoding.UTF8, leaveOpen: true))
          {
              int payloadBytes = _data.Length * 4;

              headerWriter.WriteBE(_highStart);
              headerWriter.WriteBE(_errorValue);
              headerWriter.WriteBE(payloadBytes);
          }

          // Payload: compressed twice, mirroring what the stream constructor expects.
          using (var outerDeflater = new DeflateStream(stream, CompressionLevel.Optimal, leaveOpen: true))
          using (var innerDeflater = new DeflateStream(outerDeflater, CompressionLevel.Optimal, leaveOpen: true))
          using (var payloadWriter = new BinaryWriter(innerDeflater, Encoding.UTF8, leaveOpen: true))
          {
              for (int slot = 0; slot < _data.Length; slot++)
              {
                  payloadWriter.Write(_data[slot]);
              }

              // Push buffered bytes outward, innermost layer first.
              payloadWriter.Flush();
              innerDeflater.Flush();
              outerDeflater.Flush();
          }
      }

      /// <summary>
      /// Looks up the value stored for a code point.
      /// </summary>
      /// <param name="codePoint">Code point to look up.</param>
      /// <returns>
      /// The stored value, or the trie's error value when
      /// <paramref name="codePoint"/> is negative or greater than 0x10FFFF.
      /// </returns>
      public uint Get(int codePoint)
      {
          if (codePoint < 0 || codePoint > 0x10ffff)
          {
              return _errorValue;
          }

          if (codePoint > 0xffff)
          {
              if (codePoint >= _highStart)
              {
                  // Everything from _highStart upward shares the single value
                  // stored DATA_GRANULARITY entries before the end of the table.
                  return (uint)_data[_data.Length - UnicodeTrieBuilder.DATA_GRANULARITY];
              }

              // Supplemental code point: two-level lookup (index-1, then index-2).
              int index2Start = _data[(UnicodeTrieBuilder.INDEX_1_OFFSET - UnicodeTrieBuilder.OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UnicodeTrieBuilder.SHIFT_1)];
              int supplementalBlock = _data[index2Start + ((codePoint >> UnicodeTrieBuilder.SHIFT_2) & UnicodeTrieBuilder.INDEX_2_MASK)];
              return ValueAt(supplementalBlock, codePoint);
          }

          int bmpBlock;
          if (codePoint >= 0xd800 && codePoint <= 0xdbff)
          {
              // Lead surrogate code point. The main index holds the data for
              // lead surrogate code *units*; code *point* data lives in a
              // separate index section, which is the one needed here.
              bmpBlock = _data[UnicodeTrieBuilder.LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UnicodeTrieBuilder.SHIFT_2)];
          }
          else
          {
              // Ordinary BMP code point: single-level lookup, with the BMP
              // index starting at offset 0 of the table.
              bmpBlock = _data[codePoint >> UnicodeTrieBuilder.SHIFT_2];
          }

          return ValueAt(bmpBlock, codePoint);
      }

      // Resolves a data-block index entry plus the low bits of the code point
      // to the value stored in the table.
      private uint ValueAt(int blockEntry, int codePoint)
      {
          int position = (blockEntry << UnicodeTrieBuilder.INDEX_SHIFT) + (codePoint & UnicodeTrieBuilder.DATA_MASK);
          return (uint)_data[position];
      }
  }
  115. }