// GraphemeClusterAlgorithm.cs

  using System;
  using System.Collections.Generic;
  using System.Text;
  using Topten.RichTextKit.Utils;
  namespace Topten.RichTextKit
  {
      /// <summary>
      /// Finds grapheme cluster boundaries in a buffer of code points by looking up
      /// the grapheme cluster classes on either side of a position in a pair table,
      /// with extra look-behind for ZWJ and regional indicator sequences.
      /// </summary>
      static class GraphemeClusterAlgorithm
      {
          /// <summary>
          /// Given a sequence of code points, return its grapheme cluster boundaries
          /// </summary>
          /// <remarks>
          /// Every position from 0 to <c>codePoints.Length</c> (inclusive) is tested with
          /// <see cref="IsBoundary"/>; results are produced lazily, in ascending order.
          /// An empty buffer yields nothing.
          /// </remarks>
          /// <param name="codePoints">The code points</param>
          /// <returns>An enumerable of grapheme cluster boundaries</returns>
          public static IEnumerable<int> GetBoundaries(Slice<int> codePoints)
          {
              for (int i = 0; i <= codePoints.Length; i++)
              {
                  if (IsBoundary(codePoints, i))
                  {
                      yield return i;
                  }
              }
          }

          /// <summary>
          /// Check if a position in a code point buffer is a grapheme cluster boundary
          /// </summary>
          /// <param name="codePoints">The code points</param>
          /// <param name="position">
          /// The position to check, from 0 (before the first code point) to
          /// <c>codePoints.Length</c> (after the last code point)
          /// </param>
          /// <returns>
          /// True if a grapheme cluster boundary falls immediately before
          /// <paramref name="position"/>; false otherwise, and always false for an
          /// empty buffer.
          /// </returns>
          /// <exception cref="ArgumentOutOfRangeException">
          /// The buffer is non-empty and <paramref name="position"/> is negative or
          /// greater than <c>codePoints.Length</c>.
          /// </exception>
          public static bool IsBoundary(Slice<int> codePoints, int position)
          {
              // Empty text has no boundaries at all (checked first so that the
              // result for an empty buffer does not depend on position)
              if (codePoints.Length == 0)
              {
                  return false;
              }

              // Fail explicitly rather than relying on the buffer's indexer
              if (position < 0 || position > codePoints.Length)
              {
                  throw new ArgumentOutOfRangeException(nameof(position));
              }

              // Get the grapheme cluster class of the character on each side,
              // using the SOT/EOT pseudo-classes at the ends of the buffer
              var before = position == 0
                  ? GraphemeClusterClass.SOT
                  : UnicodeClasses.GraphemeClusterClass(codePoints[position - 1]);
              var after = position < codePoints.Length
                  ? UnicodeClasses.GraphemeClusterClass(codePoints[position])
                  : GraphemeClusterClass.EOT;

              // Rule 11 - Special handling for ZWJ in extended pictograph:
              // a ZWJ that follows "ExtPict Extend*" is promoted to ExtPictZwg so
              // the table can suppress the break before a following ExtPict
              if (before == GraphemeClusterClass.ZWJ && IsExtPictBeforeExtendRun(codePoints, position - 2))
              {
                  before = GraphemeClusterClass.ExtPictZwg;
              }

              // Rule 12 and 13 - Special handling for regional indicator:
              // if an odd number of regional indicators precede the one just before
              // this position then that one completes a pair, so treat it as Any
              if (before == GraphemeClusterClass.Regional_Indicator
                  && (CountRegionalIndicatorsBefore(codePoints, position - 1) % 2) != 0)
              {
                  before = GraphemeClusterClass.Any;
              }

              return pairTable[(int)after][(int)before] != 0;
          }

          /// <summary>
          /// Skips backwards over a run of Extend code points ending at
          /// <paramref name="index"/> and reports whether the code point before
          /// that run is an ExtPict.
          /// </summary>
          private static bool IsExtPictBeforeExtendRun(Slice<int> codePoints, int index)
          {
              while (index >= 0 && UnicodeClasses.GraphemeClusterClass(codePoints[index]) == GraphemeClusterClass.Extend)
              {
                  index--;
              }

              return index >= 0 && UnicodeClasses.GraphemeClusterClass(codePoints[index]) == GraphemeClusterClass.ExtPict;
          }

          /// <summary>
          /// Counts the consecutive Regional_Indicator code points that come
          /// immediately before (and not including) <paramref name="index"/>.
          /// </summary>
          private static int CountRegionalIndicatorsBefore(Slice<int> codePoints, int index)
          {
              int count = 0;
              for (int i = index - 1; i >= 0; i--)
              {
                  if (UnicodeClasses.GraphemeClusterClass(codePoints[i]) != GraphemeClusterClass.Regional_Indicator)
                  {
                      break;
                  }
                  count++;
              }
              return count;
          }

          // Boundary lookup table, indexed as
          //     pairTable[class of code point AFTER position][class of code point BEFORE position]
          // A non-zero entry means a boundary is allowed between the two classes.
          // Each row is labelled with the "after" class; the header below gives the
          // "before" class for each column.
          // NOTE(review): row/column order is assumed to match the numeric values of the
          // GraphemeClusterClass enum (declared elsewhere) - confirm before reordering.
          private static readonly byte[][] pairTable = new byte[][]
          {
              //           Any CR LF Control Extend Regional_Indicator Prepend SpacingMark L V T LV LVT ExtPict ZWJ SOT EOT ExtPictZwg
              new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // Any
              new byte[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // CR
              new byte[] { 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // LF
              new byte[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // Control
              new byte[] { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, }, // Extend
              new byte[] { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // Regional_Indicator
              new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // Prepend
              new byte[] { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, }, // SpacingMark
              new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // L
              new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, }, // V
              new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, }, // T
              new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // LV
              new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // LVT
              new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, }, // ExtPict
              new byte[] { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, }, // ZWJ
              new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // SOT
              new byte[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // EOT
              new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // ExtPictZwg
          };
      }
  }